tmx-parser-2018 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +4 -0
- data/History.txt +11 -0
- data/README.md +113 -0
- data/Rakefile +20 -0
- data/lib/tmx-parser/document.rb +27 -0
- data/lib/tmx-parser/elements.rb +154 -0
- data/lib/tmx-parser/listener.rb +80 -0
- data/lib/tmx-parser/sax_document.rb +73 -0
- data/lib/tmx-parser/tag_names.rb +13 -0
- data/lib/tmx-parser/version.rb +5 -0
- data/lib/tmx-parser.rb +20 -0
- data/spec/spec_helper.rb +8 -0
- data/spec/tmx-parser_spec.rb +245 -0
- data/tmx-parser.gemspec +22 -0
- metadata +73 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 9b35d164435ff250f53443f41cde51f9da1128a937f8be81e2ee87506660d291
|
4
|
+
data.tar.gz: c484c6452c02ba983a73fd5864112f1c612bcd1727cdece552d3ba00f85b7e27
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 438d30db65dea8693a023b91f9d64cf1489bdd3e352a93bc19b3d968d667c642bc6d40453e6d02ee7e7faac33e62f97adae4e23eb0cf62d45709a491a7702201
|
7
|
+
data.tar.gz: b8fa9c0e7e658a3e8d8656b3fe36dbe3ec3ff199a67e6000893b27a48beff2ed1d47b46d837bc0b52773c931d41e93dc354e7ed8192c18f557db130e65650e17
|
data/Gemfile
ADDED
data/History.txt
ADDED
data/README.md
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
tmx-parser
|
2
|
+
=================
|
3
|
+
|
4
|
+
Parser for the Translation Memory eXchange (.tmx) file format.
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
`gem install tmx-parser-2018`
|
9
|
+
|
10
|
+
## Usage
|
11
|
+
|
12
|
+
```ruby
|
13
|
+
require 'tmx-parser'
|
14
|
+
```
|
15
|
+
|
16
|
+
## Functionality
|
17
|
+
|
18
|
+
Got a .tmx file you need to parse? Just use the `TmxParser.load` method. It'll return an enumerable `TmxParser::Document` object for your iterating pleasure:
|
19
|
+
|
20
|
+
```ruby
|
21
|
+
doc = TmxParser.load(File.open('path/to/my.tmx'))
|
22
|
+
doc.each do |unit|
|
23
|
+
...
|
24
|
+
end
|
25
|
+
```
|
26
|
+
|
27
|
+
You can also pass a string to `#load`:
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
doc = TmxParser.load(File.read('path/to/my.tmx'))
|
31
|
+
```
|
32
|
+
|
33
|
+
The parser works in a streaming fashion, meaning it tries not to hold the entire source document in memory all at once. It will instead yield each translation unit incrementally.
|
34
|
+
|
35
|
+
## Translation Units
|
36
|
+
|
37
|
+
Translation units are simple Ruby objects that contain properties (tmx `<prop>` elements) and variants (tmx `<tuv>` elements). You can also retrieve the tuid (translation unit id) and segtype (segment type). Given this document:
|
38
|
+
|
39
|
+
```xml
|
40
|
+
<tmx version="1.4">
|
41
|
+
<body>
|
42
|
+
<tu tuid="79b371014a8382a3b6efb86ec6ea97d9" segtype="block">
|
43
|
+
<prop type="x-segment-id">0</prop>
|
44
|
+
<prop type="x-some-property">six.hours</prop>
|
45
|
+
<tuv xml:lang="en-US"><seg>6 hours</seg></tuv>
|
46
|
+
<tuv xml:lang="de-DE"><seg>6 Stunden</seg></tuv>
|
47
|
+
</tu>
|
48
|
+
</body>
|
49
|
+
</tmx>
|
50
|
+
```
|
51
|
+
|
52
|
+
Here's what you can do:
|
53
|
+
|
54
|
+
```ruby
|
55
|
+
doc.each do |unit|
|
56
|
+
unit.tuid # => "79b371014a8382a3b6efb86ec6ea97d9"
|
57
|
+
unit.segtype # => "block"
|
58
|
+
|
59
|
+
unit.properties.keys # => ["x-segment-id", "x-some-property"]
|
60
|
+
unit.properties['x-segment-id'].value # => "0"
|
61
|
+
|
62
|
+
variant = unit.variants.first
|
63
|
+
variant.locale # => "en-US"
|
64
|
+
variant.elements # => ["6 hours"]
|
65
|
+
end
|
66
|
+
```
|
67
|
+
|
68
|
+
## Placeholders
|
69
|
+
|
70
|
+
Let's consider a different document:
|
71
|
+
|
72
|
+
```xml
|
73
|
+
<tmx version="1.4">
|
74
|
+
<body>
|
75
|
+
<tu tuid="79b371014a8382a3b6efb86ec6ea97d9" segtype="block">
|
76
|
+
<prop type="x-segment-id">0</prop>
|
77
|
+
<tuv xml:lang="en-US">
|
78
|
+
<seg><ph type="x-placeholder">{0}</ph> sessions</seg>
|
79
|
+
</tuv>
|
80
|
+
<tuv xml:lang="de-DE">
|
81
|
+
<seg><ph type="x-placeholder">{0}</ph> Einheiten</seg>
|
82
|
+
</tuv>
|
83
|
+
</tu>
|
84
|
+
</body>
|
85
|
+
</tmx>
|
86
|
+
```
|
87
|
+
|
88
|
+
The placeholders will be added to the variant's `elements` array:
|
89
|
+
|
90
|
+
```ruby
|
91
|
+
doc.each do |unit|
|
92
|
+
variant = unit.variants.first
|
93
|
+
variant.elements # => [#<TmxParser::Placeholder:0x5ad5be4a @text="{0}", @type="x-placeholder">, " sessions"]
|
94
|
+
end
|
95
|
+
```
|
96
|
+
|
97
|
+
Begin paired tags (tmx `bpt` elements) and end paired tags (tmx `ept` elements) are handled the same way.
|
98
|
+
|
99
|
+
## See Also
|
100
|
+
|
101
|
+
* TMX file format: [http://www.gala-global.org/oscarStandards/tmx/tmx14b.html](http://www.gala-global.org/oscarStandards/tmx/tmx14b.html)
|
102
|
+
|
103
|
+
## Requirements
|
104
|
+
|
105
|
+
Requires the [nokogiri](https://nokogiri.org) gem (`~> 1.8`), which is installed automatically as a dependency.
|
106
|
+
|
107
|
+
## Running Tests
|
108
|
+
|
109
|
+
`bundle exec rspec` should do the trick :)
|
110
|
+
|
111
|
+
## Authors
|
112
|
+
|
113
|
+
* Cameron C. Dutro: http://github.com/camertron
|
data/Rakefile
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Put the gem's lib directory on the load path so 'tmx-parser' can be required below.
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), 'lib')

# The explicit rubygems require is skipped when NO_RUBYGEMS is set in the environment.
require 'rubygems' unless ENV['NO_RUBYGEMS']

require 'bundler'
require 'rspec/core/rake_task'
require 'rubygems/package_task'

require 'tmx-parser'

# Install the rake tasks provided by Bundler::GemHelper.
Bundler::GemHelper.install_tasks

# Plain `rake` runs the spec task.
task default: :spec

desc 'Run specs'
RSpec::Core::RakeTask.new { |t| t.pattern = './spec/**/*_spec.rb' }
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module TmxParser
|
4
|
+
# Enumerable view over a TMX source. Iterating runs a Nokogiri SAX parse over
# the wrapped string or file handle and yields each unit to the given block.
class Document
  include Enumerable

  attr_reader :string_or_file_handle, :encoding

  # string_or_file_handle - the TMX source handed to the SAX parser.
  # encoding              - encoding passed (as a string) to the SAX parser;
  #                         defaults to Encoding.default_external.
  def initialize(string_or_file_handle, encoding = Encoding.default_external)
    @string_or_file_handle = string_or_file_handle
    @encoding = encoding
  end

  # Parses the source, handing the block to a Listener. Returns an
  # enumerator when called without a block.
  def each(&block)
    return to_enum(__method__) unless block_given?

    sax_document = SaxDocument.new(Listener.new(&block))
    sax_parser = Nokogiri::XML::SAX::Parser.new(sax_document, encoding.to_s)
    sax_parser.parse(string_or_file_handle)
  end
end
|
27
|
+
end
|
@@ -0,0 +1,154 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module TmxParser
|
4
|
+
|
5
|
+
# A translation unit (tmx <tu> element): a tuid, a segtype, a hash of
# properties keyed by property name, and an ordered list of variants.
class Unit
  attr_reader :tuid, :segtype, :properties, :variants

  # tuid    - the translation unit id.
  # segtype - the segment type.
  def initialize(tuid, segtype)
    @tuid = tuid
    @segtype = segtype
    @properties = {}
    @variants = []
  end

  # Returns a deep copy: tuid and segtype are dup'ed, and every variant and
  # property value is duplicated through its own #copy.
  def copy
    self.class.new(tuid.dup, segtype.dup).tap do |new_unit|
      new_unit.variants.concat(variants.map(&:copy))
      properties.each do |key, property_value|
        new_unit.properties[key] = property_value.copy
      end
    end
  end

  # Two units are equal when tuid, segtype, variants and properties all match.
  #
  # Array#== and Hash#== compare sizes as well as members, so a unit that
  # merely has *extra* variants or properties is no longer considered equal
  # (the previous implementation only walked the receiver's collections).
  # Returns false, rather than raising, for objects that are not units.
  def ==(other_unit)
    return false unless other_unit.is_a?(Unit)

    tuid == other_unit.tuid &&
      segtype == other_unit.segtype &&
      variants == other_unit.variants &&
      properties == other_unit.properties
  end
end
|
35
|
+
|
36
|
+
# Text content of a single property; the value is built up incrementally
# through #receive_text.
class PropertyValue
  attr_accessor :value

  # init_value - initial string value (defaults to a new empty string).
  def initialize(init_value = '')
    self.value = init_value
  end

  # Appends str to the current value in place.
  def receive_text(str)
    value << str
  end

  # Returns a new PropertyValue wrapping a dup of the current value.
  def copy
    duplicated_value = value.dup
    self.class.new(duplicated_value)
  end

  # Property values are equal when their values are equal.
  def ==(other_property_value)
    value == other_property_value.value
  end
end
|
55
|
+
|
56
|
+
# One language variant (tmx <tuv> element) of a translation unit: a locale
# plus an ordered list of elements (plain strings and inline objects such as
# placeholders or paired tags).
class Variant
  attr_reader :locale
  attr_accessor :elements

  # locale - the locale identifier of this variant.
  def initialize(locale)
    @locale = locale
    @elements = []
  end

  # Appends a text chunk to the element list.
  def receive_text(str)
    @elements << str
  end

  # Returns a deep copy. Elements that respond to #copy are copied through
  # it; everything else (e.g. strings) is dup'ed.
  def copy
    self.class.new(locale.dup).tap do |new_variant|
      new_variant.elements.concat(
        elements.map do |element|
          element.respond_to?(:copy) ? element.copy : element.dup
        end
      )
    end
  end

  # Variants are equal when both locale and elements match.
  #
  # Fixes two defects of the previous implementation: the locale was compared
  # with itself (always true), and elements were only compared over the
  # receiver's length, so a variant with extra trailing elements matched.
  # Returns false, rather than raising, for objects that are not variants.
  def ==(other_variant)
    return false unless other_variant.is_a?(Variant)

    locale == other_variant.locale &&
      elements == other_variant.elements
  end
end
|
86
|
+
|
87
|
+
# An inline placeholder (tmx <ph> element) with a type, its text content and
# optional start/length attributes.
class Placeholder
  attr_reader :type, :text
  attr_accessor :start, :length

  # type - the placeholder type.
  # text - initial text content (defaults to a new empty string).
  def initialize(type, text = '')
    @type = type
    @text = text
  end

  # Appends str to the placeholder text in place.
  def receive_text(str)
    @text << str
  end

  # Returns a copy with dup'ed type and text. start and length are assigned
  # as-is (original author's note: fixnums can't be dup'ed).
  def copy
    self.class.new(type.dup, text.dup).tap do |new_placeholder|
      new_placeholder.start = start
      new_placeholder.length = length
    end
  end

  # Placeholders are equal when type, text, start and length all match.
  #
  # The previous implementation compared +text+ against the other object's
  # +type+, so a placeholder was generally not equal to its own copy and two
  # placeholders with different text could compare equal.
  # Returns false, rather than raising, for objects that are not placeholders.
  def ==(other_placeholder)
    return false unless other_placeholder.is_a?(Placeholder)

    type == other_placeholder.type &&
      text == other_placeholder.text &&
      start == other_placeholder.start &&
      length == other_placeholder.length
  end
end
|
114
|
+
|
115
|
+
# Common behaviour of paired inline tags: an index +i+ linking the two halves
# and the tag's text content. Subclasses supply #type.
class Pair
  attr_reader :text, :i

  # i    - the pair index.
  # text - initial text content (defaults to a new empty string).
  def initialize(i, text = '')
    @i = i
    @text = text
  end

  # Appends str to the tag text in place.
  def receive_text(str)
    text << str
  end

  # Abstract; concrete pairs override this.
  def type
    raise NotImplementedError
  end

  # Returns a new pair of the same class with the same index and a dup of the text.
  def copy
    text_copy = text.dup
    self.class.new(i, text_copy)
  end

  # Pairs are equal when index, text and type match (checked in that order).
  def ==(other_pair)
    i == other_pair.i && text == other_pair.text && type == other_pair.type
  end
end

# Opening half of a paired tag.
class BeginPair < Pair
  def type
    :begin
  end
end

# Closing half of a paired tag.
class EndPair < Pair
  def type
    :end
  end
end
|
153
|
+
|
154
|
+
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module TmxParser
|
4
|
+
|
5
|
+
# Receives parse events and assembles them into Unit objects. A finished unit
# is passed to the block given at construction time.
class Listener
  include TagNames

  # NOTE: @units is never assigned in this class, so #units returns nil.
  attr_reader :units, :proc

  # block - called with the current unit each time a unit tag is closed.
  def initialize(&block)
    @stack = []
    @proc = block
  end

  # Starts a new translation unit, replacing the current one.
  def unit(tuid, segtype)
    @current_unit = Unit.new(tuid, segtype)
  end

  # Adds a variant to the current unit and makes it the active text receiver.
  def variant(locale)
    new_variant = Variant.new(locale)
    current_unit.variants.push(new_variant)
    stack << new_variant
  end

  # Registers an (initially empty) property under +name+ on the current unit
  # and makes it the active text receiver.
  def property(name)
    property_value = PropertyValue.new
    current_unit.properties[name] = property_value
    stack << property_value
  end

  # Forwards text to whatever is on top of the stack; ignored when the stack is empty.
  def text(str)
    receiver = stack.last
    receiver.receive_text(str) if receiver
  end

  # Handles a closing tag: a closed unit is handed to the block; otherwise the
  # top of the stack is popped only when it corresponds to +tag_name+.
  def done(tag_name)
    return proc.call(current_unit) if tag_name == UNIT_TAG

    stack.pop if tag_name_for(stack.last) == tag_name
  end

  # Adds a placeholder to the most recent variant and makes it the active text receiver.
  def placeholder(type)
    push_inline(Placeholder.new(type))
  end

  # Adds a begin-pair tag to the most recent variant and makes it the active text receiver.
  def begin_paired_tag(i)
    push_inline(BeginPair.new(i))
  end

  # Adds an end-pair tag to the most recent variant and makes it the active text receiver.
  def end_paired_tag(i)
    push_inline(EndPair.new(i))
  end

  private

  # Appends +element+ to the last variant's elements, then pushes it on the stack.
  def push_inline(element)
    current_unit.variants.last.elements << element
    stack.push(element)
  end

  # Maps a stacked object to the tag name that closes it (nil when unknown).
  def tag_name_for(obj)
    case obj
    when Variant
      VARIANT_TAG
    when PropertyValue
      PROPERTY_TAG
    when Placeholder
      PLACEHOLDER_TAG
    when BeginPair
      BEGIN_PAIRED_TAG
    when EndPair
      END_PAIRED_TAG
    end
  end

  attr_reader :current_unit, :stack
end
|
80
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'nokogiri'
|
4
|
+
|
5
|
+
module TmxParser
|
6
|
+
|
7
|
+
# Nokogiri SAX handler that translates raw XML events into Listener calls.
#
# Text is buffered in @text while the top of @capture_stack is truthy and is
# flushed to the listener whenever a capturing element starts or any element
# ends.
class SaxDocument < Nokogiri::XML::SAX::Document
  include TagNames

  attr_reader :listener

  # listener - object receiving unit/variant/property/text/done callbacks.
  def initialize(listener)
    @listener = listener
    @capture_stack = [false]
    @text = ''
  end

  # Dispatches on the element name. Every element pushes exactly one entry on
  # the capture stack so that the unconditional pop in #end_element stays
  # balanced: capturing elements push true, all others inherit the current
  # state. (Previously only capturing elements pushed, so the end of an
  # unhandled element nested inside a segment popped the segment's entry and
  # the text following it was silently dropped.)
  def start_element(name, attrs = [])
    case name
    when UNIT_TAG
      inherit_capture_state
      listener.unit(
        get_attr('tuid', attrs), get_attr('segtype', attrs)
      )
    when VARIANT_TAG
      inherit_capture_state
      locale = get_attr('xml:lang', attrs)
      listener.variant(locale)
    when SEGMENT_TAG
      capture_text
    when PROPERTY_TAG
      capture_text
      listener.property(get_attr('type', attrs))
    when BEGIN_PAIRED_TAG
      capture_text
      listener.begin_paired_tag(get_attr('i', attrs))
    when END_PAIRED_TAG
      capture_text
      listener.end_paired_tag(get_attr('i', attrs))
    when PLACEHOLDER_TAG
      capture_text
      listener.placeholder(get_attr('type', attrs))
    else
      inherit_capture_state
    end
  end

  # Leaves the element's capture scope, flushes buffered text and notifies
  # the listener that the tag is done.
  def end_element(name)
    @capture_stack.pop
    send_text
    listener.done(name)
  end

  # Buffers character data while capturing is active.
  def characters(str)
    @text += str if @capture_stack.last
  end

  private

  # Sends any buffered text to the listener and starts a fresh buffer.
  def send_text
    listener.text(@text) unless @text.empty?
    @text = ''
  end

  # Flushes pending text and opens a capturing scope.
  def capture_text
    send_text
    @capture_stack.push(true)
  end

  # Opens a scope for a non-capturing element that keeps the enclosing
  # element's capture state.
  def inherit_capture_state
    @capture_stack.push(@capture_stack.last)
  end

  # Looks up an attribute value in the [name, value] pairs; nil when absent.
  def get_attr(name, attrs)
    if found = attrs.find { |a| a.first == name }
      found.last
    end
  end
end
|
72
|
+
|
73
|
+
end
|
data/lib/tmx-parser.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'nokogiri'
|
4
|
+
|
5
|
+
# Entry point of the gem: registers autoloads for the library's constants and
# exposes TmxParser.load.
module TmxParser
  # Constant name => file that defines it (registered in this order).
  {
    Document: 'tmx-parser/document',
    SaxDocument: 'tmx-parser/sax_document',
    Listener: 'tmx-parser/listener',
    TagNames: 'tmx-parser/tag_names',
    Unit: 'tmx-parser/elements',
    PropertyValue: 'tmx-parser/elements',
    Variant: 'tmx-parser/elements',
    Placeholder: 'tmx-parser/elements',
    BeginPair: 'tmx-parser/elements',
    EndPair: 'tmx-parser/elements'
  }.each do |const_name, path|
    autoload const_name, path
  end

  # Wraps the given string or file handle in a Document.
  #
  # string_or_file_handle - the TMX source.
  # encoding              - defaults to Encoding.default_external.
  def self.load(string_or_file_handle, encoding = Encoding.default_external)
    Document.new(string_or_file_handle, encoding)
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,245 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
describe TmxParser do
  let(:parser) { TmxParser }
  let(:tuid) { '79b371014a8382a3b6efb86ec6ea97d9' }

  # Returns the variant of +unit+ with the given locale (nil when absent).
  def find_variant(locale, unit)
    unit.variants.find { |v| v.locale == locale }
  end

  context 'with a basic tmx document' do
    let(:document) do
      %Q{
        <tmx version="1.4">
          <body>
            <tu tuid="#{tuid}" segtype="block">
              <prop type="x-segment-id">0</prop>
              <prop type="x-some-property">six.hours</prop>
              <tuv xml:lang="en-US"><seg>6 hours</seg></tuv>
              <tuv xml:lang="de-DE"><seg>6 Stunden</seg></tuv>
            </tu>
          </body>
        </tmx>
      }
    end

    describe '#copy' do
      it 'deep copies the tree' do
        parser.load(document).to_a.tap do |units|
          original_unit = units.first
          unit_copy = original_unit.copy

          expect(unit_copy.tuid).to eq(original_unit.tuid)
          expect(unit_copy.segtype).to eq(original_unit.segtype)
          expect(unit_copy.variants.size).to eq(original_unit.variants.size)

          unit_copy.properties.each_pair do |key, prop_value_copy|
            original_prop_value = original_unit.properties[key]
            expect(original_prop_value.value).to eq(prop_value_copy.value)
          end

          unit_copy.variants.each_with_index do |variant_copy, v_idx|
            original_variant = original_unit.variants[v_idx]
            expect(variant_copy.locale).to eq(original_variant.locale)

            variant_copy.elements.each_with_index do |element_copy, e_idx|
              original_element = original_variant.elements[e_idx]
              expect(element_copy).to be_a(original_element.class)
            end
          end
        end
      end
    end

    describe '#==' do
      it 'returns true if the objects (even copies) are equivalent' do
        parser.load(document).to_a.tap do |units|
          expect(units.first).to eq(units.first.copy)
        end
      end

      it 'returns false if the objects are not equivalent' do
        parser.load(document).to_a.tap do |units|
          unit = units.first
          unit_copy = unit.copy

          unit_copy.tuid.replace('foobar')
          expect(unit).to_not eq(unit_copy)
        end
      end
    end

    it 'identifies the tuid and segtype' do
      parser.load(document).to_a.tap do |units|
        expect(units.size).to eq(1)

        units.first.tap do |unit|
          expect(unit.tuid).to eq(tuid)
          expect(unit.segtype).to eq('block')
        end
      end
    end

    it 'identifies the correct variants' do
      parser.load(document).to_a.first.tap do |unit|
        expect(unit.variants.size).to eq(2)
        expect(find_variant('en-US', unit).elements).to eq(['6 hours'])
        expect(find_variant('de-DE', unit).elements).to eq(['6 Stunden'])

        unit.variants.each do |variant|
          expect(variant).to be_a(TmxParser::Variant)
        end
      end
    end

    it 'identifies properties' do
      parser.load(document).to_a.first.tap do |unit|
        expect(unit.properties.size).to eq(2)
        expect(unit.properties).to include('x-segment-id')
        expect(unit.properties).to include('x-some-property')
        expect(unit.properties['x-segment-id'].value).to eq('0')
        expect(unit.properties['x-some-property'].value).to eq('six.hours')
      end
    end
  end

  context 'with a tmx document that contains a property that makes jruby cry' do
    # For some reason, jruby doesn't like square brackets in property values.
    # See: https://github.com/sparklemotion/nokogiri/issues/1261

    let(:document) do
      %Q{
        <tmx version="1.4">
          <body>
            <tu tuid="#{tuid}" segtype="block">
              <prop type="x-segment-id">0</prop>
              <prop type="x-some-property">en:#:daily-data:#:[3]:#:times</prop>
              <tuv xml:lang="en-US"><seg>6 hours</seg></tuv>
              <tuv xml:lang="de-DE"><seg>6 Stunden</seg></tuv>
            </tu>
          </body>
        </tmx>
      }
    end

    it 'identifies the property correctly' do
      parser.load(document).to_a.first.tap do |unit|
        expect(unit.properties).to include('x-some-property')
        expect(unit.properties['x-some-property']).to be_a(TmxParser::PropertyValue)
        expect(unit.properties['x-some-property'].value).to eq(
          'en:#:daily-data:#:[3]:#:times'
        )
      end
    end
  end

  context 'with a tmx document that contains placeholders' do
    let(:document) do
      %Q{
        <tmx version="1.4">
          <body>
            <tu tuid="#{tuid}" segtype="block">
              <prop type="x-segment-id">0</prop>
              <tuv xml:lang="en-US">
                <seg><ph type="x-placeholder">{0}</ph> sessions</seg>
              </tuv>
              <tuv xml:lang="de-DE">
                <seg><ph type="x-placeholder">{0}</ph> Einheiten</seg>
              </tuv>
            </tu>
          </body>
        </tmx>
      }
    end

    it 'identifies the placeholders' do
      parser.load(document).to_a.first.tap do |unit|
        expect(unit.variants.size).to eq(2)

        find_variant('en-US', unit).tap do |en_variant|
          expect(en_variant.elements.size).to eq(2)

          en_variant.elements.first.tap do |first_element|
            expect(first_element).to be_a(TmxParser::Placeholder)
            expect(first_element.type).to eq('x-placeholder')
            expect(first_element.text).to eq('{0}')
          end

          en_variant.elements.last.tap do |last_element|
            expect(last_element).to be_a(String)
            expect(last_element).to eq(' sessions')
          end
        end

        find_variant('de-DE', unit).tap do |de_variant|
          expect(de_variant.elements.size).to eq(2)

          de_variant.elements.first.tap do |first_element|
            expect(first_element).to be_a(TmxParser::Placeholder)
            expect(first_element.type).to eq('x-placeholder')
            expect(first_element.text).to eq('{0}')
          end

          de_variant.elements.last.tap do |last_element|
            expect(last_element).to be_a(String)
            expect(last_element).to eq(' Einheiten')
          end
        end
      end
    end
  end

  context 'with a tmx document that contains paired tags' do
    # The markup carried inside <bpt>/<ept> must be entity-escaped; a literal
    # <strong> there would make the fixture malformed XML.
    let(:document) do
      %Q{
        <tmx version="1.4">
          <body>
            <tu tuid="#{tuid}" segtype="block">
              <prop type="x-segment-id">0</prop>
              <tuv xml:lang="en-US">
                <seg>Build your healthy habit of daily training with <bpt i="3">&lt;strong&gt;</bpt>email training reminders.<ept i="3">&lt;/strong&gt;</ept></seg>
              </tuv>
              <tuv xml:lang="de-DE">
                <seg><bpt i="3">&lt;strong&gt;</bpt>Mit Erinnerungen per E-Mail<ept i="3">&lt;/strong&gt;</ept> können Sie das tägliche Training zu einer schönen Angewohnheit werden lassen.</seg>
              </tuv>
            </tu>
          </body>
        </tmx>
      }
    end

    it 'identifies the tags' do
      parser.load(document).to_a.first.tap do |unit|
        expect(unit.variants.size).to eq(2)

        find_variant('en-US', unit).tap do |en_variant|
          expect(en_variant.elements.size).to eq(4)

          en_variant.elements[0].tap do |element|
            expect(element).to be_a(String)
            expect(element).to eq('Build your healthy habit of daily training with ')
          end

          en_variant.elements[1].tap do |element|
            expect(element).to be_a(TmxParser::BeginPair)
            expect(element.i).to eq('3')
            expect(element.text).to eq('<strong>')
          end

          en_variant.elements[2].tap do |element|
            expect(element).to be_a(String)
            expect(element).to eq('email training reminders.')
          end

          en_variant.elements[3].tap do |element|
            expect(element).to be_a(TmxParser::EndPair)
            expect(element.i).to eq('3')
            expect(element.text).to eq('</strong>')
          end
        end
      end
    end
  end
end
|
data/tmx-parser.gemspec
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
$:.unshift File.join(File.dirname(__FILE__), 'lib')
|
4
|
+
require 'tmx-parser/version'
|
5
|
+
|
6
|
+
# Gem specification for tmx-parser-2018. The version is read from
# TmxParser::VERSION (tmx-parser/version, required above).
Gem::Specification.new do |s|
  s.name     = "tmx-parser-2018"
  s.version  = ::TmxParser::VERSION
  s.authors  = ["Cameron Dutro", "Michiel de Mare"]
  s.email    = ["camertron@gmail.com", "michiel@tolq.com"]
  s.homepage = "http://github.com/mdemare"

  s.description = s.summary = "Parser for the Translation Memory eXchange (.tmx) file format."

  s.platform = Gem::Platform::RUBY
  # `has_rdoc=` is deprecated in RubyGems and has no effect on the built gem,
  # so the former `s.has_rdoc = true` line has been dropped.

  s.require_path = 'lib'
  s.files = Dir["{lib,spec}/**/*", "Gemfile", "History.txt", "README.md", "Rakefile", "tmx-parser.gemspec"]

  s.add_dependency 'nokogiri', '~> 1.8'
end
|
metadata
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: tmx-parser-2018
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Cameron Dutro
|
8
|
+
- Michiel de Mare
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2018-02-07 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: nokogiri
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - "~>"
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '1.8'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - "~>"
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '1.8'
|
28
|
+
description: Parser for the Translation Memory eXchange (.tmx) file format.
|
29
|
+
email:
|
30
|
+
- camertron@gmail.com
|
31
|
+
- michiel@tolq.com
|
32
|
+
executables: []
|
33
|
+
extensions: []
|
34
|
+
extra_rdoc_files: []
|
35
|
+
files:
|
36
|
+
- Gemfile
|
37
|
+
- History.txt
|
38
|
+
- README.md
|
39
|
+
- Rakefile
|
40
|
+
- lib/tmx-parser.rb
|
41
|
+
- lib/tmx-parser/document.rb
|
42
|
+
- lib/tmx-parser/elements.rb
|
43
|
+
- lib/tmx-parser/listener.rb
|
44
|
+
- lib/tmx-parser/sax_document.rb
|
45
|
+
- lib/tmx-parser/tag_names.rb
|
46
|
+
- lib/tmx-parser/version.rb
|
47
|
+
- spec/spec_helper.rb
|
48
|
+
- spec/tmx-parser_spec.rb
|
49
|
+
- tmx-parser.gemspec
|
50
|
+
homepage: http://github.com/mdemare
|
51
|
+
licenses: []
|
52
|
+
metadata: {}
|
53
|
+
post_install_message:
|
54
|
+
rdoc_options: []
|
55
|
+
require_paths:
|
56
|
+
- lib
|
57
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
67
|
+
requirements: []
|
68
|
+
rubyforge_project:
|
69
|
+
rubygems_version: 2.7.4
|
70
|
+
signing_key:
|
71
|
+
specification_version: 4
|
72
|
+
summary: Parser for the Translation Memory eXchange (.tmx) file format.
|
73
|
+
test_files: []
|