tmx-parser-2018 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 9b35d164435ff250f53443f41cde51f9da1128a937f8be81e2ee87506660d291
4
+ data.tar.gz: c484c6452c02ba983a73fd5864112f1c612bcd1727cdece552d3ba00f85b7e27
5
+ SHA512:
6
+ metadata.gz: 438d30db65dea8693a023b91f9d64cf1489bdd3e352a93bc19b3d968d667c642bc6d40453e6d02ee7e7faac33e62f97adae4e23eb0cf62d45709a491a7702201
7
+ data.tar.gz: b8fa9c0e7e658a3e8d8656b3fe36dbe3ec3ff199a67e6000893b27a48beff2ed1d47b46d837bc0b52773c931d41e93dc354e7ed8192c18f557db130e65650e17
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
4
+
data/History.txt ADDED
@@ -0,0 +1,11 @@
1
+ == 1.0.0
2
+
3
+ * Birthday!
4
+
5
+ == 1.0.1
6
+
7
+ * Add ability to specify encoding.
8
+
9
+ == 1.1.0
10
+
11
+ * Add #copy and #== methods to elements.
data/README.md ADDED
@@ -0,0 +1,113 @@
1
+ tmx-parser
2
+ =================
3
+
4
+ Parser for the Translation Memory eXchange (.tmx) file format.
5
+
6
+ ## Installation
7
+
8
+ `gem install tmx-parser-2018`
9
+
10
+ ## Usage
11
+
12
+ ```ruby
13
+ require 'tmx-parser'
14
+ ```
15
+
16
+ ## Functionality
17
+
18
+ Got a .tmx file you need to parse? Just use the `TmxParser.load` method. It'll return an enumerable `TmxParser::Document` object for your iterating pleasure:
19
+
20
+ ```ruby
21
+ doc = TmxParser.load(File.open('path/to/my.tmx'))
22
+ doc.each do |unit|
23
+ ...
24
+ end
25
+ ```
26
+
27
+ You can also pass a string to `.load`:
28
+
29
+ ```ruby
30
+ doc = TmxParser.load(File.read('path/to/my.tmx'))
31
+ ```
32
+
33
+ The parser works in a streaming fashion, meaning it tries not to hold the entire source document in memory all at once. It will instead yield each translation unit incrementally.
34
+
35
+ ## Translation Units
36
+
37
+ Translation units are simple Ruby objects that contain properties (tmx `<prop>` elements) and variants (tmx `<tuv>` elements). You can also retrieve the tuid (translation unit id) and segtype (segment type). Given this document:
38
+
39
+ ```xml
40
+ <tmx version="1.4">
41
+ <body>
42
+ <tu tuid="79b371014a8382a3b6efb86ec6ea97d9" segtype="block">
43
+ <prop type="x-segment-id">0</prop>
44
+ <prop type="x-some-property">six.hours</prop>
45
+ <tuv xml:lang="en-US"><seg>6 hours</seg></tuv>
46
+ <tuv xml:lang="de-DE"><seg>6 Stunden</seg></tuv>
47
+ </tu>
48
+ </body>
49
+ </tmx>
50
+ ```
51
+
52
+ Here's what you can do:
53
+
54
+ ```ruby
55
+ doc.each do |unit|
56
+ unit.tuid # => "79b371014a8382a3b6efb86ec6ea97d9"
57
+ unit.segtype # => "block"
58
+
59
+ unit.properties.keys # => ["x-segment-id", "x-some-property"]
60
+ unit.properties['x-segment-id'].value # => "0"
61
+
62
+ variant = unit.variants.first
63
+ variant.locale # => "en-US"
64
+ variant.elements # => ["6 hours"]
65
+ end
66
+ ```
67
+
68
+ ## Placeholders
69
+
70
+ Let's consider a different document:
71
+
72
+ ```xml
73
+ <tmx version="1.4">
74
+ <body>
75
+ <tu tuid="79b371014a8382a3b6efb86ec6ea97d9" segtype="block">
76
+ <prop type="x-segment-id">0</prop>
77
+ <tuv xml:lang="en-US">
78
+ <seg><ph type="x-placeholder">{0}</ph> sessions</seg>
79
+ </tuv>
80
+ <tuv xml:lang="de-DE">
81
+ <seg><ph type="x-placeholder">{0}</ph> Einheiten</seg>
82
+ </tuv>
83
+ </tu>
84
+ </body>
85
+ </tmx>
86
+ ```
87
+
88
+ The placeholders will be added to the variant's `elements` array:
89
+
90
+ ```ruby
91
+ doc.each do |unit|
92
+ variant = unit.variants.first
93
+ variant.elements # => [#<TmxParser::Placeholder:0x5ad5be4a @text="{0}", @type="x-placeholder">, " sessions"]
94
+ end
95
+ ```
96
+
97
+ Begin paired tags (tmx `bpt` elements) and end paired tags (tmx `ept` elements) are handled the same way.
98
+
99
+ ## See Also
100
+
101
+ * TMX file format: [http://www.gala-global.org/oscarStandards/tmx/tmx14b.html](http://www.gala-global.org/oscarStandards/tmx/tmx14b.html)
102
+
103
+ ## Requirements
104
+
105
+ Requires [Nokogiri](http://nokogiri.org) (`~> 1.8`), which is installed automatically as a gem dependency.
106
+
107
+ ## Running Tests
108
+
109
+ `bundle exec rspec` should do the trick :)
110
+
111
+ ## Authors
112
+
113
+ * Cameron C. Dutro: http://github.com/camertron
data/Rakefile ADDED
@@ -0,0 +1,20 @@
1
+ # encoding: UTF-8
2
+
3
+ $:.unshift File.join(File.dirname(__FILE__), 'lib')
4
+
5
+ require 'rubygems' unless ENV['NO_RUBYGEMS']
6
+
7
+ require 'bundler'
8
+ require 'rspec/core/rake_task'
9
+ require 'rubygems/package_task'
10
+
11
+ require 'tmx-parser'
12
+
13
+ Bundler::GemHelper.install_tasks
14
+
15
+ task :default => :spec
16
+
17
+ desc 'Run specs'
18
+ RSpec::Core::RakeTask.new do |t|
19
+ t.pattern = './spec/**/*_spec.rb'
20
+ end
@@ -0,0 +1,27 @@
1
+ # encoding: UTF-8
2
+
3
module TmxParser
  # Enumerable wrapper around a TMX source. Nothing is parsed until the
  # document is iterated; each iteration runs a fresh SAX parse of the source.
  class Document
    include Enumerable

    # The raw source handed to the constructor and the encoding passed to the
    # SAX parser.
    attr_reader :string_or_file_handle, :encoding

    # @param string_or_file_handle the TMX source, passed as-is to the parser
    # @param encoding encoding of the source; defaults to
    #   Encoding.default_external
    def initialize(string_or_file_handle, encoding = Encoding.default_external)
      @encoding = encoding
      @string_or_file_handle = string_or_file_handle
    end

    # Parses the source, handing the block to a Listener that is driven by a
    # SaxDocument. Without a block an Enumerator over the same iteration is
    # returned.
    def each(&block)
      return to_enum(__method__) unless block_given?

      sax_handler = SaxDocument.new(Listener.new(&block))
      Nokogiri::XML::SAX::Parser
        .new(sax_handler, encoding.to_s)
        .parse(string_or_file_handle)
    end
  end
end
@@ -0,0 +1,154 @@
1
+ # encoding: UTF-8
2
+
3
module TmxParser

  # A translation unit (tmx <tu> element): its tuid and segtype attributes,
  # a hash of named PropertyValue objects and an ordered list of Variants.
  class Unit
    attr_reader :tuid, :segtype, :properties, :variants

    def initialize(tuid, segtype)
      @tuid = tuid
      @segtype = segtype
      @properties = {}
      @variants = []
    end

    # Returns a deep copy: attributes, variants and property values are all
    # duplicated so mutating the copy never touches the original.
    def copy
      # tuid/segtype are nil when the attribute is absent; guard the dup.
      self.class.new(tuid && tuid.dup, segtype && segtype.dup).tap do |new_unit|
        new_unit.variants.concat(variants.map(&:copy))
        properties.each do |key, property_value|
          new_unit.properties[key] = property_value.copy
        end
      end
    end

    # Two units are equal when their attributes match and they hold the same
    # variants (same order, same count) and the same set of properties.
    # Array#== / Hash#== also compare sizes, which keeps equality symmetric.
    def ==(other_unit)
      other_unit.is_a?(Unit) &&
        tuid == other_unit.tuid &&
        segtype == other_unit.segtype &&
        variants == other_unit.variants &&
        properties == other_unit.properties
    end
  end

  # The text content of a tmx <prop> element.
  class PropertyValue
    attr_accessor :value

    def initialize(init_value = '')
      @value = init_value
    end

    # Appends a chunk of character data to the value (mutates it in place).
    def receive_text(str)
      @value << str
    end

    def copy
      self.class.new(value.dup)
    end

    def ==(other_property_value)
      other_property_value.is_a?(PropertyValue) &&
        value == other_property_value.value
    end
  end

  # One language variant (tmx <tuv> element). +elements+ is an ordered mix of
  # plain strings and inline objects (Placeholder, BeginPair, EndPair).
  class Variant
    attr_reader :locale
    attr_accessor :elements

    def initialize(locale)
      @locale = locale
      @elements = []
    end

    # Adds a chunk of plain text as a new element.
    def receive_text(str)
      @elements << str
    end

    # Deep copy: inline objects are copied via their own #copy, strings via dup.
    def copy
      self.class.new(locale && locale.dup).tap do |new_variant|
        new_variant.elements.concat(
          elements.map do |element|
            element.respond_to?(:copy) ? element.copy : element.dup
          end
        )
      end
    end

    # Equal when the locale matches and both variants hold equal elements in
    # the same order (and the same number of them).
    def ==(other_variant)
      other_variant.is_a?(Variant) &&
        locale == other_variant.locale &&
        elements == other_variant.elements
    end
  end

  # An inline placeholder (tmx <ph> element).
  class Placeholder
    attr_reader :type, :text
    attr_accessor :start, :length

    def initialize(type, text = '')
      @type = type
      @text = text
    end

    def receive_text(str)
      @text << str
    end

    def copy
      # type is nil when the attribute is absent; guard the dup.
      self.class.new(type && type.dup, text.dup).tap do |new_placeholder|
        new_placeholder.start = start # can't dup fixnums
        new_placeholder.length = length
      end
    end

    # Returns false (rather than raising) for non-placeholder operands so
    # mixed element arrays can be compared safely.
    def ==(other_placeholder)
      other_placeholder.is_a?(Placeholder) &&
        type == other_placeholder.type &&
        text == other_placeholder.text &&
        start == other_placeholder.start &&
        length == other_placeholder.length
    end
  end

  # Shared behaviour for paired inline tags; subclasses supply #type.
  class Pair
    attr_reader :text, :i

    def initialize(i, text = '')
      @i = i
      @text = text
    end

    def receive_text(str)
      @text << str
    end

    # Must be overridden by subclasses.
    def type
      raise NotImplementedError
    end

    def copy
      self.class.new(i, text.dup)
    end

    # Returns false for non-pair operands; otherwise compares index, text and
    # type (so a BeginPair never equals an EndPair).
    def ==(other_pair)
      other_pair.is_a?(Pair) &&
        i == other_pair.i &&
        text == other_pair.text &&
        type == other_pair.type
    end
  end

  # Begin paired tag (tmx <bpt> element).
  class BeginPair < Pair
    def type
      :begin
    end
  end

  # End paired tag (tmx <ept> element).
  class EndPair < Pair
    def type
      :end
    end
  end

end
@@ -0,0 +1,80 @@
1
+ # encoding: UTF-8
2
+
3
module TmxParser

  # Receives element/text events from SaxDocument, assembles Unit objects and
  # calls the block given at construction with each unit once its <tu> closes.
  class Listener
    include TagNames

    # NOTE(review): @units is never assigned in this class, so +units+ returns
    # nil; the reader is kept only for backward compatibility.
    attr_reader :units, :proc

    def initialize(&block)
      @stack = []
      @proc = block
    end

    # Starts a new translation unit.
    def unit(tuid, segtype)
      @current_unit = Unit.new(tuid, segtype)
    end

    # Starts a new variant on the current unit. Ignored when no unit is open.
    def variant(locale)
      return unless current_unit

      variant = Variant.new(locale)
      current_unit.variants << variant
      stack.push(variant)
    end

    # Starts a new property on the current unit. A <prop> seen before any
    # <tu> (TMX allows them inside <header>) has no unit to attach to and is
    # ignored instead of raising NoMethodError on nil.
    def property(name)
      return unless current_unit

      val = PropertyValue.new
      current_unit.properties[name] = val
      stack.push(val)
    end

    # Routes character data to whichever object is currently open, if any.
    def text(str)
      receiver = stack.last
      receiver.receive_text(str) if receiver
    end

    # Called when an element closes: yields the unit on </tu>, otherwise pops
    # the stack if its top object corresponds to the closing tag.
    def done(tag_name)
      if tag_name == UNIT_TAG
        proc.call(current_unit)
      elsif tag_name_for(stack.last) == tag_name
        stack.pop
      end
    end

    def placeholder(type)
      append_inline(Placeholder.new(type))
    end

    def begin_paired_tag(i)
      append_inline(BeginPair.new(i))
    end

    def end_paired_tag(i)
      append_inline(EndPair.new(i))
    end

    private

    # Adds an inline object to the most recent variant and makes it the
    # receiver of subsequent text. Ignored when there is no open unit/variant.
    def append_inline(element)
      variant = current_unit && current_unit.variants.last
      return unless variant

      variant.elements << element
      stack.push(element)
    end

    def tag_name_for(obj)
      case obj
        when Variant then VARIANT_TAG
        when PropertyValue then PROPERTY_TAG
        when Placeholder then PLACEHOLDER_TAG
        when BeginPair then BEGIN_PAIRED_TAG
        when EndPair then END_PAIRED_TAG
      end
    end

    attr_reader :current_unit, :stack

  end
end
@@ -0,0 +1,73 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'nokogiri'
4
+
5
module TmxParser

  # Nokogiri SAX handler that translates raw XML events into Listener calls.
  #
  # @capture_stack holds one boolean per open element (plus a permanent
  # +false+ root entry) saying whether character data at that depth should be
  # buffered. Every start_element pushes exactly one entry and every
  # end_element pops exactly one, so an element this class does not handle
  # (for example an inline element nested in <seg>) can no longer consume its
  # parent's entry and cause the text that follows it to be dropped.
  class SaxDocument < Nokogiri::XML::SAX::Document
    include TagNames

    attr_reader :listener

    def initialize(listener)
      @listener = listener
      @capture_stack = [false]
      @text = ''
    end

    def start_element(name, attrs = [])
      case name
        when UNIT_TAG
          inherit_capture
          listener.unit(
            get_attr('tuid', attrs), get_attr('segtype', attrs)
          )
        when VARIANT_TAG
          inherit_capture
          locale = get_attr('xml:lang', attrs)
          listener.variant(locale)
        when SEGMENT_TAG
          capture_text
        when PROPERTY_TAG
          capture_text
          listener.property(get_attr('type', attrs))
        when BEGIN_PAIRED_TAG
          capture_text
          listener.begin_paired_tag(get_attr('i', attrs))
        when END_PAIRED_TAG
          capture_text
          listener.end_paired_tag(get_attr('i', attrs))
        when PLACEHOLDER_TAG
          capture_text
          listener.placeholder(get_attr('type', attrs))
        else
          inherit_capture
      end
    end

    # Closes the element: drops its capture entry, flushes buffered text to
    # the listener, then reports the closing tag.
    def end_element(name)
      @capture_stack.pop
      send_text
      listener.done(name)
    end

    # Buffers character data only while the innermost open element captures.
    def characters(str)
      @text += str if @capture_stack.last
    end

    private

    # Hands any buffered text to the listener and resets the buffer.
    def send_text
      listener.text(@text) unless @text.empty?
      @text = ''
    end

    # Flushes pending text and opens a capturing level for the new element.
    def capture_text
      send_text
      @capture_stack.push(true)
    end

    # Opens a level for an element with no text handling of its own; it keeps
    # whatever capture state its parent has.
    def inherit_capture
      @capture_stack.push(@capture_stack.last)
    end

    # Returns the value of the named attribute from the [name, value] pairs,
    # or nil when it is absent.
    def get_attr(name, attrs)
      if found = attrs.find { |a| a.first == name }
        found.last
      end
    end
  end

end
@@ -0,0 +1,13 @@
1
+ # encoding: UTF-8
2
+
3
module TmxParser
  # Names of the TMX elements the parser reacts to. The strings are frozen so
  # the shared constants cannot be mutated by accident.
  module TagNames
    UNIT_TAG = 'tu'.freeze
    VARIANT_TAG = 'tuv'.freeze
    PROPERTY_TAG = 'prop'.freeze
    SEGMENT_TAG = 'seg'.freeze
    PLACEHOLDER_TAG = 'ph'.freeze
    BEGIN_PAIRED_TAG = 'bpt'.freeze
    END_PAIRED_TAG = 'ept'.freeze
  end
end
+ end
@@ -0,0 +1,5 @@
1
+ # encoding: UTF-8
2
+
3
module TmxParser
  # Gem version; frozen so the shared constant cannot be mutated.
  VERSION = '1.1.0'.freeze
end
data/lib/tmx-parser.rb ADDED
@@ -0,0 +1,20 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'nokogiri'
4
+
5
module TmxParser
  # Constants are loaded lazily on first reference. Every class defined in
  # tmx-parser/elements is registered (including the Pair base class) so any
  # of them can be the first one referenced, and VERSION is registered so it
  # is reachable after a plain require of this file.
  autoload :VERSION,       'tmx-parser/version'
  autoload :Document,      'tmx-parser/document'
  autoload :SaxDocument,   'tmx-parser/sax_document'
  autoload :Listener,      'tmx-parser/listener'
  autoload :TagNames,      'tmx-parser/tag_names'
  autoload :Unit,          'tmx-parser/elements'
  autoload :PropertyValue, 'tmx-parser/elements'
  autoload :Variant,       'tmx-parser/elements'
  autoload :Placeholder,   'tmx-parser/elements'
  autoload :Pair,          'tmx-parser/elements'
  autoload :BeginPair,     'tmx-parser/elements'
  autoload :EndPair,       'tmx-parser/elements'

  # Wraps a TMX source in a Document.
  #
  # @param string_or_file_handle the TMX source, handed unchanged to Document
  # @param encoding encoding of the source; defaults to
  #   Encoding.default_external
  # @return [Document]
  def self.load(string_or_file_handle, encoding = Encoding.default_external)
    Document.new(string_or_file_handle, encoding)
  end
end
@@ -0,0 +1,8 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'pry-nav'
4
+ require 'rspec'
5
+ require 'tmx-parser'
6
+
7
+ RSpec.configure do |config|
8
+ end
@@ -0,0 +1,245 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'spec_helper'
4
+
5
+ describe TmxParser do
6
+ let(:parser) { TmxParser }
7
+ let(:tuid) { '79b371014a8382a3b6efb86ec6ea97d9' }
8
+
9
# Returns the first variant of +unit+ whose locale equals +locale+, or nil
# when the unit has no such variant.
def find_variant(locale, unit)
  unit.variants.detect do |candidate|
    candidate.locale == locale
  end
end
12
+
13
+ context 'with a basic tmx document' do
14
+ let(:document) do
15
+ %Q{
16
+ <tmx version="1.4">
17
+ <body>
18
+ <tu tuid="#{tuid}" segtype="block">
19
+ <prop type="x-segment-id">0</prop>
20
+ <prop type="x-some-property">six.hours</prop>
21
+ <tuv xml:lang="en-US"><seg>6 hours</seg></tuv>
22
+ <tuv xml:lang="de-DE"><seg>6 Stunden</seg></tuv>
23
+ </tu>
24
+ </body>
25
+ </tmx>
26
+ }
27
+ end
28
+
29
+ describe '#copy' do
30
+ it 'deep copies the tree' do
31
+ parser.load(document).to_a.tap do |units|
32
+ original_unit = units.first
33
+ unit_copy = original_unit.copy
34
+
35
+ expect(unit_copy.tuid).to eq(original_unit.tuid)
36
+ expect(unit_copy.segtype).to eq(original_unit.segtype)
37
+ expect(unit_copy.variants.size).to eq(original_unit.variants.size)
38
+
39
+ unit_copy.properties.each_pair.with_index do |(key, prop_value_copy), idx|
40
+ original_prop_value = original_unit.properties[key]
41
+ expect(original_prop_value.value).to eq(prop_value_copy.value)
42
+ end
43
+
44
+ unit_copy.variants.each_with_index do |variant_copy, v_idx|
45
+ original_variant = original_unit.variants[v_idx]
46
+ expect(variant_copy.locale).to eq(original_variant.locale)
47
+
48
+ variant_copy.elements.each_with_index do |element_copy, e_idx|
49
+ original_element = original_variant.elements[e_idx]
50
+ expect(element_copy).to be_a(original_element.class)
51
+ end
52
+ end
53
+ end
54
+ end
55
+ end
56
+
57
+ describe '#==' do
58
+ it 'returns true if the objects (even copies) are equivalent' do
59
+ parser.load(document).to_a.tap do |units|
60
+ expect(units.first).to eq(units.first.copy)
61
+ end
62
+ end
63
+
64
+ it 'returns false if the objects are not equivalent' do
65
+ parser.load(document).to_a.tap do |units|
66
+ unit = units.first
67
+ unit_copy = unit.copy
68
+
69
+ unit_copy.tuid.replace('foobar')
70
+ expect(unit).to_not eq(unit_copy)
71
+ end
72
+ end
73
+ end
74
+
75
+ it 'identifies the tuid and segtype' do
76
+ parser.load(document).to_a.tap do |units|
77
+ expect(units.size).to eq(1)
78
+
79
+ units.first.tap do |unit|
80
+ expect(unit.tuid).to eq(tuid)
81
+ expect(unit.segtype).to eq('block')
82
+ end
83
+ end
84
+ end
85
+
86
+ it 'identifies the correct variants' do
87
+ parser.load(document).to_a.first.tap do |unit|
88
+ expect(unit.variants.size).to eq(2)
89
+ expect(find_variant('en-US', unit).elements).to eq(['6 hours'])
90
+ expect(find_variant('de-DE', unit).elements).to eq(['6 Stunden'])
91
+
92
+ unit.variants.each do |variant|
93
+ expect(variant).to be_a(TmxParser::Variant)
94
+ end
95
+ end
96
+ end
97
+
98
+ it 'identifies properties' do
99
+ parser.load(document).to_a.first.tap do |unit|
100
+ expect(unit.properties.size).to eq(2)
101
+ expect(unit.properties).to include('x-segment-id')
102
+ expect(unit.properties).to include('x-some-property')
103
+ expect(unit.properties['x-segment-id'].value).to eq('0')
104
+ expect(unit.properties['x-some-property'].value).to eq('six.hours')
105
+ end
106
+ end
107
+ end
108
+
109
+ context 'with a tmx document that contains a property that makes jruby cry' do
110
+ # For some reason, jruby doesn't like square brackets in property values.
111
+ # See: https://github.com/sparklemotion/nokogiri/issues/1261
112
+
113
+ let(:document) do
114
+ %Q{
115
+ <tmx version="1.4">
116
+ <body>
117
+ <tu tuid="#{tuid}" segtype="block">
118
+ <prop type="x-segment-id">0</prop>
119
+ <prop type="x-some-property">en:#:daily-data:#:[3]:#:times</prop>
120
+ <tuv xml:lang="en-US"><seg>6 hours</seg></tuv>
121
+ <tuv xml:lang="de-DE"><seg>6 Stunden</seg></tuv>
122
+ </tu>
123
+ </body>
124
+ </tmx>
125
+ }
126
+ end
127
+
128
+ it 'identifies the property correctly' do
129
+ parser.load(document).to_a.first.tap do |unit|
130
+ expect(unit.properties).to include('x-some-property')
131
+ expect(unit.properties['x-some-property']).to be_a(TmxParser::PropertyValue)
132
+ expect(unit.properties['x-some-property'].value).to eq(
133
+ 'en:#:daily-data:#:[3]:#:times'
134
+ )
135
+ end
136
+ end
137
+ end
138
+
139
+ context 'with a tmx document that contains placeholders' do
140
+ let(:document) do
141
+ %Q{
142
+ <tmx version="1.4">
143
+ <body>
144
+ <tu tuid="#{tuid}" segtype="block">
145
+ <prop type="x-segment-id">0</prop>
146
+ <tuv xml:lang="en-US">
147
+ <seg><ph type="x-placeholder">{0}</ph> sessions</seg>
148
+ </tuv>
149
+ <tuv xml:lang="de-DE">
150
+ <seg><ph type="x-placeholder">{0}</ph> Einheiten</seg>
151
+ </tuv>
152
+ </tu>
153
+ </body>
154
+ </tmx>
155
+ }
156
+ end
157
+
158
+ it 'identifies the placeholders' do
159
+ parser.load(document).to_a.first.tap do |unit|
160
+ expect(unit.variants.size).to eq(2)
161
+
162
+ find_variant('en-US', unit).tap do |en_variant|
163
+ expect(en_variant.elements.size).to eq(2)
164
+
165
+ en_variant.elements.first.tap do |first_element|
166
+ expect(first_element.type).to eq('x-placeholder')
167
+ expect(first_element.text).to eq('{0}')
168
+ end
169
+
170
+ en_variant.elements.last.tap do |last_element|
171
+ expect(last_element).to be_a(String)
172
+ expect(last_element).to eq(' sessions')
173
+ end
174
+ end
175
+
176
+ find_variant('de-DE', unit).tap do |en_variant|
177
+ expect(en_variant.elements.size).to eq(2)
178
+
179
+ en_variant.elements.first.tap do |first_element|
180
+ expect(first_element).to be_a(TmxParser::Placeholder)
181
+ expect(first_element.type).to eq('x-placeholder')
182
+ expect(first_element.text).to eq('{0}')
183
+ end
184
+
185
+ en_variant.elements.last.tap do |last_element|
186
+ expect(last_element).to be_a(String)
187
+ expect(last_element).to eq(' Einheiten')
188
+ end
189
+ end
190
+ end
191
+ end
192
+ end
193
+
194
+ context 'with a tmx document that contains paired tags' do
195
+ let(:document) do
196
+ %Q{
197
+ <tmx version="1.4">
198
+ <body>
199
+ <tu tuid="#{tuid}" segtype="block">
200
+ <prop type="x-segment-id">0</prop>
201
+ <tuv xml:lang="en-US">
202
+ <seg>Build your healthy habit of daily training with <bpt i="3">&lt;strong&gt;</bpt>email training reminders.<ept i="3">&lt;/strong&gt;</ept></seg>
203
+ </tuv>
204
+ <tuv xml:lang="de-DE">
205
+ <seg><bpt i="3">&lt;strong&gt;</bpt>Mit Erinnerungen per E-Mail<ept i="3">&lt;/strong&gt;</ept> können Sie das tägliche Training zu einer schönen Angewohnheit werden lassen.</seg>
206
+ </tuv>
207
+ </tu>
208
+ </body>
209
+ </tmx>
210
+ }
211
+ end
212
+
213
+ it 'identifies the tags' do
214
+ parser.load(document).to_a.first.tap do |unit|
215
+ expect(unit.variants.size).to eq(2)
216
+
217
+ find_variant('en-US', unit).tap do |en_variant|
218
+ expect(en_variant.elements.size).to eq(4)
219
+
220
+ en_variant.elements[0].tap do |element|
221
+ expect(element).to be_a(String)
222
+ expect(element).to eq('Build your healthy habit of daily training with ')
223
+ end
224
+
225
+ en_variant.elements[1].tap do |element|
226
+ expect(element).to be_a(TmxParser::BeginPair)
227
+ expect(element.i).to eq('3')
228
+ expect(element.text).to eq('<strong>')
229
+ end
230
+
231
+ en_variant.elements[2].tap do |element|
232
+ expect(element).to be_a(String)
233
+ expect(element).to eq('email training reminders.')
234
+ end
235
+
236
+ en_variant.elements[3].tap do |element|
237
+ expect(element).to be_a(TmxParser::EndPair)
238
+ expect(element.i).to eq('3')
239
+ expect(element.text).to eq('</strong>')
240
+ end
241
+ end
242
+ end
243
+ end
244
+ end
245
+ end
@@ -0,0 +1,22 @@
1
+ # encoding: UTF-8
2
+
3
+ $:.unshift File.join(File.dirname(__FILE__), 'lib')
4
+ require 'tmx-parser/version'
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = "tmx-parser-2018"
8
+ s.version = ::TmxParser::VERSION
9
+ s.authors = ["Cameron Dutro", "Michiel de Mare"]
10
+ s.email = ["camertron@gmail.com", "michiel@tolq.com"]
11
+ s.homepage = "http://github.com/mdemare"
12
+
13
+ s.description = s.summary = "Parser for the Translation Memory eXchange (.tmx) file format."
14
+
15
+ s.platform = Gem::Platform::RUBY
16
+ s.has_rdoc = true
17
+
18
+ s.require_path = 'lib'
19
+ s.files = Dir["{lib,spec}/**/*", "Gemfile", "History.txt", "README.md", "Rakefile", "tmx-parser.gemspec"]
20
+
21
+ s.add_dependency 'nokogiri', '~> 1.8'
22
+ end
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tmx-parser-2018
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Cameron Dutro
8
+ - Michiel de Mare
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2018-02-07 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '1.8'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - "~>"
26
+ - !ruby/object:Gem::Version
27
+ version: '1.8'
28
+ description: Parser for the Translation Memory eXchange (.tmx) file format.
29
+ email:
30
+ - camertron@gmail.com
31
+ - michiel@tolq.com
32
+ executables: []
33
+ extensions: []
34
+ extra_rdoc_files: []
35
+ files:
36
+ - Gemfile
37
+ - History.txt
38
+ - README.md
39
+ - Rakefile
40
+ - lib/tmx-parser.rb
41
+ - lib/tmx-parser/document.rb
42
+ - lib/tmx-parser/elements.rb
43
+ - lib/tmx-parser/listener.rb
44
+ - lib/tmx-parser/sax_document.rb
45
+ - lib/tmx-parser/tag_names.rb
46
+ - lib/tmx-parser/version.rb
47
+ - spec/spec_helper.rb
48
+ - spec/tmx-parser_spec.rb
49
+ - tmx-parser.gemspec
50
+ homepage: http://github.com/mdemare
51
+ licenses: []
52
+ metadata: {}
53
+ post_install_message:
54
+ rdoc_options: []
55
+ require_paths:
56
+ - lib
57
+ required_ruby_version: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ requirements: []
68
+ rubyforge_project:
69
+ rubygems_version: 2.7.4
70
+ signing_key:
71
+ specification_version: 4
72
+ summary: Parser for the Translation Memory eXchange (.tmx) file format.
73
+ test_files: []