tmx-parser 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +4 -0
- data/History.txt +3 -0
- data/README.md +113 -0
- data/Rakefile +20 -0
- data/lib/tmx-parser.rb +20 -0
- data/lib/tmx-parser/document.rb +26 -0
- data/lib/tmx-parser/elements.rb +85 -0
- data/lib/tmx-parser/listener.rb +80 -0
- data/lib/tmx-parser/sax_document.rb +73 -0
- data/lib/tmx-parser/tag_names.rb +13 -0
- data/lib/tmx-parser/version.rb +5 -0
- data/spec/spec_helper.rb +8 -0
- data/spec/tmx-parser_spec.rb +199 -0
- data/tmx-parser.gemspec +24 -0
- metadata +100 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: a95740797f7386113397902ddd8b2293066e1929
|
4
|
+
data.tar.gz: ad8ce82161502d6e3746889cc13e66d11869b0b6
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 275c81a33ea76c4cac5e4d17e20d6b5478820991375bb739266ae820bfc52a142a83bc2d0712b823df4b7c8e2e381d6b11d71f2737a9dbd4fef8c65a39880d3c
|
7
|
+
data.tar.gz: 549f44444e7746edb4d3140294d73bfae5cb29d27010103966470777f570e0df6966cc1296e7ff9e62f1f47a24db1fb2361805eaecd8947c32e89b6a2abaab6d
|
data/Gemfile
ADDED
data/History.txt
ADDED
data/README.md
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
tmx-parser
|
2
|
+
=================
|
3
|
+
|
4
|
+
Parser for the Translation Memory eXchange (.tmx) file format.
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
`gem install tmx-parser`
|
9
|
+
|
10
|
+
## Usage
|
11
|
+
|
12
|
+
```ruby
|
13
|
+
require 'tmx-parser'
|
14
|
+
```
|
15
|
+
|
16
|
+
## Functionality
|
17
|
+
|
18
|
+
Got a .tmx file you need to parse? Just use the `TmxParser.load` method. It'll return an enumerable `TmxParser::Document` object for your iterating pleasure:
|
19
|
+
|
20
|
+
```ruby
|
21
|
+
doc = TmxParser.load(File.open('path/to/my.tmx'))
|
22
|
+
doc.each do |unit|
|
23
|
+
...
|
24
|
+
end
|
25
|
+
```
|
26
|
+
|
27
|
+
You can also pass a string to `TmxParser.load`:
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
doc = TmxParser.load(File.read('path/to/my.tmx'))
|
31
|
+
```
|
32
|
+
|
33
|
+
The parser works in a streaming fashion, meaning it tries not to hold the entire source document in memory all at once. It will instead yield each translation unit incrementally.
|
34
|
+
|
35
|
+
## Translation Units
|
36
|
+
|
37
|
+
Translation units are simple Ruby objects that contain properties (tmx `<prop>` elements) and variants (tmx `<tuv>` elements). You can also retrieve the tuid (translation unit id) and segtype (segment type). Given this document:
|
38
|
+
|
39
|
+
```xml
|
40
|
+
<tmx version="1.4">
|
41
|
+
<body>
|
42
|
+
<tu tuid="79b371014a8382a3b6efb86ec6ea97d9" segtype="block">
|
43
|
+
<prop type="x-segment-id">0</prop>
|
44
|
+
<prop type="x-some-property">six.hours</prop>
|
45
|
+
<tuv xml:lang="en-US"><seg>6 hours</seg></tuv>
|
46
|
+
<tuv xml:lang="de-DE"><seg>6 Stunden</seg></tuv>
|
47
|
+
</tu>
|
48
|
+
</body>
|
49
|
+
</tmx>
|
50
|
+
```
|
51
|
+
|
52
|
+
Here's what you can do:
|
53
|
+
|
54
|
+
```ruby
|
55
|
+
doc.each do |unit|
|
56
|
+
unit.tuid # => "79b371014a8382a3b6efb86ec6ea97d9"
|
57
|
+
unit.segtype # => "block"
|
58
|
+
|
59
|
+
unit.properties.keys # => ["x-segment-id", "x-some-property"]
|
60
|
+
unit.properties['x-segment-id'].value # => "0"
|
61
|
+
|
62
|
+
variant = unit.variants.first
|
63
|
+
variant.locale # => "en-US"
|
64
|
+
variant.elements # => ["6 hours"]
|
65
|
+
end
|
66
|
+
```
|
67
|
+
|
68
|
+
## Placeholders
|
69
|
+
|
70
|
+
Let's consider a different document:
|
71
|
+
|
72
|
+
```xml
|
73
|
+
<tmx version="1.4">
|
74
|
+
<body>
|
75
|
+
<tu tuid="79b371014a8382a3b6efb86ec6ea97d9" segtype="block">
|
76
|
+
<prop type="x-segment-id">0</prop>
|
77
|
+
<tuv xml:lang="en-US">
|
78
|
+
<seg><ph type="x-placeholder">{0}</ph> sessions</seg>
|
79
|
+
</tuv>
|
80
|
+
<tuv xml:lang="de-DE">
|
81
|
+
<seg><ph type="x-placeholder">{0}</ph> Einheiten</seg>
|
82
|
+
</tuv>
|
83
|
+
</tu>
|
84
|
+
</body>
|
85
|
+
</tmx>
|
86
|
+
```
|
87
|
+
|
88
|
+
The placeholders will be added to the variant's `elements` array:
|
89
|
+
|
90
|
+
```ruby
|
91
|
+
doc.each do |unit|
|
92
|
+
variant = unit.variants.first
|
93
|
+
variant.elements # => [#<TmxParser::Placeholder:0x5ad5be4a @text="{0}", @type="x-placeholder">, " sessions"]
|
94
|
+
end
|
95
|
+
```
|
96
|
+
|
97
|
+
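Begin paired tags (tmx `<bpt>` elements) and end paired tags (tmx `<ept>` elements) are handled the same way: they are added to the variant's `elements` array as `TmxParser::BeginPair` and `TmxParser::EndPair` objects.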
|
98
|
+
|
99
|
+
## See Also
|
100
|
+
|
101
|
+
* TMX file format: [http://www.gala-global.org/oscarStandards/tmx/tmx14b.html](http://www.gala-global.org/oscarStandards/tmx/tmx14b.html)
|
102
|
+
|
103
|
+
## Requirements
|
104
|
+
|
105
|
+
Requires the nokogiri gem (`~> 1.6.0`), which is installed automatically as a dependency of `tmx-parser`.
|
106
|
+
|
107
|
+
## Running Tests
|
108
|
+
|
109
|
+
`bundle exec rspec` should do the trick :)
|
110
|
+
|
111
|
+
## Authors
|
112
|
+
|
113
|
+
* Cameron C. Dutro: http://github.com/camertron
|
data/Rakefile
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
$:.unshift File.join(File.dirname(__FILE__), 'lib')
|
4
|
+
|
5
|
+
require 'rubygems' unless ENV['NO_RUBYGEMS']
|
6
|
+
|
7
|
+
require 'bundler'
|
8
|
+
require 'rspec/core/rake_task'
|
9
|
+
require 'rubygems/package_task'
|
10
|
+
|
11
|
+
require 'tmx-parser'
|
12
|
+
|
13
|
+
Bundler::GemHelper.install_tasks
|
14
|
+
|
15
|
+
# Plain `rake` delegates to the spec task defined below.
task(:default => :spec)

desc 'Run specs'
# Registers the RSpec rake task over every *_spec.rb file under ./spec.
RSpec::Core::RakeTask.new { |spec_task| spec_task.pattern = './spec/**/*_spec.rb' }
|
data/lib/tmx-parser.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'nokogiri'
|
4
|
+
|
5
|
+
# Top-level namespace of the gem. Every constant is registered for autoload,
# so its file is only required the first time the constant is referenced.
module TmxParser
  # Constant name => file that defines it.
  {
    :Document      => 'tmx-parser/document',
    :SaxDocument   => 'tmx-parser/sax_document',
    :Listener      => 'tmx-parser/listener',
    :TagNames      => 'tmx-parser/tag_names',
    :Unit          => 'tmx-parser/elements',
    :PropertyValue => 'tmx-parser/elements',
    :Variant       => 'tmx-parser/elements',
    :Placeholder   => 'tmx-parser/elements',
    :BeginPair     => 'tmx-parser/elements',
    :EndPair       => 'tmx-parser/elements'
  }.each do |const_name, path|
    autoload const_name, path
  end

  class << self
    # Wraps a TMX source (a string or a file handle) in a Document.
    # The source is stored as given; this method does not parse it.
    def load(string_or_file_handle)
      Document.new(string_or_file_handle)
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module TmxParser
|
4
|
+
# Enumerable view over a TMX source. Nothing is parsed until #each is
# called with a block; every call to #each starts a fresh SAX parse.
class Document
  include Enumerable

  attr_reader :string_or_file_handle

  # string_or_file_handle - the TMX source, handed to the SAX parser as-is.
  def initialize(string_or_file_handle)
    @string_or_file_handle = string_or_file_handle
  end

  # Parses the source, passing the block to the Listener that receives the
  # parse events. Without a block an Enumerator over #each is returned.
  def each(&block)
    return to_enum(__method__) unless block_given?

    sax_handler = SaxDocument.new(Listener.new(&block))
    Nokogiri::XML::SAX::Parser.new(sax_handler).parse(string_or_file_handle)
  end
end
|
26
|
+
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module TmxParser
|
4
|
+
|
5
|
+
# A translation unit: its id, segment type, a hash of properties and an
# array of variants. Both collections start out empty.
class Unit
  attr_reader :tuid, :segtype, :properties, :variants

  def initialize(tuid, segtype)
    @tuid, @segtype = tuid, segtype
    @properties, @variants = {}, []
  end
end
|
15
|
+
|
16
|
+
# Holds the text of a property; text is appended chunk by chunk.
class PropertyValue
  attr_accessor :value

  def initialize
    self.value = ''
  end

  # Appends str to the current value and returns the value string.
  def receive_text(str)
    value << str
  end
end
|
27
|
+
|
28
|
+
# One language variant of a unit: a locale plus an ordered list of elements.
class Variant
  attr_reader :locale
  attr_accessor :elements

  def initialize(locale)
    @locale = locale
    self.elements = []
  end

  # Appends str as a new entry at the end of the elements list.
  def receive_text(str)
    elements.push(str)
  end
end
|
41
|
+
|
42
|
+
# An inline placeholder: a type plus the text accumulated for it.
# start and length are plain accessors that this class never sets itself.
class Placeholder
  attr_reader :type, :text
  attr_accessor :start, :length

  def initialize(type)
    @type = type
    @text = ''
  end

  # Appends str to the placeholder's text and returns that text.
  def receive_text(str)
    text.concat(str)
  end
end
|
55
|
+
|
56
|
+
# Shared behaviour of paired tags: the pairing index `i` and the text
# accumulated for the tag. Subclasses supply #type.
class Pair
  attr_reader :text, :i

  def initialize(i)
    @i = i
    @text = ''
  end

  # Appends str to the tag's text and returns that text.
  def receive_text(str)
    text.concat(str)
  end

  # Must be overridden; the base class has no type of its own.
  def type
    raise(NotImplementedError)
  end
end

# The opening half of a pair.
class BeginPair < Pair
  def type
    :begin
  end
end

# The closing half of a pair.
class EndPair < Pair
  def type
    :end
  end
end
|
84
|
+
|
85
|
+
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module TmxParser
|
4
|
+
|
5
|
+
# Receives parse events from SaxDocument, assembles Unit objects from them
# and hands each finished unit to the block given at construction time.
#
# Elements that accept text (variants, properties, placeholders, paired tags)
# are kept on a stack; incoming text always goes to the element on top.
class Listener
  include TagNames

  attr_reader :units, :proc

  # block - called with each Unit once its closing tag has been seen.
  def initialize(&block)
    @stack = []
    @proc = block
  end

  # Starts a new unit, which becomes the target of subsequent events.
  def unit(tuid, segtype)
    @current_unit = Unit.new(tuid, segtype)
  end

  # Adds a variant for locale to the current unit and makes it the text target.
  def variant(locale)
    new_variant = Variant.new(locale)
    current_unit.variants << new_variant
    stack.push(new_variant)
  end

  # Adds a property called name to the current unit and makes it the text
  # target.
  def property(name)
    # TMX also allows <prop> inside <header>, i.e. before any unit exists.
    # There is nothing to attach such a property to, so it is skipped instead
    # of failing on a nil unit. Its text and closing tag are then ignored by
    # #text and #done because nothing was pushed for it.
    return unless current_unit

    val = PropertyValue.new
    current_unit.properties[name] = val
    stack.push(val)
  end

  # Forwards str to the element on top of the stack; dropped if there is none.
  def text(str)
    receiver = stack.last
    receiver.receive_text(str) if receiver
  end

  # Handles a closing tag. Closing a unit yields it to the block. For any
  # other tag, the top of the stack is popped only when it was created for
  # that tag, so closing tags of untracked elements leave the stack alone.
  def done(tag_name)
    if tag_name == UNIT_TAG
      proc.call(current_unit)
    elsif tag_name_for(stack.last) == tag_name
      stack.pop
    end
  end

  # Appends a Placeholder of the given type to the most recent variant.
  def placeholder(type)
    add_inline_element(Placeholder.new(type))
  end

  # Appends a BeginPair with pairing index i to the most recent variant.
  def begin_paired_tag(i)
    add_inline_element(BeginPair.new(i))
  end

  # Appends an EndPair with pairing index i to the most recent variant.
  def end_paired_tag(i)
    add_inline_element(EndPair.new(i))
  end

  private

  # Appends element to the latest variant of the current unit and makes it
  # the text target until its closing tag arrives.
  def add_inline_element(element)
    current_unit.variants.last.elements << element
    stack.push(element)
  end

  # Maps a stack entry back to the tag name it was created for
  # (nil for anything that is not a tracked element, including nil itself).
  def tag_name_for(obj)
    case obj
    when Variant then VARIANT_TAG
    when PropertyValue then PROPERTY_TAG
    when Placeholder then PLACEHOLDER_TAG
    when BeginPair then BEGIN_PAIRED_TAG
    when EndPair then END_PAIRED_TAG
    end
  end

  attr_reader :current_unit, :stack
end
|
80
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'nokogiri'
|
4
|
+
|
5
|
+
module TmxParser
|
6
|
+
|
7
|
+
# SAX handler that translates Nokogiri parse callbacks into Listener events.
#
# Text is buffered in @text while the innermost open element is marked as
# capturing, and is flushed to the listener whenever a capturing element
# opens or any element closes.
class SaxDocument < Nokogiri::XML::SAX::Document
  include TagNames

  attr_reader :listener

  # listener - object receiving unit/variant/property/text/done events.
  def initialize(listener)
    @listener = listener
    # One entry per open element (plus a base entry): is its text captured?
    @capture_stack = [false]
    @text = ''
  end

  # Dispatches an opening tag. Every branch pushes exactly one entry onto the
  # capture stack so that the single pop in #end_element stays balanced.
  # Previously only capturing tags pushed, so an unhandled inline element
  # inside a segment popped the segment's flag and the remaining segment text
  # was silently dropped.
  def start_element(name, attrs = [])
    case name
    when UNIT_TAG
      inherit_capture
      listener.unit(
        get_attr('tuid', attrs), get_attr('segtype', attrs)
      )
    when VARIANT_TAG
      inherit_capture
      listener.variant(get_attr('xml:lang', attrs))
    when SEGMENT_TAG
      capture_text
    when PROPERTY_TAG
      capture_text
      listener.property(get_attr('type', attrs))
    when BEGIN_PAIRED_TAG
      capture_text
      listener.begin_paired_tag(get_attr('i', attrs))
    when END_PAIRED_TAG
      capture_text
      listener.end_paired_tag(get_attr('i', attrs))
    when PLACEHOLDER_TAG
      capture_text
      listener.placeholder(get_attr('type', attrs))
    else
      inherit_capture
    end
  end

  # Closes the element: drops its capture flag, flushes buffered text to the
  # listener, then reports the closing tag.
  def end_element(name)
    @capture_stack.pop
    send_text
    listener.done(name)
  end

  # Buffers character data while the innermost open element is capturing.
  # The buffer is appended in place; #send_text swaps in a fresh string after
  # every flush, so text already handed to the listener is never modified.
  def characters(str)
    @text << str if @capture_stack.last
  end

  private

  # Hands any buffered text to the listener and starts a new buffer.
  def send_text
    listener.text(@text) unless @text.empty?
    @text = ''
  end

  # Flushes text belonging to the enclosing element, then marks the element
  # being opened as capturing.
  def capture_text
    send_text
    @capture_stack.push(true)
  end

  # Marks the element being opened with the same capture state as its parent:
  # text inside an unhandled element within a segment stays part of the
  # segment, while text outside any capturing element is still ignored.
  def inherit_capture
    @capture_stack.push(@capture_stack.last)
  end

  # Returns the value of the attribute called name from a list of
  # [name, value] pairs, or nil when it is absent.
  def get_attr(name, attrs)
    pair = attrs.find { |attr| attr.first == name }
    pair.last if pair
  end
end
|
72
|
+
|
73
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,199 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
describe TmxParser do
|
6
|
+
let(:parser) { TmxParser }
|
7
|
+
let(:tuid) { '79b371014a8382a3b6efb86ec6ea97d9' }
|
8
|
+
|
9
|
+
# Returns the first variant of unit whose locale equals the given locale,
# or nil when the unit has no such variant.
def find_variant(locale, unit)
  unit.variants.find do |candidate|
    candidate.locale == locale
  end
end
|
12
|
+
|
13
|
+
context 'with a basic tmx document' do
  let(:document) do
    %Q{
      <tmx version="1.4">
        <body>
          <tu tuid="#{tuid}" segtype="block">
            <prop type="x-segment-id">0</prop>
            <prop type="x-some-property">six.hours</prop>
            <tuv xml:lang="en-US"><seg>6 hours</seg></tuv>
            <tuv xml:lang="de-DE"><seg>6 Stunden</seg></tuv>
          </tu>
        </body>
      </tmx>
    }
  end

  it 'identifies the tuid and segtype' do
    units = parser.load(document).to_a
    expect(units.size).to eq(1)

    unit = units.first
    expect(unit.tuid).to eq(tuid)
    expect(unit.segtype).to eq('block')
  end

  it 'identifies the correct variants' do
    unit = parser.load(document).to_a.first

    expect(unit.variants.size).to eq(2)
    expect(find_variant('en-US', unit).elements).to eq(['6 hours'])
    expect(find_variant('de-DE', unit).elements).to eq(['6 Stunden'])

    # Every entry must be a real Variant object.
    expect(unit.variants).to all(be_a(TmxParser::Variant))
  end

  it 'identifies properties' do
    properties = parser.load(document).to_a.first.properties

    expect(properties.size).to eq(2)
    expect(properties).to include('x-segment-id')
    expect(properties).to include('x-some-property')
    expect(properties['x-segment-id'].value).to eq('0')
    expect(properties['x-some-property'].value).to eq('six.hours')
  end
end
|
62
|
+
|
63
|
+
context 'with a tmx document that contains a property that makes jruby cry' do
  # For some reason, jruby doesn't like square brackets in property values.
  # See: https://github.com/sparklemotion/nokogiri/issues/1261

  let(:document) do
    %Q{
      <tmx version="1.4">
        <body>
          <tu tuid="#{tuid}" segtype="block">
            <prop type="x-segment-id">0</prop>
            <prop type="x-some-property">en:#:daily-data:#:[3]:#:times</prop>
            <tuv xml:lang="en-US"><seg>6 hours</seg></tuv>
            <tuv xml:lang="de-DE"><seg>6 Stunden</seg></tuv>
          </tu>
        </body>
      </tmx>
    }
  end

  it 'identifies the property correctly' do
    properties = parser.load(document).to_a.first.properties
    expect(properties).to include('x-some-property')

    # The bracketed value must come through complete and in one piece.
    property = properties['x-some-property']
    expect(property).to be_a(TmxParser::PropertyValue)
    expect(property.value).to eq('en:#:daily-data:#:[3]:#:times')
  end
end
|
92
|
+
|
93
|
+
context 'with a tmx document that contains placeholders' do
  let(:document) do
    %Q{
      <tmx version="1.4">
        <body>
          <tu tuid="#{tuid}" segtype="block">
            <prop type="x-segment-id">0</prop>
            <tuv xml:lang="en-US">
              <seg><ph type="x-placeholder">{0}</ph> sessions</seg>
            </tuv>
            <tuv xml:lang="de-DE">
              <seg><ph type="x-placeholder">{0}</ph> Einheiten</seg>
            </tuv>
          </tu>
        </body>
      </tmx>
    }
  end

  it 'identifies the placeholders' do
    unit = parser.load(document).to_a.first
    expect(unit.variants.size).to eq(2)

    # Both variants have the same shape (placeholder followed by text), so
    # they run through identical assertions. This replaces two hand-copied
    # branches: the German one reused the name `en_variant`, and the English
    # one lacked the Placeholder type check.
    { 'en-US' => ' sessions', 'de-DE' => ' Einheiten' }.each do |locale, trailing_text|
      variant = find_variant(locale, unit)
      expect(variant.elements.size).to eq(2)

      placeholder = variant.elements.first
      expect(placeholder).to be_a(TmxParser::Placeholder)
      expect(placeholder.type).to eq('x-placeholder')
      expect(placeholder.text).to eq('{0}')

      text = variant.elements.last
      expect(text).to be_a(String)
      expect(text).to eq(trailing_text)
    end
  end
end
|
147
|
+
|
148
|
+
context 'with a tmx document that contains paired tags' do
  # The markup carried by <bpt>/<ept> is entity-escaped: raw <strong> tags
  # would make the fixture malformed XML and could never produce the literal
  # '<strong>' / '</strong>' text asserted below.
  let(:document) do
    %Q{
      <tmx version="1.4">
        <body>
          <tu tuid="#{tuid}" segtype="block">
            <prop type="x-segment-id">0</prop>
            <tuv xml:lang="en-US">
              <seg>Build your healthy habit of daily training with <bpt i="3">&lt;strong&gt;</bpt>email training reminders.<ept i="3">&lt;/strong&gt;</ept></seg>
            </tuv>
            <tuv xml:lang="de-DE">
              <seg><bpt i="3">&lt;strong&gt;</bpt>Mit Erinnerungen per E-Mail<ept i="3">&lt;/strong&gt;</ept> können Sie das tägliche Training zu einer schönen Angewohnheit werden lassen.</seg>
            </tuv>
          </tu>
        </body>
      </tmx>
    }
  end

  it 'identifies the tags' do
    unit = parser.load(document).to_a.first
    expect(unit.variants.size).to eq(2)

    # English: text, begin tag, text, end tag.
    leading, begin_pair, inner, end_pair = find_variant('en-US', unit).elements
    expect(find_variant('en-US', unit).elements.size).to eq(4)

    expect(leading).to be_a(String)
    expect(leading).to eq('Build your healthy habit of daily training with ')

    expect(begin_pair).to be_a(TmxParser::BeginPair)
    expect(begin_pair.i).to eq('3')
    expect(begin_pair.text).to eq('<strong>')

    expect(inner).to be_a(String)
    expect(inner).to eq('email training reminders.')

    expect(end_pair).to be_a(TmxParser::EndPair)
    expect(end_pair.i).to eq('3')
    expect(end_pair.text).to eq('</strong>')

    # German: begin tag, text, end tag, text. This variant was previously
    # never asserted.
    de_elements = find_variant('de-DE', unit).elements
    expect(de_elements.size).to eq(4)

    expect(de_elements[0]).to be_a(TmxParser::BeginPair)
    expect(de_elements[0].i).to eq('3')
    expect(de_elements[0].text).to eq('<strong>')

    expect(de_elements[1]).to eq('Mit Erinnerungen per E-Mail')

    expect(de_elements[2]).to be_a(TmxParser::EndPair)
    expect(de_elements[2].i).to eq('3')
    expect(de_elements[2].text).to eq('</strong>')

    expect(de_elements[3]).to eq(
      ' können Sie das tägliche Training zu einer schönen Angewohnheit werden lassen.'
    )
  end
end
|
199
|
+
end
|
data/tmx-parser.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
$:.unshift File.join(File.dirname(__FILE__), 'lib')
|
4
|
+
require 'tmx-parser/version'
|
5
|
+
|
6
|
+
# Packaging metadata for the tmx-parser gem.
Gem::Specification.new do |s|
  s.name     = "tmx-parser"
  s.version  = ::TmxParser::VERSION
  s.authors  = ["Cameron Dutro"]
  s.email    = ["camertron@gmail.com"]
  s.homepage = "http://github.com/camertron"

  s.description = s.summary = "Parser for the Translation Memory eXchange (.tmx) file format."

  s.platform = Gem::Platform::RUBY

  # `has_rdoc=` is intentionally not set: RubyGems treats it as a deprecated,
  # ignored setter, so it only produced a deprecation warning.

  s.require_path = 'lib'
  s.files = Dir["{lib,spec}/**/*", "Gemfile", "History.txt", "README.md", "Rakefile", "tmx-parser.gemspec"]

  # Runtime dependency: the XML SAX parsing is done by nokogiri.
  s.add_dependency 'nokogiri', '~> 1.6.0'
  s.add_development_dependency 'pry-nav', '~> 0.2.0'
  s.add_development_dependency 'rspec', '~> 3.2.0'
end
|
metadata
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: tmx-parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Cameron Dutro
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-03-19 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.6.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 1.6.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: pry-nav
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.2.0
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.2.0
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 3.2.0
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 3.2.0
|
55
|
+
description: Parser for the Translation Memory eXchange (.tmx) file format.
|
56
|
+
email:
|
57
|
+
- camertron@gmail.com
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- Gemfile
|
63
|
+
- History.txt
|
64
|
+
- README.md
|
65
|
+
- Rakefile
|
66
|
+
- lib/tmx-parser.rb
|
67
|
+
- lib/tmx-parser/document.rb
|
68
|
+
- lib/tmx-parser/elements.rb
|
69
|
+
- lib/tmx-parser/listener.rb
|
70
|
+
- lib/tmx-parser/sax_document.rb
|
71
|
+
- lib/tmx-parser/tag_names.rb
|
72
|
+
- lib/tmx-parser/version.rb
|
73
|
+
- spec/spec_helper.rb
|
74
|
+
- spec/tmx-parser_spec.rb
|
75
|
+
- tmx-parser.gemspec
|
76
|
+
homepage: http://github.com/camertron
|
77
|
+
licenses: []
|
78
|
+
metadata: {}
|
79
|
+
post_install_message:
|
80
|
+
rdoc_options: []
|
81
|
+
require_paths:
|
82
|
+
- lib
|
83
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
84
|
+
requirements:
|
85
|
+
- - ">="
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: '0'
|
88
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
89
|
+
requirements:
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: '0'
|
93
|
+
requirements: []
|
94
|
+
rubyforge_project:
|
95
|
+
rubygems_version: 2.4.6
|
96
|
+
signing_key:
|
97
|
+
specification_version: 4
|
98
|
+
summary: Parser for the Translation Memory eXchange (.tmx) file format.
|
99
|
+
test_files: []
|
100
|
+
has_rdoc: true
|