tmx-parser-2018 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +4 -0
- data/History.txt +11 -0
- data/README.md +113 -0
- data/Rakefile +20 -0
- data/lib/tmx-parser/document.rb +27 -0
- data/lib/tmx-parser/elements.rb +154 -0
- data/lib/tmx-parser/listener.rb +80 -0
- data/lib/tmx-parser/sax_document.rb +73 -0
- data/lib/tmx-parser/tag_names.rb +13 -0
- data/lib/tmx-parser/version.rb +5 -0
- data/lib/tmx-parser.rb +20 -0
- data/spec/spec_helper.rb +8 -0
- data/spec/tmx-parser_spec.rb +245 -0
- data/tmx-parser.gemspec +22 -0
- metadata +73 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 9b35d164435ff250f53443f41cde51f9da1128a937f8be81e2ee87506660d291
|
4
|
+
data.tar.gz: c484c6452c02ba983a73fd5864112f1c612bcd1727cdece552d3ba00f85b7e27
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 438d30db65dea8693a023b91f9d64cf1489bdd3e352a93bc19b3d968d667c642bc6d40453e6d02ee7e7faac33e62f97adae4e23eb0cf62d45709a491a7702201
|
7
|
+
data.tar.gz: b8fa9c0e7e658a3e8d8656b3fe36dbe3ec3ff199a67e6000893b27a48beff2ed1d47b46d837bc0b52773c931d41e93dc354e7ed8192c18f557db130e65650e17
|
data/Gemfile
ADDED
data/History.txt
ADDED
data/README.md
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
tmx-parser
|
2
|
+
=================
|
3
|
+
|
4
|
+
Parser for the Translation Memory eXchange (.tmx) file format.
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
`gem install tmx-parser-2018`
|
9
|
+
|
10
|
+
## Usage
|
11
|
+
|
12
|
+
```ruby
|
13
|
+
require 'tmx-parser'
|
14
|
+
```
|
15
|
+
|
16
|
+
## Functionality
|
17
|
+
|
18
|
+
Got a .tmx file you need to parse? Just use the `TmxParser.load` method. It returns an enumerable `TmxParser::Document` object for your iterating pleasure:
|
19
|
+
|
20
|
+
```ruby
|
21
|
+
doc = TmxParser.load(File.open('path/to/my.tmx'))
|
22
|
+
doc.each do |unit|
|
23
|
+
...
|
24
|
+
end
|
25
|
+
```
|
26
|
+
|
27
|
+
You can also pass a string to `TmxParser.load`:
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
doc = TmxParser.load(File.read('path/to/my.tmx'))
|
31
|
+
```
|
32
|
+
|
33
|
+
The parser works in a streaming fashion, meaning it tries not to hold the entire source document in memory all at once. It will instead yield each translation unit incrementally.
|
34
|
+
|
35
|
+
## Translation Units
|
36
|
+
|
37
|
+
Translation units are simple Ruby objects that contain properties (tmx `<prop>` elements) and variants (tmx `<tuv>` elements). You can also retrieve the tuid (translation unit id) and segtype (segment type). Given this document:
|
38
|
+
|
39
|
+
```xml
|
40
|
+
<tmx version="1.4">
|
41
|
+
<body>
|
42
|
+
<tu tuid="79b371014a8382a3b6efb86ec6ea97d9" segtype="block">
|
43
|
+
<prop type="x-segment-id">0</prop>
|
44
|
+
<prop type="x-some-property">six.hours</prop>
|
45
|
+
<tuv xml:lang="en-US"><seg>6 hours</seg></tuv>
|
46
|
+
<tuv xml:lang="de-DE"><seg>6 Stunden</seg></tuv>
|
47
|
+
</tu>
|
48
|
+
</body>
|
49
|
+
</tmx>
|
50
|
+
```
|
51
|
+
|
52
|
+
Here's what you can do:
|
53
|
+
|
54
|
+
```ruby
|
55
|
+
doc.each do |unit|
|
56
|
+
unit.tuid # => "79b371014a8382a3b6efb86ec6ea97d9"
|
57
|
+
unit.segtype # => "block"
|
58
|
+
|
59
|
+
unit.properties.keys # => ["x-segment-id", "x-some-property"]
|
60
|
+
unit.properties['x-segment-id'].value # => "0"
|
61
|
+
|
62
|
+
variant = unit.variants.first
|
63
|
+
variant.locale # => "en-US"
|
64
|
+
variant.elements # => ["6 hours"]
|
65
|
+
end
|
66
|
+
```
|
67
|
+
|
68
|
+
## Placeholders
|
69
|
+
|
70
|
+
Let's consider a different document:
|
71
|
+
|
72
|
+
```xml
|
73
|
+
<tmx version="1.4">
|
74
|
+
<body>
|
75
|
+
<tu tuid="79b371014a8382a3b6efb86ec6ea97d9" segtype="block">
|
76
|
+
<prop type="x-segment-id">0</prop>
|
77
|
+
<tuv xml:lang="en-US">
|
78
|
+
<seg><ph type="x-placeholder">{0}</ph> sessions</seg>
|
79
|
+
</tuv>
|
80
|
+
<tuv xml:lang="de-DE">
|
81
|
+
<seg><ph type="x-placeholder">{0}</ph> Einheiten</seg>
|
82
|
+
</tuv>
|
83
|
+
</tu>
|
84
|
+
</body>
|
85
|
+
</tmx>
|
86
|
+
```
|
87
|
+
|
88
|
+
The placeholders will be added to the variant's `elements` array:
|
89
|
+
|
90
|
+
```ruby
|
91
|
+
doc.each do |unit|
|
92
|
+
variant = unit.variants.first
|
93
|
+
variant.elements # => [#<TmxParser::Placeholder:0x5ad5be4a @text="{0}", @type="x-placeholder">, " sessions"]
|
94
|
+
end
|
95
|
+
```
|
96
|
+
|
97
|
+
Begin paired tags (tmx `bpt` elements) and end paired tags (tmx `ept` elements) are handled the same way.
|
98
|
+
|
99
|
+
## See Also
|
100
|
+
|
101
|
+
* TMX file format: [http://www.gala-global.org/oscarStandards/tmx/tmx14b.html](http://www.gala-global.org/oscarStandards/tmx/tmx14b.html)
|
102
|
+
|
103
|
+
## Requirements
|
104
|
+
|
105
|
+
Requires the [nokogiri](https://nokogiri.org) gem (`~> 1.8`), which is installed automatically as a gem dependency.
|
106
|
+
|
107
|
+
## Running Tests
|
108
|
+
|
109
|
+
`bundle exec rspec` should do the trick :)
|
110
|
+
|
111
|
+
## Authors
|
112
|
+
|
113
|
+
* Cameron C. Dutro: http://github.com/camertron
|
data/Rakefile
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# encoding: UTF-8

# Put the gem's own lib/ directory at the front of the load path so that
# `require 'tmx-parser'` below resolves to this checkout.
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), 'lib')

require 'rubygems' unless ENV['NO_RUBYGEMS']

require 'bundler'
require 'rspec/core/rake_task'
require 'rubygems/package_task'

require 'tmx-parser'

# Registers Bundler's gem helper tasks.
Bundler::GemHelper.install_tasks

# Running plain `rake` runs the specs.
task default: :spec

desc 'Run specs'
RSpec::Core::RakeTask.new { |spec_task| spec_task.pattern = './spec/**/*_spec.rb' }
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module TmxParser
|
4
|
+
# Enumerable wrapper around a TMX source. Iterating parses the source with a
# SAX parser and yields each translation unit to the caller's block.
class Document
  include Enumerable

  attr_reader :string_or_file_handle, :encoding

  # string_or_file_handle - the TMX source handed to the SAX parser.
  # encoding              - encoding passed (as a string) to the SAX parser;
  #                         defaults to Encoding.default_external.
  def initialize(string_or_file_handle, encoding = Encoding.default_external)
    @string_or_file_handle = string_or_file_handle
    @encoding = encoding
  end

  # Parses the source, handing every completed unit to the block.
  # Without a block, returns an Enumerator over the units instead.
  def each(&block)
    return to_enum(__method__) unless block_given?

    sax_handler = SaxDocument.new(Listener.new(&block))
    sax_parser = Nokogiri::XML::SAX::Parser.new(sax_handler, encoding.to_s)
    sax_parser.parse(string_or_file_handle)
  end
end
|
27
|
+
end
|
@@ -0,0 +1,154 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module TmxParser
|
4
|
+
|
5
|
+
# A translation unit: its tuid, segtype, a hash of properties keyed by
# property name, and an ordered list of variants.
class Unit
  attr_reader :tuid, :segtype, :properties, :variants

  # tuid    - translation unit id (may be nil).
  # segtype - segment type (may be nil).
  def initialize(tuid, segtype)
    @tuid = tuid
    @segtype = segtype
    @properties = {}
    @variants = []
  end

  # Returns a deep copy: the id strings, every variant and every property
  # value are duplicated, so mutating the copy never affects the original.
  def copy
    # Guard the dups so a unit without tuid/segtype can still be copied on
    # Rubies where nil.dup raises.
    self.class.new(tuid && tuid.dup, segtype && segtype.dup).tap do |new_unit|
      new_unit.variants.concat(variants.map(&:copy))
      properties.each do |key, property_value|
        new_unit.properties[key] = property_value.copy
      end
    end
  end

  # Two units are equal when tuid, segtype, all variants (in order) and all
  # properties match. Comparing the whole collections also checks their
  # sizes, so a unit is not equal to one that merely contains it as a
  # prefix/subset. Returns false for anything that is not a Unit.
  def ==(other_unit)
    other_unit.is_a?(Unit) &&
      tuid == other_unit.tuid &&
      segtype == other_unit.segtype &&
      variants == other_unit.variants &&
      properties == other_unit.properties
  end
end
|
35
|
+
|
36
|
+
# The text value of a single property. Text is accumulated piecewise via
# #receive_text.
class PropertyValue
  attr_accessor :value

  # init_value - initial text; note that #receive_text appends to this very
  #              string object.
  def initialize(init_value = '')
    @value = init_value
  end

  # Appends str to the current value.
  def receive_text(str)
    @value << str
  end

  # Returns a new PropertyValue holding a duplicate of the value string.
  def copy
    self.class.new(value.dup)
  end

  # Equal when the other object is a PropertyValue with the same value.
  # Returns false (rather than raising NoMethodError) for nil or any other
  # kind of object.
  def ==(other_property_value)
    other_property_value.is_a?(PropertyValue) &&
      value == other_property_value.value
  end
end
|
55
|
+
|
56
|
+
# One language variant of a unit: a locale plus an ordered list of elements.
# Elements are whatever gets appended - plain strings via #receive_text, or
# other objects pushed directly onto #elements by the caller.
class Variant
  attr_reader :locale
  attr_accessor :elements

  def initialize(locale)
    @locale = locale
    @elements = []
  end

  # Appends a chunk of text as a new element.
  def receive_text(str)
    @elements << str
  end

  # Returns a deep copy. Elements that define #copy are copied through it;
  # everything else (e.g. strings) is dup'ed.
  def copy
    self.class.new(locale && locale.dup).tap do |new_variant|
      new_variant.elements.concat(
        elements.map do |element|
          element.respond_to?(:copy) ? element.copy : element.dup
        end
      )
    end
  end

  # Equal when the other object is a Variant with the same locale and the
  # same elements in the same order. (The previous implementation compared
  # locale with itself and ignored extra elements on the other side.)
  def ==(other_variant)
    other_variant.is_a?(Variant) &&
      locale == other_variant.locale &&
      elements == other_variant.elements
  end
end
|
86
|
+
|
87
|
+
# A placeholder element inside a variant. Carries a type, the placeholder's
# text, and optional start/length values that callers may assign.
class Placeholder
  attr_reader :type, :text
  attr_accessor :start, :length

  # type - the placeholder type (may be nil).
  # text - initial text; #receive_text appends to this string.
  def initialize(type, text = '')
    @type = type
    @text = text
  end

  # Appends str to the placeholder's text.
  def receive_text(str)
    @text << str
  end

  # Returns a copy with duplicated type/text strings and the same
  # start/length.
  def copy
    self.class.new(type && type.dup, text.dup).tap do |new_placeholder|
      # start/length are assigned as-is; they are not dup'ed.
      new_placeholder.start = start
      new_placeholder.length = length
    end
  end

  # Equal when the other object is a Placeholder with identical type, text,
  # start and length. (The previous implementation compared text against the
  # other placeholder's *type*, so a placeholder never equalled its own
  # copy.) Returns false for non-placeholders such as plain strings.
  def ==(other_placeholder)
    other_placeholder.is_a?(Placeholder) &&
      type == other_placeholder.type &&
      text == other_placeholder.text &&
      start == other_placeholder.start &&
      length == other_placeholder.length
  end
end
|
114
|
+
|
115
|
+
# Base class for paired-tag elements. Holds the pairing index `i` and the
# tag's text. Subclasses supply #type.
class Pair
  attr_reader :text, :i

  # i    - the value identifying which begin/end tags belong together.
  # text - initial text; #receive_text appends to this string.
  def initialize(i, text = '')
    @i = i
    @text = text
  end

  # Appends str to the tag's text.
  def receive_text(str)
    @text << str
  end

  # Abstract: subclasses return a symbol identifying the kind of pair.
  def type
    raise NotImplementedError
  end

  # Returns a new instance of the same class with a duplicated text string.
  def copy
    self.class.new(i, text.dup)
  end

  # Equal when the other object is a Pair with the same index, text and
  # type. Returns false (instead of raising NoMethodError) when compared
  # with a non-pair such as a plain string element.
  def ==(other_pair)
    other_pair.is_a?(Pair) &&
      i == other_pair.i &&
      text == other_pair.text &&
      type == other_pair.type
  end
end

# The opening half of a paired tag.
class BeginPair < Pair
  def type
    :begin
  end
end

# The closing half of a paired tag.
class EndPair < Pair
  def type
    :end
  end
end
|
153
|
+
|
154
|
+
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module TmxParser
|
4
|
+
|
5
|
+
# Receives parse events (unit / variant / property / inline elements / text /
# done) and assembles Unit objects from them. Each finished unit is passed to
# the block given at construction time.
#
# A stack tracks the object that should receive the next chunk of text: the
# innermost open variant, property value, placeholder or paired tag.
class Listener
  include TagNames

  # NOTE: `units` is never assigned in this class and therefore reads as nil;
  # it is kept only so the public interface does not change.
  attr_reader :units, :proc

  # block - called with each completed Unit.
  def initialize(&block)
    @stack = []
    @proc = block
  end

  # Starts a new unit; subsequent events attach to it.
  def unit(tuid, segtype)
    @current_unit = Unit.new(tuid, segtype)
  end

  # Starts a new variant on the current unit and makes it the text receiver.
  def variant(locale)
    variant = Variant.new(locale)
    current_unit.variants << variant
    stack.push(variant)
  end

  # Starts a property. The value is attached to the current unit when there
  # is one. A property seen before any unit has started (TMX permits <prop>
  # inside <header>) is still pushed so its text and closing tag are
  # consumed, but it is not attached to anything - previously this raised
  # NoMethodError on nil.
  def property(name)
    val = PropertyValue.new
    current_unit.properties[name] = val if current_unit
    stack.push(val)
  end

  # Forwards text to whatever is on top of the stack, if anything.
  def text(str)
    receiver = stack.last
    receiver.receive_text(str) if receiver
  end

  # Handles a closing tag: a closing unit tag emits the current unit to the
  # block; any other tag pops the stack when it matches the top entry.
  def done(tag_name)
    if tag_name == UNIT_TAG
      proc.call(current_unit)
    elsif tag_name_for(stack.last) == tag_name
      stack.pop
    end
  end

  # Starts a placeholder inside the most recent variant.
  def placeholder(type)
    append_inline(Placeholder.new(type))
  end

  # Starts a begin-paired tag inside the most recent variant.
  def begin_paired_tag(i)
    append_inline(BeginPair.new(i))
  end

  # Starts an end-paired tag inside the most recent variant.
  def end_paired_tag(i)
    append_inline(EndPair.new(i))
  end

  private

  # Adds an inline element to the most recent variant's elements and makes
  # it the current text receiver.
  def append_inline(element)
    current_unit.variants.last.elements << element
    stack.push(element)
  end

  # Maps a stack entry to the tag name that closes it (nil if unknown).
  def tag_name_for(obj)
    case obj
    when Variant then VARIANT_TAG
    when PropertyValue then PROPERTY_TAG
    when Placeholder then PLACEHOLDER_TAG
    when BeginPair then BEGIN_PAIRED_TAG
    when EndPair then END_PAIRED_TAG
    end
  end

  attr_reader :current_unit, :stack
end
|
80
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'nokogiri'
|
4
|
+
|
5
|
+
module TmxParser
|
6
|
+
|
7
|
+
# SAX handler that translates element/character callbacks into Listener
# events.
#
# Text is buffered in @text and flushed to the listener whenever a captured
# element starts or any element ends. @capture_stack holds one boolean per
# open element (on top of an initial `false`) saying whether character data
# at that nesting level should be buffered.
class SaxDocument < Nokogiri::XML::SAX::Document
  include TagNames

  attr_reader :listener

  # listener - object receiving unit/variant/property/... events.
  def initialize(listener)
    @listener = listener
    @capture_stack = [false]
    @text = ''
  end

  # Called for every opening tag. attrs is a list of [name, value] pairs.
  #
  # Every branch pushes exactly one entry onto @capture_stack so that the
  # pop in #end_element stays balanced. Previously only captured tags
  # pushed, so closing an unrecognised element nested inside a segment
  # popped the segment's own entry and the text following it was dropped.
  def start_element(name, attrs = [])
    case name
    when UNIT_TAG
      inherit_capture
      listener.unit(
        get_attr('tuid', attrs), get_attr('segtype', attrs)
      )
    when VARIANT_TAG
      inherit_capture
      listener.variant(get_attr('xml:lang', attrs))
    when SEGMENT_TAG
      capture_text
    when PROPERTY_TAG
      capture_text
      listener.property(get_attr('type', attrs))
    when BEGIN_PAIRED_TAG
      capture_text
      listener.begin_paired_tag(get_attr('i', attrs))
    when END_PAIRED_TAG
      capture_text
      listener.end_paired_tag(get_attr('i', attrs))
    when PLACEHOLDER_TAG
      capture_text
      listener.placeholder(get_attr('type', attrs))
    else
      inherit_capture
    end
  end

  # Called for every closing tag: restores the parent's capture state,
  # flushes buffered text, then tells the listener the element is done.
  def end_element(name)
    @capture_stack.pop
    send_text
    listener.done(name)
  end

  # Buffers character data when the innermost open element captures text.
  def characters(str)
    # Appending in place is safe: send_text replaces @text with a fresh
    # string after handing the old one to the listener.
    @text << str if @capture_stack.last
  end

  private

  # Hands any buffered text to the listener and starts a new buffer.
  def send_text
    listener.text(@text) unless @text.empty?
    @text = ''
  end

  # Flushes pending text and marks the element being opened as capturing.
  def capture_text
    send_text
    @capture_stack.push(true)
  end

  # Marks the element being opened with its parent's capture state, so text
  # inside an unrecognised element nested in a captured one is still kept.
  def inherit_capture
    @capture_stack.push(@capture_stack.last)
  end

  # Returns the value of the named attribute, or nil when it is absent.
  def get_attr(name, attrs)
    found = attrs.find { |attr| attr.first == name }
    found.last if found
  end
end
|
72
|
+
|
73
|
+
end
|
data/lib/tmx-parser.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'nokogiri'
|
4
|
+
|
5
|
+
# Entry point of the gem. The constants below are loaded lazily on first
# reference.
module TmxParser
  autoload :Document,      'tmx-parser/document'
  autoload :SaxDocument,   'tmx-parser/sax_document'
  autoload :Listener,      'tmx-parser/listener'
  autoload :TagNames,      'tmx-parser/tag_names'
  autoload :Unit,          'tmx-parser/elements'
  autoload :PropertyValue, 'tmx-parser/elements'
  autoload :Variant,       'tmx-parser/elements'
  autoload :Placeholder,   'tmx-parser/elements'
  # Pair is the base class of BeginPair/EndPair and lives in the same file;
  # registering it keeps TmxParser::Pair resolvable even when it is the first
  # element constant referenced.
  autoload :Pair,          'tmx-parser/elements'
  autoload :BeginPair,     'tmx-parser/elements'
  autoload :EndPair,       'tmx-parser/elements'

  # Wraps a TMX source in an enumerable Document.
  #
  # string_or_file_handle - the TMX source handed to the Document.
  # encoding              - source encoding; defaults to
  #                         Encoding.default_external.
  #
  # Returns a TmxParser::Document.
  def self.load(string_or_file_handle, encoding = Encoding.default_external)
    Document.new(string_or_file_handle, encoding)
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,245 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
describe TmxParser do
  let(:parser) { TmxParser }
  let(:tuid) { '79b371014a8382a3b6efb86ec6ea97d9' }

  # Returns the variant of +unit+ whose locale matches, or nil.
  def find_variant(locale, unit)
    unit.variants.find { |v| v.locale == locale }
  end

  context 'with a basic tmx document' do
    let(:document) do
      %Q{
        <tmx version="1.4">
          <body>
            <tu tuid="#{tuid}" segtype="block">
              <prop type="x-segment-id">0</prop>
              <prop type="x-some-property">six.hours</prop>
              <tuv xml:lang="en-US"><seg>6 hours</seg></tuv>
              <tuv xml:lang="de-DE"><seg>6 Stunden</seg></tuv>
            </tu>
          </body>
        </tmx>
      }
    end

    describe '#copy' do
      it 'deep copies the tree' do
        parser.load(document).to_a.tap do |units|
          original_unit = units.first
          unit_copy = original_unit.copy

          expect(unit_copy.tuid).to eq(original_unit.tuid)
          expect(unit_copy.segtype).to eq(original_unit.segtype)
          expect(unit_copy.variants.size).to eq(original_unit.variants.size)

          unit_copy.properties.each do |key, prop_value_copy|
            original_prop_value = original_unit.properties[key]
            expect(original_prop_value.value).to eq(prop_value_copy.value)
          end

          unit_copy.variants.each_with_index do |variant_copy, v_idx|
            original_variant = original_unit.variants[v_idx]
            expect(variant_copy.locale).to eq(original_variant.locale)

            variant_copy.elements.each_with_index do |element_copy, e_idx|
              original_element = original_variant.elements[e_idx]
              expect(element_copy).to be_a(original_element.class)
            end
          end
        end
      end
    end

    describe '#==' do
      it 'returns true if the objects (even copies) are equivalent' do
        parser.load(document).to_a.tap do |units|
          expect(units.first).to eq(units.first.copy)
        end
      end

      it 'returns false if the objects are not equivalent' do
        parser.load(document).to_a.tap do |units|
          unit = units.first
          unit_copy = unit.copy

          unit_copy.tuid.replace('foobar')
          expect(unit).to_not eq(unit_copy)
        end
      end
    end

    it 'identifies the tuid and segtype' do
      parser.load(document).to_a.tap do |units|
        expect(units.size).to eq(1)

        units.first.tap do |unit|
          expect(unit.tuid).to eq(tuid)
          expect(unit.segtype).to eq('block')
        end
      end
    end

    it 'identifies the correct variants' do
      parser.load(document).to_a.first.tap do |unit|
        expect(unit.variants.size).to eq(2)
        expect(find_variant('en-US', unit).elements).to eq(['6 hours'])
        expect(find_variant('de-DE', unit).elements).to eq(['6 Stunden'])

        unit.variants.each do |variant|
          expect(variant).to be_a(TmxParser::Variant)
        end
      end
    end

    it 'identifies properties' do
      parser.load(document).to_a.first.tap do |unit|
        expect(unit.properties.size).to eq(2)
        expect(unit.properties).to include('x-segment-id')
        expect(unit.properties).to include('x-some-property')
        expect(unit.properties['x-segment-id'].value).to eq('0')
        expect(unit.properties['x-some-property'].value).to eq('six.hours')
      end
    end
  end

  context 'with a tmx document that contains a property that makes jruby cry' do
    # For some reason, jruby doesn't like square brackets in property values.
    # See: https://github.com/sparklemotion/nokogiri/issues/1261

    let(:document) do
      %Q{
        <tmx version="1.4">
          <body>
            <tu tuid="#{tuid}" segtype="block">
              <prop type="x-segment-id">0</prop>
              <prop type="x-some-property">en:#:daily-data:#:[3]:#:times</prop>
              <tuv xml:lang="en-US"><seg>6 hours</seg></tuv>
              <tuv xml:lang="de-DE"><seg>6 Stunden</seg></tuv>
            </tu>
          </body>
        </tmx>
      }
    end

    it 'identifies the property correctly' do
      parser.load(document).to_a.first.tap do |unit|
        expect(unit.properties).to include('x-some-property')
        expect(unit.properties['x-some-property']).to be_a(TmxParser::PropertyValue)
        expect(unit.properties['x-some-property'].value).to eq(
          'en:#:daily-data:#:[3]:#:times'
        )
      end
    end
  end

  context 'with a tmx document that contains placeholders' do
    let(:document) do
      %Q{
        <tmx version="1.4">
          <body>
            <tu tuid="#{tuid}" segtype="block">
              <prop type="x-segment-id">0</prop>
              <tuv xml:lang="en-US">
                <seg><ph type="x-placeholder">{0}</ph> sessions</seg>
              </tuv>
              <tuv xml:lang="de-DE">
                <seg><ph type="x-placeholder">{0}</ph> Einheiten</seg>
              </tuv>
            </tu>
          </body>
        </tmx>
      }
    end

    it 'identifies the placeholders' do
      parser.load(document).to_a.first.tap do |unit|
        expect(unit.variants.size).to eq(2)

        find_variant('en-US', unit).tap do |en_variant|
          expect(en_variant.elements.size).to eq(2)

          en_variant.elements.first.tap do |first_element|
            expect(first_element).to be_a(TmxParser::Placeholder)
            expect(first_element.type).to eq('x-placeholder')
            expect(first_element.text).to eq('{0}')
          end

          en_variant.elements.last.tap do |last_element|
            expect(last_element).to be_a(String)
            expect(last_element).to eq(' sessions')
          end
        end

        find_variant('de-DE', unit).tap do |de_variant|
          expect(de_variant.elements.size).to eq(2)

          de_variant.elements.first.tap do |first_element|
            expect(first_element).to be_a(TmxParser::Placeholder)
            expect(first_element.type).to eq('x-placeholder')
            expect(first_element.text).to eq('{0}')
          end

          de_variant.elements.last.tap do |last_element|
            expect(last_element).to be_a(String)
            expect(last_element).to eq(' Einheiten')
          end
        end
      end
    end
  end

  context 'with a tmx document that contains paired tags' do
    # The markup carried by <bpt>/<ept> must be entity-escaped: a literal
    # <strong> inside <bpt> would make the fixture malformed XML, and the
    # expectations below require the element text to be '<strong>'.
    let(:document) do
      %Q{
        <tmx version="1.4">
          <body>
            <tu tuid="#{tuid}" segtype="block">
              <prop type="x-segment-id">0</prop>
              <tuv xml:lang="en-US">
                <seg>Build your healthy habit of daily training with <bpt i="3">&lt;strong&gt;</bpt>email training reminders.<ept i="3">&lt;/strong&gt;</ept></seg>
              </tuv>
              <tuv xml:lang="de-DE">
                <seg><bpt i="3">&lt;strong&gt;</bpt>Mit Erinnerungen per E-Mail<ept i="3">&lt;/strong&gt;</ept> können Sie das tägliche Training zu einer schönen Angewohnheit werden lassen.</seg>
              </tuv>
            </tu>
          </body>
        </tmx>
      }
    end

    it 'identifies the tags' do
      parser.load(document).to_a.first.tap do |unit|
        expect(unit.variants.size).to eq(2)

        find_variant('en-US', unit).tap do |en_variant|
          expect(en_variant.elements.size).to eq(4)

          en_variant.elements[0].tap do |element|
            expect(element).to be_a(String)
            expect(element).to eq('Build your healthy habit of daily training with ')
          end

          en_variant.elements[1].tap do |element|
            expect(element).to be_a(TmxParser::BeginPair)
            expect(element.i).to eq('3')
            expect(element.text).to eq('<strong>')
          end

          en_variant.elements[2].tap do |element|
            expect(element).to be_a(String)
            expect(element).to eq('email training reminders.')
          end

          en_variant.elements[3].tap do |element|
            expect(element).to be_a(TmxParser::EndPair)
            expect(element.i).to eq('3')
            expect(element.text).to eq('</strong>')
          end
        end
      end
    end
  end
end
|
data/tmx-parser.gemspec
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
$:.unshift File.join(File.dirname(__FILE__), 'lib')
|
4
|
+
require 'tmx-parser/version'
|
5
|
+
|
6
|
+
# Gem specification for tmx-parser-2018. The version comes from
# TmxParser::VERSION (lib/tmx-parser/version.rb, required above).
Gem::Specification.new do |s|
  s.name     = "tmx-parser-2018"
  s.version  = ::TmxParser::VERSION
  s.authors  = ["Cameron Dutro", "Michiel de Mare"]
  s.email    = ["camertron@gmail.com", "michiel@tolq.com"]
  s.homepage = "http://github.com/mdemare"

  s.description = s.summary = "Parser for the Translation Memory eXchange (.tmx) file format."

  s.platform = Gem::Platform::RUBY
  # `s.has_rdoc = true` was dropped: the attribute is deprecated in RubyGems
  # and has no effect on the built gem.

  s.require_path = 'lib'
  s.files = Dir["{lib,spec}/**/*", "Gemfile", "History.txt", "README.md", "Rakefile", "tmx-parser.gemspec"]

  # Runtime dependency: the SAX parser used to read .tmx files.
  s.add_dependency 'nokogiri', '~> 1.8'
end
|
metadata
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: tmx-parser-2018
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Cameron Dutro
|
8
|
+
- Michiel de Mare
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2018-02-07 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: nokogiri
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - "~>"
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '1.8'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - "~>"
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '1.8'
|
28
|
+
description: Parser for the Translation Memory eXchange (.tmx) file format.
|
29
|
+
email:
|
30
|
+
- camertron@gmail.com
|
31
|
+
- michiel@tolq.com
|
32
|
+
executables: []
|
33
|
+
extensions: []
|
34
|
+
extra_rdoc_files: []
|
35
|
+
files:
|
36
|
+
- Gemfile
|
37
|
+
- History.txt
|
38
|
+
- README.md
|
39
|
+
- Rakefile
|
40
|
+
- lib/tmx-parser.rb
|
41
|
+
- lib/tmx-parser/document.rb
|
42
|
+
- lib/tmx-parser/elements.rb
|
43
|
+
- lib/tmx-parser/listener.rb
|
44
|
+
- lib/tmx-parser/sax_document.rb
|
45
|
+
- lib/tmx-parser/tag_names.rb
|
46
|
+
- lib/tmx-parser/version.rb
|
47
|
+
- spec/spec_helper.rb
|
48
|
+
- spec/tmx-parser_spec.rb
|
49
|
+
- tmx-parser.gemspec
|
50
|
+
homepage: http://github.com/mdemare
|
51
|
+
licenses: []
|
52
|
+
metadata: {}
|
53
|
+
post_install_message:
|
54
|
+
rdoc_options: []
|
55
|
+
require_paths:
|
56
|
+
- lib
|
57
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
67
|
+
requirements: []
|
68
|
+
rubyforge_project:
|
69
|
+
rubygems_version: 2.7.4
|
70
|
+
signing_key:
|
71
|
+
specification_version: 4
|
72
|
+
summary: Parser for the Translation Memory eXchange (.tmx) file format.
|
73
|
+
test_files: []
|