translatomatic 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.translatomatic/config.yml +18 -0
- data/.travis.yml +33 -33
- data/Gemfile +6 -4
- data/README.de.md +53 -18
- data/README.es.md +55 -20
- data/README.fr.md +54 -19
- data/README.it.md +58 -23
- data/README.ja.md +54 -19
- data/README.ko.md +58 -23
- data/README.md +167 -141
- data/README.ms.md +51 -16
- data/README.pt.md +58 -23
- data/README.ru.md +53 -18
- data/README.sv.md +53 -18
- data/README.zh.md +53 -18
- data/bin/translatomatic +6 -6
- data/bin/travis +24 -26
- data/config/locales/translatomatic/de.yml +22 -11
- data/config/locales/translatomatic/en.yml +21 -12
- data/config/locales/translatomatic/es.yml +22 -11
- data/config/locales/translatomatic/fr.yml +22 -12
- data/config/locales/translatomatic/it.yml +22 -11
- data/config/locales/translatomatic/ja.yml +22 -11
- data/config/locales/translatomatic/ko.yml +22 -11
- data/config/locales/translatomatic/ms.yml +22 -11
- data/config/locales/translatomatic/pt.yml +22 -11
- data/config/locales/translatomatic/ru.yml +22 -11
- data/config/locales/translatomatic/sv.yml +22 -11
- data/config/locales/translatomatic/zh.yml +22 -11
- data/db/migrate/201712170000_initial.rb +25 -25
- data/lib/translatomatic/cli/base.rb +81 -73
- data/lib/translatomatic/cli/config.rb +110 -81
- data/lib/translatomatic/cli/main.rb +85 -72
- data/lib/translatomatic/cli/translate.rb +141 -106
- data/lib/translatomatic/cli.rb +8 -8
- data/lib/translatomatic/config.rb +302 -155
- data/lib/translatomatic/converter.rb +28 -260
- data/lib/translatomatic/database.rb +134 -134
- data/lib/translatomatic/define_options.rb +22 -0
- data/lib/translatomatic/escaped_unicode.rb +0 -0
- data/lib/translatomatic/extractor/base.rb +16 -16
- data/lib/translatomatic/extractor/ruby.rb +6 -6
- data/lib/translatomatic/extractor.rb +5 -5
- data/lib/translatomatic/file_translator.rb +269 -0
- data/lib/translatomatic/http_request.rb +162 -162
- data/lib/translatomatic/locale.rb +76 -76
- data/lib/translatomatic/logger.rb +23 -23
- data/lib/translatomatic/model/locale.rb +25 -25
- data/lib/translatomatic/model/text.rb +19 -19
- data/lib/translatomatic/model.rb +1 -1
- data/lib/translatomatic/option.rb +37 -41
- data/lib/translatomatic/progress_updater.rb +13 -13
- data/lib/translatomatic/resource_file/base.rb +269 -192
- data/lib/translatomatic/resource_file/csv.rb +37 -0
- data/lib/translatomatic/resource_file/html.rb +54 -47
- data/lib/translatomatic/resource_file/markdown.rb +50 -55
- data/lib/translatomatic/resource_file/plist.rb +153 -19
- data/lib/translatomatic/resource_file/po.rb +107 -0
- data/lib/translatomatic/resource_file/properties.rb +91 -90
- data/lib/translatomatic/resource_file/resw.rb +50 -30
- data/lib/translatomatic/resource_file/subtitle.rb +75 -0
- data/lib/translatomatic/resource_file/text.rb +24 -30
- data/lib/translatomatic/resource_file/xcode_strings.rb +75 -80
- data/lib/translatomatic/resource_file/xml.rb +98 -91
- data/lib/translatomatic/resource_file/yaml.rb +94 -116
- data/lib/translatomatic/resource_file.rb +87 -78
- data/lib/translatomatic/string.rb +188 -188
- data/lib/translatomatic/tmx/document.rb +99 -99
- data/lib/translatomatic/translation_result.rb +63 -63
- data/lib/translatomatic/{converter_stats.rb → translation_stats.rb} +17 -17
- data/lib/translatomatic/translator/base.rb +1 -1
- data/lib/translatomatic/translator/google.rb +2 -0
- data/lib/translatomatic/translator.rb +10 -2
- data/lib/translatomatic/util.rb +45 -45
- data/lib/translatomatic/version.rb +7 -7
- data/lib/translatomatic.rb +52 -49
- data/translatomatic.gemspec +3 -2
- metadata +25 -5
@@ -1,188 +1,188 @@
|
|
1
|
-
module Translatomatic
|
2
|
-
# A string object with an associated locale.
|
3
|
-
class String
|
4
|
-
|
5
|
-
# @return [String] The string
|
6
|
-
attr_reader :value
|
7
|
-
|
8
|
-
# @return [Translatomatic::Locale] The string's locale
|
9
|
-
attr_reader :locale
|
10
|
-
|
11
|
-
# @return [Translatomatic::String] If this string is a substring of
|
12
|
-
# another string, returns the original string. Otherwise, returns nil.
|
13
|
-
attr_reader :parent
|
14
|
-
|
15
|
-
# @return [Number] If this string is a substring of another string,
|
16
|
-
# returns the starting offset of this string in the original.
|
17
|
-
attr_reader :offset
|
18
|
-
|
19
|
-
def initialize(value, locale, options = {})
|
20
|
-
@value = value.to_s || ''
|
21
|
-
@locale = Translatomatic::Locale.parse(locale)
|
22
|
-
@offset = options[:offset] || 0
|
23
|
-
@parent = options[:parent]
|
24
|
-
end
|
25
|
-
|
26
|
-
# @return [String] The value of the string
|
27
|
-
def to_s
|
28
|
-
@value
|
29
|
-
end
|
30
|
-
|
31
|
-
# @return [Number] The length of the string
|
32
|
-
def length
|
33
|
-
@value.length
|
34
|
-
end
|
35
|
-
|
36
|
-
# @return [boolean] True if the string is empty
|
37
|
-
def empty?
|
38
|
-
@value.empty?
|
39
|
-
end
|
40
|
-
|
41
|
-
# Invokes value.match
|
42
|
-
# @param pattern [Regexp,String] The regex pattern to match
|
43
|
-
# @return [MatchData] Object describing the match, or nil if no match
|
44
|
-
def match(pattern)
|
45
|
-
@value.match(pattern)
|
46
|
-
end
|
47
|
-
|
48
|
-
# @return [boolean] true if this string is a substring of another string
|
49
|
-
def substring?
|
50
|
-
@parent ? true : false
|
51
|
-
end
|
52
|
-
|
53
|
-
# @return [Symbol] The type of string, corresponding to TMX segtype.
|
54
|
-
# @see http://xml.coverpages.org/tmxSpec971212.html#SEGTYPE
|
55
|
-
def type
|
56
|
-
if sentences.length >= 2
|
57
|
-
:paragraph
|
58
|
-
else
|
59
|
-
script = script_data
|
60
|
-
@value.strip.match(/#{script.delimiter}\s*$/) ? :sentence : :phrase
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
# Find all sentences in the string
|
65
|
-
# @return [Array<Translatomatic::String] List of sentences
|
66
|
-
def sentences
|
67
|
-
substrings(sentence_regex)
|
68
|
-
end
|
69
|
-
|
70
|
-
# Find all substrings matching the given regex
|
71
|
-
# @return [Array<Translatomatic::String] List of substrings
|
72
|
-
def substrings(regex)
|
73
|
-
matches = matches(@value, regex)
|
74
|
-
strings = []
|
75
|
-
matches.each do |match|
|
76
|
-
substring = match.to_s
|
77
|
-
# find leading and trailing whitespace
|
78
|
-
next if substring.length == 0
|
79
|
-
|
80
|
-
parts = substring.match(/\A(\s*)(.*?)(\s*)\z/m).to_a
|
81
|
-
value = parts[2]
|
82
|
-
offset = match.offset(0)[0]
|
83
|
-
offset += parts[1].length # leading whitespace
|
84
|
-
strings << self.class.new(value, locale, offset: offset, parent: self)
|
85
|
-
end
|
86
|
-
|
87
|
-
# return [self] if there's only one substring and it's equal to self
|
88
|
-
strings.length == 1 && strings[0].eql?(self) ? [self] : strings
|
89
|
-
end
|
90
|
-
|
91
|
-
# @return [boolean] true if other is a {Translatomatic::String} with
|
92
|
-
# the same value and locale.
|
93
|
-
def eql?(other)
|
94
|
-
other.kind_of?(Translatomatic::String) && other.hash == hash
|
95
|
-
end
|
96
|
-
|
97
|
-
# (see #eql?)
|
98
|
-
def ==(other)
|
99
|
-
eql?(other)
|
100
|
-
end
|
101
|
-
|
102
|
-
# @!visibility private
|
103
|
-
def hash
|
104
|
-
[value, locale].hash
|
105
|
-
end
|
106
|
-
|
107
|
-
private
|
108
|
-
|
109
|
-
# @!visibility private
|
110
|
-
class Script
|
111
|
-
attr_reader :language
|
112
|
-
attr_reader :delimiter # sentence delimiter
|
113
|
-
attr_reader :trailing_space # delimiter requires trailing space or eol
|
114
|
-
attr_reader :left_to_right # script direction
|
115
|
-
|
116
|
-
def initialize(language:, delimiter:, trailing_space:, direction:)
|
117
|
-
@language = language
|
118
|
-
@delimiter = delimiter
|
119
|
-
@trailing_space = trailing_space
|
120
|
-
@left_to_right = direction == :ltr
|
121
|
-
raise "invalid direction" unless [:ltr, :rtl].include?(direction)
|
122
|
-
end
|
123
|
-
end
|
124
|
-
|
125
|
-
SCRIPT_DATA = [
|
126
|
-
# [language, delimiter, trailing space, direction]
|
127
|
-
# japanese, no space after
|
128
|
-
["ja", "\u3002", false, :ltr],
|
129
|
-
# chinese, no space after
|
130
|
-
["zh", "\u3002", false, :ltr], # can be written any direction
|
131
|
-
# armenian, space after
|
132
|
-
["hy", ":", true, :ltr],
|
133
|
-
# hindi, space after
|
134
|
-
["hi", "।", true, :ltr],
|
135
|
-
# urdu, space after, right to left
|
136
|
-
["ur", "\u06d4", true, :rtl],
|
137
|
-
# thai, spaces used to separate sentences
|
138
|
-
["th", "\\s", false, :ltr],
|
139
|
-
# arabic, right to left
|
140
|
-
["ar", "\\.", true, :rtl],
|
141
|
-
# hebrew, right to left
|
142
|
-
["he", "\\.", true, :rtl],
|
143
|
-
# all other languages
|
144
|
-
["default", "\\.", true, :ltr],
|
145
|
-
]
|
146
|
-
|
147
|
-
class << self
|
148
|
-
attr_reader :script_data
|
149
|
-
end
|
150
|
-
|
151
|
-
begin
|
152
|
-
script_data = {}
|
153
|
-
SCRIPT_DATA.each do |lang, delimiter, trailing, ltr|
|
154
|
-
script = Script.new(language: lang, delimiter: delimiter,
|
155
|
-
trailing_space: trailing, direction: ltr)
|
156
|
-
script_data[lang] = script
|
157
|
-
end
|
158
|
-
@script_data = script_data
|
159
|
-
end
|
160
|
-
|
161
|
-
def matches(s, re)
|
162
|
-
start_at = 0
|
163
|
-
matches = []
|
164
|
-
while(m = s.match(re, start_at))
|
165
|
-
break if m.to_s.empty?
|
166
|
-
matches.push(m)
|
167
|
-
start_at = m.end(0)
|
168
|
-
end
|
169
|
-
matches
|
170
|
-
end
|
171
|
-
|
172
|
-
def sentence_regex
|
173
|
-
script = script_data
|
174
|
-
if script.trailing_space
|
175
|
-
regex = /.*?(?:#{script.delimiter}\s+|\z)/m
|
176
|
-
else
|
177
|
-
# no trailing space after delimiter
|
178
|
-
regex = /.*?(?:#{script.delimiter}|\z)/m
|
179
|
-
end
|
180
|
-
end
|
181
|
-
|
182
|
-
def script_data
|
183
|
-
data = self.class.script_data
|
184
|
-
data[locale.language] || data["default"]
|
185
|
-
end
|
186
|
-
|
187
|
-
end
|
188
|
-
end
|
1
|
+
module Translatomatic
  # A string object with an associated locale.
  #
  # A string may be a substring of another string (see {#substrings}), in
  # which case it remembers its parent and its offset within the parent.
  class String
    # @!visibility private
    # Sentence-splitting rules for one language.
    class Script
      attr_reader :language
      attr_reader :delimiter      # sentence delimiter (regex source)
      attr_reader :trailing_space # delimiter requires trailing space or eol
      attr_reader :left_to_right  # script direction

      # @param language [String] Language code the rules apply to
      # @param delimiter [String] Regex source matching the sentence delimiter
      # @param trailing_space [boolean] True if whitespace must follow
      #   the delimiter
      # @param direction [Symbol] Either :ltr or :rtl
      # @raise [RuntimeError] if direction is neither :ltr nor :rtl
      def initialize(language:, delimiter:, trailing_space:, direction:)
        # validate before touching any state
        raise "invalid direction" unless [:ltr, :rtl].include?(direction)

        @language = language
        @delimiter = delimiter
        @trailing_space = trailing_space
        @left_to_right = direction == :ltr
      end
    end

    # Rows of [language, delimiter, trailing space, direction].
    # The delimiter is interpolated into a regex, hence the escaped forms.
    SCRIPT_DATA = [
      # japanese, no space after
      ["ja", "\u3002", false, :ltr],
      # chinese, no space after
      ["zh", "\u3002", false, :ltr], # can be written any direction
      # armenian, space after
      ["hy", ":", true, :ltr],
      # hindi, space after
      ["hi", "।", true, :ltr],
      # urdu, space after, right to left
      ["ur", "\u06d4", true, :rtl],
      # thai, spaces used to separate sentences
      ["th", "\\s", false, :ltr],
      # arabic, right to left
      ["ar", "\\.", true, :rtl],
      # hebrew, right to left
      ["he", "\\.", true, :rtl],
      # all other languages
      ["default", "\\.", true, :ltr],
    ].freeze

    class << self
      # @return [Hash<String,Script>] Script rules keyed by language code,
      #   plus a "default" entry.
      attr_reader :script_data
    end

    @script_data = SCRIPT_DATA.each_with_object({}) do |row, data|
      lang, delimiter, trailing, direction = row
      data[lang] = Script.new(language: lang, delimiter: delimiter,
                              trailing_space: trailing, direction: direction)
    end

    # @return [String] The string
    attr_reader :value

    # @return [Translatomatic::Locale] The string's locale
    attr_reader :locale

    # @return [Translatomatic::String] If this string is a substring of
    #   another string, returns the original string. Otherwise, returns nil.
    attr_reader :parent

    # @return [Number] If this string is a substring of another string,
    #   returns the starting offset of this string in the original.
    attr_reader :offset

    # @param value [Object] The string value; converted with to_s, so nil
    #   becomes the empty string
    # @param locale [Object] Anything accepted by Translatomatic::Locale.parse
    # @param options [Hash] Optional :offset (defaults to 0) and :parent
    def initialize(value, locale, options = {})
      @value = value.to_s
      @locale = Translatomatic::Locale.parse(locale)
      @offset = options[:offset] || 0
      @parent = options[:parent]
    end

    # @return [String] The value of the string
    def to_s
      @value
    end

    # @return [Number] The length of the string
    def length
      @value.length
    end

    # @return [boolean] True if the string is empty
    def empty?
      @value.empty?
    end

    # Invokes value.match
    # @param pattern [Regexp,String] The regex pattern to match
    # @return [MatchData] Object describing the match, or nil if no match
    def match(pattern)
      @value.match(pattern)
    end

    # @return [boolean] true if this string is a substring of another string
    def substring?
      @parent ? true : false
    end

    # @return [Symbol] The type of string, corresponding to TMX segtype:
    #   :paragraph for two or more sentences, :sentence when the text ends
    #   with the locale's sentence delimiter, otherwise :phrase.
    # @see http://xml.coverpages.org/tmxSpec971212.html#SEGTYPE
    def type
      return :paragraph if sentences.length >= 2

      # \z rather than $: only the end of the whole value counts, not the
      # end of an intermediate line.
      ending = /#{script_data.delimiter}\s*\z/
      @value.strip.match(ending) ? :sentence : :phrase
    end

    # Find all sentences in the string
    # @return [Array<Translatomatic::String>] List of sentences
    def sentences
      substrings(sentence_regex)
    end

    # Find all substrings matching the given regex. Leading and trailing
    # whitespace is removed from each substring, and the offset of each
    # substring points at its first non-whitespace character.
    # @param regex [Regexp] Pattern describing one substring
    # @return [Array<Translatomatic::String>] List of substrings
    def substrings(regex)
      # #matches never yields an empty match, so no empty check is needed
      strings = matches(@value, regex).map do |match|
        leading, text = match.to_s.match(/\A(\s*)(.*?)\s*\z/m).captures
        start = match.begin(0) + leading.length
        self.class.new(text, locale, offset: start, parent: self)
      end

      # return [self] if there's only one substring and it's equal to self
      strings.length == 1 && strings[0].eql?(self) ? [self] : strings
    end

    # Equality is decided by comparing {#hash} values.
    # @return [boolean] true if other is a {Translatomatic::String} with
    #   the same value and locale.
    def eql?(other)
      other.is_a?(Translatomatic::String) && other.hash == hash
    end

    # (see #eql?)
    def ==(other)
      eql?(other)
    end

    # @!visibility private
    def hash
      [value, locale].hash
    end

    private

    # Collect successive non-empty matches of re in s, each search starting
    # where the previous match ended. Stops at the first empty match.
    # @return [Array<MatchData>] The matches, in order
    def matches(s, re)
      found = []
      start_at = 0
      while (m = s.match(re, start_at))
        break if m.to_s.empty?

        found << m
        start_at = m.end(0)
      end
      found
    end

    # @return [Regexp] Pattern matching one sentence: the shortest run of
    #   text up to the delimiter (plus whitespace, if the script requires
    #   it) or the end of the string.
    def sentence_regex
      script = script_data
      terminator = script.delimiter
      terminator = "#{terminator}\\s+" if script.trailing_space
      /.*?(?:#{terminator}|\z)/m
    end

    # @return [Script] Rules for this string's language, or the default rules
    def script_data
      data = self.class.script_data
      data[locale.language] || data["default"]
    end
  end
end
|
@@ -1,99 +1,99 @@
|
|
1
|
-
module Translatomatic::TMX
|
2
|
-
# Translation Memory Exchange document
|
3
|
-
class Document
|
4
|
-
|
5
|
-
# Create a new instance
|
6
|
-
# @param units [Array<TranslationUnit>] A list of translation units
|
7
|
-
# @param source_locale [Locale] Source locale
|
8
|
-
# @return [Translatomatic::TMX::Document] a new TMX object
|
9
|
-
def initialize(units, source_locale)
|
10
|
-
units = [units] unless units.kind_of?(Array)
|
11
|
-
@units = units
|
12
|
-
@source_locale = source_locale
|
13
|
-
end
|
14
|
-
|
15
|
-
# @return [String] An XML string
|
16
|
-
def to_xml(options = {})
|
17
|
-
builder = Nokogiri::XML::Builder.new do |xml|
|
18
|
-
dtd = options[:dtd] || TMX_DTD
|
19
|
-
xml.doc.create_internal_subset('tmx', nil, dtd)
|
20
|
-
xml.tmx(version: "1.4") do
|
21
|
-
xml.header(creationtool: "Translatomatic",
|
22
|
-
creationtoolversion: Translatomatic::VERSION,
|
23
|
-
datatype: "PlainText",
|
24
|
-
segtype: "phrase", # default segtype
|
25
|
-
adminlang: @source_locale.to_s,
|
26
|
-
srclang: @source_locale.to_s,
|
27
|
-
"o-tmf": DEFAULT_OTMF
|
28
|
-
)
|
29
|
-
xml.body { tmx_body(xml) }
|
30
|
-
end
|
31
|
-
end
|
32
|
-
builder.to_xml
|
33
|
-
end
|
34
|
-
|
35
|
-
# Create a TMX document from the given converter
|
36
|
-
# @param texts [Array<Translatomatic::Model::Text>] List of texts
|
37
|
-
# @return [Translatomatic::TMX::Document] TMX document
|
38
|
-
def self.from_texts(texts)
|
39
|
-
# group texts by from_text_id to create units
|
40
|
-
# source_locale: use from_text.locale
|
41
|
-
# origin: use text.translator
|
42
|
-
sources = texts.select { |i| i.from_text.nil? }
|
43
|
-
source_locales = sources.collect { |i| i.locale }.uniq
|
44
|
-
raise t("tmx.multiple_locales") if source_locales.length > 1
|
45
|
-
units = units_from_texts(texts)
|
46
|
-
|
47
|
-
return new(units, source_locales[0])
|
48
|
-
end
|
49
|
-
|
50
|
-
def self.valid?(xml)
|
51
|
-
options = Nokogiri::XML::ParseOptions::DTDVALID
|
52
|
-
doc = Nokogiri::XML::Document.parse(xml, nil, nil, options)
|
53
|
-
doc.internal_subset.validate(doc)
|
54
|
-
end
|
55
|
-
|
56
|
-
private
|
57
|
-
|
58
|
-
class << self
|
59
|
-
include Translatomatic::Util
|
60
|
-
end
|
61
|
-
|
62
|
-
TMX_DTD = "http://www.ttt.org/oscarstandards/tmx/tmx14.dtd"
|
63
|
-
DEFAULT_OTMF = "Translatomatic"
|
64
|
-
|
65
|
-
def tmx_body(xml)
|
66
|
-
@units.each do |unit|
|
67
|
-
xml.tu("segtype": unit.strings[0].type) do
|
68
|
-
unit.strings.each do |string|
|
69
|
-
xml.tuv("xml:lang": string.locale.to_s) do
|
70
|
-
xml.seg string.value
|
71
|
-
end
|
72
|
-
end
|
73
|
-
end
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
# @return [Array<Translatomatic::TMX::TranslationUnit] translation unit list
|
78
|
-
def self.units_from_texts(texts)
|
79
|
-
# group texts by from_text_id
|
80
|
-
texts_by_from_id = {}
|
81
|
-
texts.each do |text|
|
82
|
-
id = text.from_text_id || text.id
|
83
|
-
list = (texts_by_from_id[id] ||= [])
|
84
|
-
list << text
|
85
|
-
end
|
86
|
-
|
87
|
-
# create list of Translation Units
|
88
|
-
texts_by_from_id.values.collect do |list|
|
89
|
-
strings = list.uniq.collect { |i| string(i.value, i.locale) }
|
90
|
-
tmx_unit(strings)
|
91
|
-
end
|
92
|
-
end
|
93
|
-
|
94
|
-
def self.tmx_unit(strings)
|
95
|
-
Translatomatic::TMX::TranslationUnit.new(strings)
|
96
|
-
end
|
97
|
-
|
98
|
-
end # class
|
99
|
-
end # module
|
1
|
+
module Translatomatic::TMX
  # Translation Memory Exchange document
  class Document
    class << self
      # NOTE(review): presumably supplies the `t` and `string` helpers used
      # by the class methods below - confirm against Translatomatic::Util.
      include Translatomatic::Util
    end

    # Default DTD referenced from the generated document.
    TMX_DTD = "http://www.ttt.org/oscarstandards/tmx/tmx14.dtd"
    # Value written to the header's o-tmf attribute.
    DEFAULT_OTMF = "Translatomatic"

    # Create a new instance
    # @param units [Array<TranslationUnit>] A list of translation units.
    #   A single unit is wrapped in an array.
    # @param source_locale [Locale] Source locale
    # @return [Translatomatic::TMX::Document] a new TMX object
    def initialize(units, source_locale)
      @units = units.is_a?(Array) ? units : [units]
      @source_locale = source_locale
    end

    # Serialise the document.
    # @param options [Hash] Optional :dtd to override the DTD system id
    # @return [String] An XML string
    def to_xml(options = {})
      dtd = options[:dtd] || TMX_DTD
      builder = Nokogiri::XML::Builder.new do |xml|
        xml.doc.create_internal_subset('tmx', nil, dtd)
        xml.tmx(version: "1.4") do
          xml.header(creationtool: "Translatomatic",
                     creationtoolversion: Translatomatic::VERSION,
                     datatype: "PlainText",
                     segtype: "phrase", # default segtype
                     adminlang: @source_locale.to_s,
                     srclang: @source_locale.to_s,
                     "o-tmf": DEFAULT_OTMF)
          xml.body { tmx_body(xml) }
        end
      end
      builder.to_xml
    end

    # Create a TMX document from the given texts. Texts without a
    # from_text are treated as sources; they must all share one locale.
    # @param texts [Array<Translatomatic::Model::Text>] List of texts
    # @return [Translatomatic::TMX::Document] TMX document
    # @raise [RuntimeError] if the source texts use more than one locale
    def self.from_texts(texts)
      sources = texts.select { |text| text.from_text.nil? }
      source_locales = sources.map(&:locale).uniq
      raise t("tmx.multiple_locales") if source_locales.length > 1

      # source_locales[0] is nil when none of the texts is a source
      new(units_from_texts(texts), source_locales[0])
    end

    # Validate an XML string against the DTD declared in its DOCTYPE.
    # @param xml [String] The XML to check
    # @return [boolean] true if validation reports no errors; false if
    #   there are errors or the document declares no DTD
    def self.valid?(xml)
      options = Nokogiri::XML::ParseOptions::DTDVALID
      doc = Nokogiri::XML::Document.parse(xml, nil, nil, options)
      dtd = doc.internal_subset
      return false unless dtd

      # DTD#validate returns a list of errors, which is truthy even when
      # it holds errors, so it must be tested for emptiness.
      dtd.validate(doc).empty?
    end

    # Group texts into translation units: a text belongs to the unit of
    # its from_text_id, or of its own id when it has none.
    # NOTE: `private` has no effect on `def self.` methods, so this method
    # and tmx_unit are publicly callable.
    # @param texts [Array<Translatomatic::Model::Text>] List of texts
    # @return [Array<Translatomatic::TMX::TranslationUnit>] translation
    #   unit list
    def self.units_from_texts(texts)
      grouped = texts.group_by { |text| text.from_text_id || text.id }
      grouped.values.map do |list|
        tmx_unit(list.uniq.map { |text| string(text.value, text.locale) })
      end
    end

    # @param strings [Array] Strings making up one unit
    # @return [Translatomatic::TMX::TranslationUnit] A new translation unit
    def self.tmx_unit(strings)
      Translatomatic::TMX::TranslationUnit.new(strings)
    end

    private

    # Write one <tu> element per unit, each holding one <tuv>/<seg> pair
    # per string. The unit's segtype is taken from its first string.
    # @param xml [Nokogiri::XML::Builder] Builder positioned inside <body>
    def tmx_body(xml)
      @units.each do |unit|
        xml.tu("segtype": unit.strings[0].type) do
          unit.strings.each do |string|
            xml.tuv("xml:lang": string.locale.to_s) do
              xml.seg string.value
            end
          end
        end
      end
    end
  end # class
end # module
|