translatomatic 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.translatomatic/config.yml +18 -0
- data/.travis.yml +33 -33
- data/Gemfile +6 -4
- data/README.de.md +53 -18
- data/README.es.md +55 -20
- data/README.fr.md +54 -19
- data/README.it.md +58 -23
- data/README.ja.md +54 -19
- data/README.ko.md +58 -23
- data/README.md +167 -141
- data/README.ms.md +51 -16
- data/README.pt.md +58 -23
- data/README.ru.md +53 -18
- data/README.sv.md +53 -18
- data/README.zh.md +53 -18
- data/bin/translatomatic +6 -6
- data/bin/travis +24 -26
- data/config/locales/translatomatic/de.yml +22 -11
- data/config/locales/translatomatic/en.yml +21 -12
- data/config/locales/translatomatic/es.yml +22 -11
- data/config/locales/translatomatic/fr.yml +22 -12
- data/config/locales/translatomatic/it.yml +22 -11
- data/config/locales/translatomatic/ja.yml +22 -11
- data/config/locales/translatomatic/ko.yml +22 -11
- data/config/locales/translatomatic/ms.yml +22 -11
- data/config/locales/translatomatic/pt.yml +22 -11
- data/config/locales/translatomatic/ru.yml +22 -11
- data/config/locales/translatomatic/sv.yml +22 -11
- data/config/locales/translatomatic/zh.yml +22 -11
- data/db/migrate/201712170000_initial.rb +25 -25
- data/lib/translatomatic/cli/base.rb +81 -73
- data/lib/translatomatic/cli/config.rb +110 -81
- data/lib/translatomatic/cli/main.rb +85 -72
- data/lib/translatomatic/cli/translate.rb +141 -106
- data/lib/translatomatic/cli.rb +8 -8
- data/lib/translatomatic/config.rb +302 -155
- data/lib/translatomatic/converter.rb +28 -260
- data/lib/translatomatic/database.rb +134 -134
- data/lib/translatomatic/define_options.rb +22 -0
- data/lib/translatomatic/escaped_unicode.rb +0 -0
- data/lib/translatomatic/extractor/base.rb +16 -16
- data/lib/translatomatic/extractor/ruby.rb +6 -6
- data/lib/translatomatic/extractor.rb +5 -5
- data/lib/translatomatic/file_translator.rb +269 -0
- data/lib/translatomatic/http_request.rb +162 -162
- data/lib/translatomatic/locale.rb +76 -76
- data/lib/translatomatic/logger.rb +23 -23
- data/lib/translatomatic/model/locale.rb +25 -25
- data/lib/translatomatic/model/text.rb +19 -19
- data/lib/translatomatic/model.rb +1 -1
- data/lib/translatomatic/option.rb +37 -41
- data/lib/translatomatic/progress_updater.rb +13 -13
- data/lib/translatomatic/resource_file/base.rb +269 -192
- data/lib/translatomatic/resource_file/csv.rb +37 -0
- data/lib/translatomatic/resource_file/html.rb +54 -47
- data/lib/translatomatic/resource_file/markdown.rb +50 -55
- data/lib/translatomatic/resource_file/plist.rb +153 -19
- data/lib/translatomatic/resource_file/po.rb +107 -0
- data/lib/translatomatic/resource_file/properties.rb +91 -90
- data/lib/translatomatic/resource_file/resw.rb +50 -30
- data/lib/translatomatic/resource_file/subtitle.rb +75 -0
- data/lib/translatomatic/resource_file/text.rb +24 -30
- data/lib/translatomatic/resource_file/xcode_strings.rb +75 -80
- data/lib/translatomatic/resource_file/xml.rb +98 -91
- data/lib/translatomatic/resource_file/yaml.rb +94 -116
- data/lib/translatomatic/resource_file.rb +87 -78
- data/lib/translatomatic/string.rb +188 -188
- data/lib/translatomatic/tmx/document.rb +99 -99
- data/lib/translatomatic/translation_result.rb +63 -63
- data/lib/translatomatic/{converter_stats.rb → translation_stats.rb} +17 -17
- data/lib/translatomatic/translator/base.rb +1 -1
- data/lib/translatomatic/translator/google.rb +2 -0
- data/lib/translatomatic/translator.rb +10 -2
- data/lib/translatomatic/util.rb +45 -45
- data/lib/translatomatic/version.rb +7 -7
- data/lib/translatomatic.rb +52 -49
- data/translatomatic.gemspec +3 -2
- metadata +25 -5
@@ -1,188 +1,188 @@
|
|
1
|
-
module Translatomatic
|
2
|
-
# A string object with an associated locale.
|
3
|
-
class String
|
4
|
-
|
5
|
-
# @return [String] The string
|
6
|
-
attr_reader :value
|
7
|
-
|
8
|
-
# @return [Translatomatic::Locale] The string's locale
|
9
|
-
attr_reader :locale
|
10
|
-
|
11
|
-
# @return [Translatomatic::String] If this string is a substring of
|
12
|
-
# another string, returns the original string. Otherwise, returns nil.
|
13
|
-
attr_reader :parent
|
14
|
-
|
15
|
-
# @return [Number] If this string is a substring of another string,
|
16
|
-
# returns the starting offset of this string in the original.
|
17
|
-
attr_reader :offset
|
18
|
-
|
19
|
-
def initialize(value, locale, options = {})
|
20
|
-
@value = value.to_s || ''
|
21
|
-
@locale = Translatomatic::Locale.parse(locale)
|
22
|
-
@offset = options[:offset] || 0
|
23
|
-
@parent = options[:parent]
|
24
|
-
end
|
25
|
-
|
26
|
-
# @return [String] The value of the string
|
27
|
-
def to_s
|
28
|
-
@value
|
29
|
-
end
|
30
|
-
|
31
|
-
# @return [Number] The length of the string
|
32
|
-
def length
|
33
|
-
@value.length
|
34
|
-
end
|
35
|
-
|
36
|
-
# @return [boolean] True if the string is empty
|
37
|
-
def empty?
|
38
|
-
@value.empty?
|
39
|
-
end
|
40
|
-
|
41
|
-
# Invokes value.match
|
42
|
-
# @param pattern [Regexp,String] The regex pattern to match
|
43
|
-
# @return [MatchData] Object describing the match, or nil if no match
|
44
|
-
def match(pattern)
|
45
|
-
@value.match(pattern)
|
46
|
-
end
|
47
|
-
|
48
|
-
# @return [boolean] true if this string is a substring of another string
|
49
|
-
def substring?
|
50
|
-
@parent ? true : false
|
51
|
-
end
|
52
|
-
|
53
|
-
# @return [Symbol] The type of string, corresponding to TMX segtype.
|
54
|
-
# @see http://xml.coverpages.org/tmxSpec971212.html#SEGTYPE
|
55
|
-
def type
|
56
|
-
if sentences.length >= 2
|
57
|
-
:paragraph
|
58
|
-
else
|
59
|
-
script = script_data
|
60
|
-
@value.strip.match(/#{script.delimiter}\s*$/) ? :sentence : :phrase
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
# Find all sentences in the string
|
65
|
-
# @return [Array<Translatomatic::String] List of sentences
|
66
|
-
def sentences
|
67
|
-
substrings(sentence_regex)
|
68
|
-
end
|
69
|
-
|
70
|
-
# Find all substrings matching the given regex
|
71
|
-
# @return [Array<Translatomatic::String] List of substrings
|
72
|
-
def substrings(regex)
|
73
|
-
matches = matches(@value, regex)
|
74
|
-
strings = []
|
75
|
-
matches.each do |match|
|
76
|
-
substring = match.to_s
|
77
|
-
# find leading and trailing whitespace
|
78
|
-
next if substring.length == 0
|
79
|
-
|
80
|
-
parts = substring.match(/\A(\s*)(.*?)(\s*)\z/m).to_a
|
81
|
-
value = parts[2]
|
82
|
-
offset = match.offset(0)[0]
|
83
|
-
offset += parts[1].length # leading whitespace
|
84
|
-
strings << self.class.new(value, locale, offset: offset, parent: self)
|
85
|
-
end
|
86
|
-
|
87
|
-
# return [self] if there's only one substring and it's equal to self
|
88
|
-
strings.length == 1 && strings[0].eql?(self) ? [self] : strings
|
89
|
-
end
|
90
|
-
|
91
|
-
# @return [boolean] true if other is a {Translatomatic::String} with
|
92
|
-
# the same value and locale.
|
93
|
-
def eql?(other)
|
94
|
-
other.kind_of?(Translatomatic::String) && other.hash == hash
|
95
|
-
end
|
96
|
-
|
97
|
-
# (see #eql?)
|
98
|
-
def ==(other)
|
99
|
-
eql?(other)
|
100
|
-
end
|
101
|
-
|
102
|
-
# @!visibility private
|
103
|
-
def hash
|
104
|
-
[value, locale].hash
|
105
|
-
end
|
106
|
-
|
107
|
-
private
|
108
|
-
|
109
|
-
# @!visibility private
|
110
|
-
class Script
|
111
|
-
attr_reader :language
|
112
|
-
attr_reader :delimiter # sentence delimiter
|
113
|
-
attr_reader :trailing_space # delimiter requires trailing space or eol
|
114
|
-
attr_reader :left_to_right # script direction
|
115
|
-
|
116
|
-
def initialize(language:, delimiter:, trailing_space:, direction:)
|
117
|
-
@language = language
|
118
|
-
@delimiter = delimiter
|
119
|
-
@trailing_space = trailing_space
|
120
|
-
@left_to_right = direction == :ltr
|
121
|
-
raise "invalid direction" unless [:ltr, :rtl].include?(direction)
|
122
|
-
end
|
123
|
-
end
|
124
|
-
|
125
|
-
SCRIPT_DATA = [
|
126
|
-
# [language, delimiter, trailing space, direction]
|
127
|
-
# japanese, no space after
|
128
|
-
["ja", "\u3002", false, :ltr],
|
129
|
-
# chinese, no space after
|
130
|
-
["zh", "\u3002", false, :ltr], # can be written any direction
|
131
|
-
# armenian, space after
|
132
|
-
["hy", ":", true, :ltr],
|
133
|
-
# hindi, space after
|
134
|
-
["hi", "।", true, :ltr],
|
135
|
-
# urdu, space after, right to left
|
136
|
-
["ur", "\u06d4", true, :rtl],
|
137
|
-
# thai, spaces used to separate sentences
|
138
|
-
["th", "\\s", false, :ltr],
|
139
|
-
# arabic, right to left
|
140
|
-
["ar", "\\.", true, :rtl],
|
141
|
-
# hebrew, right to left
|
142
|
-
["he", "\\.", true, :rtl],
|
143
|
-
# all other languages
|
144
|
-
["default", "\\.", true, :ltr],
|
145
|
-
]
|
146
|
-
|
147
|
-
class << self
|
148
|
-
attr_reader :script_data
|
149
|
-
end
|
150
|
-
|
151
|
-
begin
|
152
|
-
script_data = {}
|
153
|
-
SCRIPT_DATA.each do |lang, delimiter, trailing, ltr|
|
154
|
-
script = Script.new(language: lang, delimiter: delimiter,
|
155
|
-
trailing_space: trailing, direction: ltr)
|
156
|
-
script_data[lang] = script
|
157
|
-
end
|
158
|
-
@script_data = script_data
|
159
|
-
end
|
160
|
-
|
161
|
-
def matches(s, re)
|
162
|
-
start_at = 0
|
163
|
-
matches = []
|
164
|
-
while(m = s.match(re, start_at))
|
165
|
-
break if m.to_s.empty?
|
166
|
-
matches.push(m)
|
167
|
-
start_at = m.end(0)
|
168
|
-
end
|
169
|
-
matches
|
170
|
-
end
|
171
|
-
|
172
|
-
def sentence_regex
|
173
|
-
script = script_data
|
174
|
-
if script.trailing_space
|
175
|
-
regex = /.*?(?:#{script.delimiter}\s+|\z)/m
|
176
|
-
else
|
177
|
-
# no trailing space after delimiter
|
178
|
-
regex = /.*?(?:#{script.delimiter}|\z)/m
|
179
|
-
end
|
180
|
-
end
|
181
|
-
|
182
|
-
def script_data
|
183
|
-
data = self.class.script_data
|
184
|
-
data[locale.language] || data["default"]
|
185
|
-
end
|
186
|
-
|
187
|
-
end
|
188
|
-
end
|
1
|
+
module Translatomatic
|
2
|
+
# A string object with an associated locale.
|
3
|
+
class String
|
4
|
+
|
5
|
+
# @return [String] The string
|
6
|
+
attr_reader :value
|
7
|
+
|
8
|
+
# @return [Translatomatic::Locale] The string's locale
|
9
|
+
attr_reader :locale
|
10
|
+
|
11
|
+
# @return [Translatomatic::String] If this string is a substring of
|
12
|
+
# another string, returns the original string. Otherwise, returns nil.
|
13
|
+
attr_reader :parent
|
14
|
+
|
15
|
+
# @return [Number] If this string is a substring of another string,
|
16
|
+
# returns the starting offset of this string in the original.
|
17
|
+
attr_reader :offset
|
18
|
+
|
19
|
+
def initialize(value, locale, options = {})
|
20
|
+
@value = value.to_s || ''
|
21
|
+
@locale = Translatomatic::Locale.parse(locale)
|
22
|
+
@offset = options[:offset] || 0
|
23
|
+
@parent = options[:parent]
|
24
|
+
end
|
25
|
+
|
26
|
+
# @return [String] The value of the string
|
27
|
+
def to_s
|
28
|
+
@value
|
29
|
+
end
|
30
|
+
|
31
|
+
# @return [Number] The length of the string
|
32
|
+
def length
|
33
|
+
@value.length
|
34
|
+
end
|
35
|
+
|
36
|
+
# @return [boolean] True if the string is empty
|
37
|
+
def empty?
|
38
|
+
@value.empty?
|
39
|
+
end
|
40
|
+
|
41
|
+
# Invokes value.match
|
42
|
+
# @param pattern [Regexp,String] The regex pattern to match
|
43
|
+
# @return [MatchData] Object describing the match, or nil if no match
|
44
|
+
def match(pattern)
|
45
|
+
@value.match(pattern)
|
46
|
+
end
|
47
|
+
|
48
|
+
# @return [boolean] true if this string is a substring of another string
|
49
|
+
def substring?
|
50
|
+
@parent ? true : false
|
51
|
+
end
|
52
|
+
|
53
|
+
# @return [Symbol] The type of string, corresponding to TMX segtype.
|
54
|
+
# @see http://xml.coverpages.org/tmxSpec971212.html#SEGTYPE
|
55
|
+
def type
|
56
|
+
if sentences.length >= 2
|
57
|
+
:paragraph
|
58
|
+
else
|
59
|
+
script = script_data
|
60
|
+
@value.strip.match(/#{script.delimiter}\s*$/) ? :sentence : :phrase
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
# Find all sentences in the string
|
65
|
+
# @return [Array<Translatomatic::String] List of sentences
|
66
|
+
def sentences
|
67
|
+
substrings(sentence_regex)
|
68
|
+
end
|
69
|
+
|
70
|
+
# Find all substrings matching the given regex
|
71
|
+
# @return [Array<Translatomatic::String] List of substrings
|
72
|
+
def substrings(regex)
|
73
|
+
matches = matches(@value, regex)
|
74
|
+
strings = []
|
75
|
+
matches.each do |match|
|
76
|
+
substring = match.to_s
|
77
|
+
# find leading and trailing whitespace
|
78
|
+
next if substring.length == 0
|
79
|
+
|
80
|
+
parts = substring.match(/\A(\s*)(.*?)(\s*)\z/m).to_a
|
81
|
+
value = parts[2]
|
82
|
+
offset = match.offset(0)[0]
|
83
|
+
offset += parts[1].length # leading whitespace
|
84
|
+
strings << self.class.new(value, locale, offset: offset, parent: self)
|
85
|
+
end
|
86
|
+
|
87
|
+
# return [self] if there's only one substring and it's equal to self
|
88
|
+
strings.length == 1 && strings[0].eql?(self) ? [self] : strings
|
89
|
+
end
|
90
|
+
|
91
|
+
# @return [boolean] true if other is a {Translatomatic::String} with
|
92
|
+
# the same value and locale.
|
93
|
+
def eql?(other)
|
94
|
+
other.kind_of?(Translatomatic::String) && other.hash == hash
|
95
|
+
end
|
96
|
+
|
97
|
+
# (see #eql?)
|
98
|
+
def ==(other)
|
99
|
+
eql?(other)
|
100
|
+
end
|
101
|
+
|
102
|
+
# @!visibility private
|
103
|
+
def hash
|
104
|
+
[value, locale].hash
|
105
|
+
end
|
106
|
+
|
107
|
+
private
|
108
|
+
|
109
|
+
# @!visibility private
|
110
|
+
class Script
|
111
|
+
attr_reader :language
|
112
|
+
attr_reader :delimiter # sentence delimiter
|
113
|
+
attr_reader :trailing_space # delimiter requires trailing space or eol
|
114
|
+
attr_reader :left_to_right # script direction
|
115
|
+
|
116
|
+
def initialize(language:, delimiter:, trailing_space:, direction:)
|
117
|
+
@language = language
|
118
|
+
@delimiter = delimiter
|
119
|
+
@trailing_space = trailing_space
|
120
|
+
@left_to_right = direction == :ltr
|
121
|
+
raise "invalid direction" unless [:ltr, :rtl].include?(direction)
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
SCRIPT_DATA = [
|
126
|
+
# [language, delimiter, trailing space, direction]
|
127
|
+
# japanese, no space after
|
128
|
+
["ja", "\u3002", false, :ltr],
|
129
|
+
# chinese, no space after
|
130
|
+
["zh", "\u3002", false, :ltr], # can be written any direction
|
131
|
+
# armenian, space after
|
132
|
+
["hy", ":", true, :ltr],
|
133
|
+
# hindi, space after
|
134
|
+
["hi", "।", true, :ltr],
|
135
|
+
# urdu, space after, right to left
|
136
|
+
["ur", "\u06d4", true, :rtl],
|
137
|
+
# thai, spaces used to separate sentences
|
138
|
+
["th", "\\s", false, :ltr],
|
139
|
+
# arabic, right to left
|
140
|
+
["ar", "\\.", true, :rtl],
|
141
|
+
# hebrew, right to left
|
142
|
+
["he", "\\.", true, :rtl],
|
143
|
+
# all other languages
|
144
|
+
["default", "\\.", true, :ltr],
|
145
|
+
]
|
146
|
+
|
147
|
+
class << self
|
148
|
+
attr_reader :script_data
|
149
|
+
end
|
150
|
+
|
151
|
+
begin
|
152
|
+
script_data = {}
|
153
|
+
SCRIPT_DATA.each do |lang, delimiter, trailing, ltr|
|
154
|
+
script = Script.new(language: lang, delimiter: delimiter,
|
155
|
+
trailing_space: trailing, direction: ltr)
|
156
|
+
script_data[lang] = script
|
157
|
+
end
|
158
|
+
@script_data = script_data
|
159
|
+
end
|
160
|
+
|
161
|
+
def matches(s, re)
|
162
|
+
start_at = 0
|
163
|
+
matches = []
|
164
|
+
while(m = s.match(re, start_at))
|
165
|
+
break if m.to_s.empty?
|
166
|
+
matches.push(m)
|
167
|
+
start_at = m.end(0)
|
168
|
+
end
|
169
|
+
matches
|
170
|
+
end
|
171
|
+
|
172
|
+
def sentence_regex
|
173
|
+
script = script_data
|
174
|
+
if script.trailing_space
|
175
|
+
regex = /.*?(?:#{script.delimiter}\s+|\z)/m
|
176
|
+
else
|
177
|
+
# no trailing space after delimiter
|
178
|
+
regex = /.*?(?:#{script.delimiter}|\z)/m
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
182
|
+
def script_data
|
183
|
+
data = self.class.script_data
|
184
|
+
data[locale.language] || data["default"]
|
185
|
+
end
|
186
|
+
|
187
|
+
end
|
188
|
+
end
|
@@ -1,99 +1,99 @@
|
|
1
|
-
module Translatomatic::TMX
|
2
|
-
# Translation Memory Exchange document
|
3
|
-
class Document
|
4
|
-
|
5
|
-
# Create a new instance
|
6
|
-
# @param units [Array<TranslationUnit>] A list of translation units
|
7
|
-
# @param source_locale [Locale] Source locale
|
8
|
-
# @return [Translatomatic::TMX::Document] a new TMX object
|
9
|
-
def initialize(units, source_locale)
|
10
|
-
units = [units] unless units.kind_of?(Array)
|
11
|
-
@units = units
|
12
|
-
@source_locale = source_locale
|
13
|
-
end
|
14
|
-
|
15
|
-
# @return [String] An XML string
|
16
|
-
def to_xml(options = {})
|
17
|
-
builder = Nokogiri::XML::Builder.new do |xml|
|
18
|
-
dtd = options[:dtd] || TMX_DTD
|
19
|
-
xml.doc.create_internal_subset('tmx', nil, dtd)
|
20
|
-
xml.tmx(version: "1.4") do
|
21
|
-
xml.header(creationtool: "Translatomatic",
|
22
|
-
creationtoolversion: Translatomatic::VERSION,
|
23
|
-
datatype: "PlainText",
|
24
|
-
segtype: "phrase", # default segtype
|
25
|
-
adminlang: @source_locale.to_s,
|
26
|
-
srclang: @source_locale.to_s,
|
27
|
-
"o-tmf": DEFAULT_OTMF
|
28
|
-
)
|
29
|
-
xml.body { tmx_body(xml) }
|
30
|
-
end
|
31
|
-
end
|
32
|
-
builder.to_xml
|
33
|
-
end
|
34
|
-
|
35
|
-
# Create a TMX document from the given converter
|
36
|
-
# @param texts [Array<Translatomatic::Model::Text>] List of texts
|
37
|
-
# @return [Translatomatic::TMX::Document] TMX document
|
38
|
-
def self.from_texts(texts)
|
39
|
-
# group texts by from_text_id to create units
|
40
|
-
# source_locale: use from_text.locale
|
41
|
-
# origin: use text.translator
|
42
|
-
sources = texts.select { |i| i.from_text.nil? }
|
43
|
-
source_locales = sources.collect { |i| i.locale }.uniq
|
44
|
-
raise t("tmx.multiple_locales") if source_locales.length > 1
|
45
|
-
units = units_from_texts(texts)
|
46
|
-
|
47
|
-
return new(units, source_locales[0])
|
48
|
-
end
|
49
|
-
|
50
|
-
def self.valid?(xml)
|
51
|
-
options = Nokogiri::XML::ParseOptions::DTDVALID
|
52
|
-
doc = Nokogiri::XML::Document.parse(xml, nil, nil, options)
|
53
|
-
doc.internal_subset.validate(doc)
|
54
|
-
end
|
55
|
-
|
56
|
-
private
|
57
|
-
|
58
|
-
class << self
|
59
|
-
include Translatomatic::Util
|
60
|
-
end
|
61
|
-
|
62
|
-
TMX_DTD = "http://www.ttt.org/oscarstandards/tmx/tmx14.dtd"
|
63
|
-
DEFAULT_OTMF = "Translatomatic"
|
64
|
-
|
65
|
-
def tmx_body(xml)
|
66
|
-
@units.each do |unit|
|
67
|
-
xml.tu("segtype": unit.strings[0].type) do
|
68
|
-
unit.strings.each do |string|
|
69
|
-
xml.tuv("xml:lang": string.locale.to_s) do
|
70
|
-
xml.seg string.value
|
71
|
-
end
|
72
|
-
end
|
73
|
-
end
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
# @return [Array<Translatomatic::TMX::TranslationUnit] translation unit list
|
78
|
-
def self.units_from_texts(texts)
|
79
|
-
# group texts by from_text_id
|
80
|
-
texts_by_from_id = {}
|
81
|
-
texts.each do |text|
|
82
|
-
id = text.from_text_id || text.id
|
83
|
-
list = (texts_by_from_id[id] ||= [])
|
84
|
-
list << text
|
85
|
-
end
|
86
|
-
|
87
|
-
# create list of Translation Units
|
88
|
-
texts_by_from_id.values.collect do |list|
|
89
|
-
strings = list.uniq.collect { |i| string(i.value, i.locale) }
|
90
|
-
tmx_unit(strings)
|
91
|
-
end
|
92
|
-
end
|
93
|
-
|
94
|
-
def self.tmx_unit(strings)
|
95
|
-
Translatomatic::TMX::TranslationUnit.new(strings)
|
96
|
-
end
|
97
|
-
|
98
|
-
end # class
|
99
|
-
end # module
|
1
|
+
module Translatomatic::TMX
|
2
|
+
# Translation Memory Exchange document
|
3
|
+
class Document
|
4
|
+
|
5
|
+
# Create a new instance
|
6
|
+
# @param units [Array<TranslationUnit>] A list of translation units
|
7
|
+
# @param source_locale [Locale] Source locale
|
8
|
+
# @return [Translatomatic::TMX::Document] a new TMX object
|
9
|
+
def initialize(units, source_locale)
|
10
|
+
units = [units] unless units.kind_of?(Array)
|
11
|
+
@units = units
|
12
|
+
@source_locale = source_locale
|
13
|
+
end
|
14
|
+
|
15
|
+
# @return [String] An XML string
|
16
|
+
def to_xml(options = {})
|
17
|
+
builder = Nokogiri::XML::Builder.new do |xml|
|
18
|
+
dtd = options[:dtd] || TMX_DTD
|
19
|
+
xml.doc.create_internal_subset('tmx', nil, dtd)
|
20
|
+
xml.tmx(version: "1.4") do
|
21
|
+
xml.header(creationtool: "Translatomatic",
|
22
|
+
creationtoolversion: Translatomatic::VERSION,
|
23
|
+
datatype: "PlainText",
|
24
|
+
segtype: "phrase", # default segtype
|
25
|
+
adminlang: @source_locale.to_s,
|
26
|
+
srclang: @source_locale.to_s,
|
27
|
+
"o-tmf": DEFAULT_OTMF
|
28
|
+
)
|
29
|
+
xml.body { tmx_body(xml) }
|
30
|
+
end
|
31
|
+
end
|
32
|
+
builder.to_xml
|
33
|
+
end
|
34
|
+
|
35
|
+
# Create a TMX document from the given converter
|
36
|
+
# @param texts [Array<Translatomatic::Model::Text>] List of texts
|
37
|
+
# @return [Translatomatic::TMX::Document] TMX document
|
38
|
+
def self.from_texts(texts)
|
39
|
+
# group texts by from_text_id to create units
|
40
|
+
# source_locale: use from_text.locale
|
41
|
+
# origin: use text.translator
|
42
|
+
sources = texts.select { |i| i.from_text.nil? }
|
43
|
+
source_locales = sources.collect { |i| i.locale }.uniq
|
44
|
+
raise t("tmx.multiple_locales") if source_locales.length > 1
|
45
|
+
units = units_from_texts(texts)
|
46
|
+
|
47
|
+
return new(units, source_locales[0])
|
48
|
+
end
|
49
|
+
|
50
|
+
def self.valid?(xml)
|
51
|
+
options = Nokogiri::XML::ParseOptions::DTDVALID
|
52
|
+
doc = Nokogiri::XML::Document.parse(xml, nil, nil, options)
|
53
|
+
doc.internal_subset.validate(doc)
|
54
|
+
end
|
55
|
+
|
56
|
+
private
|
57
|
+
|
58
|
+
class << self
|
59
|
+
include Translatomatic::Util
|
60
|
+
end
|
61
|
+
|
62
|
+
TMX_DTD = "http://www.ttt.org/oscarstandards/tmx/tmx14.dtd"
|
63
|
+
DEFAULT_OTMF = "Translatomatic"
|
64
|
+
|
65
|
+
def tmx_body(xml)
|
66
|
+
@units.each do |unit|
|
67
|
+
xml.tu("segtype": unit.strings[0].type) do
|
68
|
+
unit.strings.each do |string|
|
69
|
+
xml.tuv("xml:lang": string.locale.to_s) do
|
70
|
+
xml.seg string.value
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
# @return [Array<Translatomatic::TMX::TranslationUnit] translation unit list
|
78
|
+
def self.units_from_texts(texts)
|
79
|
+
# group texts by from_text_id
|
80
|
+
texts_by_from_id = {}
|
81
|
+
texts.each do |text|
|
82
|
+
id = text.from_text_id || text.id
|
83
|
+
list = (texts_by_from_id[id] ||= [])
|
84
|
+
list << text
|
85
|
+
end
|
86
|
+
|
87
|
+
# create list of Translation Units
|
88
|
+
texts_by_from_id.values.collect do |list|
|
89
|
+
strings = list.uniq.collect { |i| string(i.value, i.locale) }
|
90
|
+
tmx_unit(strings)
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
def self.tmx_unit(strings)
|
95
|
+
Translatomatic::TMX::TranslationUnit.new(strings)
|
96
|
+
end
|
97
|
+
|
98
|
+
end # class
|
99
|
+
end # module
|