translatomatic 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. checksums.yaml +4 -4
  2. data/.translatomatic/config.yml +18 -0
  3. data/.travis.yml +33 -33
  4. data/Gemfile +6 -4
  5. data/README.de.md +53 -18
  6. data/README.es.md +55 -20
  7. data/README.fr.md +54 -19
  8. data/README.it.md +58 -23
  9. data/README.ja.md +54 -19
  10. data/README.ko.md +58 -23
  11. data/README.md +167 -141
  12. data/README.ms.md +51 -16
  13. data/README.pt.md +58 -23
  14. data/README.ru.md +53 -18
  15. data/README.sv.md +53 -18
  16. data/README.zh.md +53 -18
  17. data/bin/translatomatic +6 -6
  18. data/bin/travis +24 -26
  19. data/config/locales/translatomatic/de.yml +22 -11
  20. data/config/locales/translatomatic/en.yml +21 -12
  21. data/config/locales/translatomatic/es.yml +22 -11
  22. data/config/locales/translatomatic/fr.yml +22 -12
  23. data/config/locales/translatomatic/it.yml +22 -11
  24. data/config/locales/translatomatic/ja.yml +22 -11
  25. data/config/locales/translatomatic/ko.yml +22 -11
  26. data/config/locales/translatomatic/ms.yml +22 -11
  27. data/config/locales/translatomatic/pt.yml +22 -11
  28. data/config/locales/translatomatic/ru.yml +22 -11
  29. data/config/locales/translatomatic/sv.yml +22 -11
  30. data/config/locales/translatomatic/zh.yml +22 -11
  31. data/db/migrate/201712170000_initial.rb +25 -25
  32. data/lib/translatomatic/cli/base.rb +81 -73
  33. data/lib/translatomatic/cli/config.rb +110 -81
  34. data/lib/translatomatic/cli/main.rb +85 -72
  35. data/lib/translatomatic/cli/translate.rb +141 -106
  36. data/lib/translatomatic/cli.rb +8 -8
  37. data/lib/translatomatic/config.rb +302 -155
  38. data/lib/translatomatic/converter.rb +28 -260
  39. data/lib/translatomatic/database.rb +134 -134
  40. data/lib/translatomatic/define_options.rb +22 -0
  41. data/lib/translatomatic/escaped_unicode.rb +0 -0
  42. data/lib/translatomatic/extractor/base.rb +16 -16
  43. data/lib/translatomatic/extractor/ruby.rb +6 -6
  44. data/lib/translatomatic/extractor.rb +5 -5
  45. data/lib/translatomatic/file_translator.rb +269 -0
  46. data/lib/translatomatic/http_request.rb +162 -162
  47. data/lib/translatomatic/locale.rb +76 -76
  48. data/lib/translatomatic/logger.rb +23 -23
  49. data/lib/translatomatic/model/locale.rb +25 -25
  50. data/lib/translatomatic/model/text.rb +19 -19
  51. data/lib/translatomatic/model.rb +1 -1
  52. data/lib/translatomatic/option.rb +37 -41
  53. data/lib/translatomatic/progress_updater.rb +13 -13
  54. data/lib/translatomatic/resource_file/base.rb +269 -192
  55. data/lib/translatomatic/resource_file/csv.rb +37 -0
  56. data/lib/translatomatic/resource_file/html.rb +54 -47
  57. data/lib/translatomatic/resource_file/markdown.rb +50 -55
  58. data/lib/translatomatic/resource_file/plist.rb +153 -19
  59. data/lib/translatomatic/resource_file/po.rb +107 -0
  60. data/lib/translatomatic/resource_file/properties.rb +91 -90
  61. data/lib/translatomatic/resource_file/resw.rb +50 -30
  62. data/lib/translatomatic/resource_file/subtitle.rb +75 -0
  63. data/lib/translatomatic/resource_file/text.rb +24 -30
  64. data/lib/translatomatic/resource_file/xcode_strings.rb +75 -80
  65. data/lib/translatomatic/resource_file/xml.rb +98 -91
  66. data/lib/translatomatic/resource_file/yaml.rb +94 -116
  67. data/lib/translatomatic/resource_file.rb +87 -78
  68. data/lib/translatomatic/string.rb +188 -188
  69. data/lib/translatomatic/tmx/document.rb +99 -99
  70. data/lib/translatomatic/translation_result.rb +63 -63
  71. data/lib/translatomatic/{converter_stats.rb → translation_stats.rb} +17 -17
  72. data/lib/translatomatic/translator/base.rb +1 -1
  73. data/lib/translatomatic/translator/google.rb +2 -0
  74. data/lib/translatomatic/translator.rb +10 -2
  75. data/lib/translatomatic/util.rb +45 -45
  76. data/lib/translatomatic/version.rb +7 -7
  77. data/lib/translatomatic.rb +52 -49
  78. data/translatomatic.gemspec +3 -2
  79. metadata +25 -5
@@ -1,188 +1,188 @@
1
- module Translatomatic
2
- # A string object with an associated locale.
3
- class String
4
-
5
- # @return [String] The string
6
- attr_reader :value
7
-
8
- # @return [Translatomatic::Locale] The string's locale
9
- attr_reader :locale
10
-
11
- # @return [Translatomatic::String] If this string is a substring of
12
- # another string, returns the original string. Otherwise, returns nil.
13
- attr_reader :parent
14
-
15
- # @return [Number] If this string is a substring of another string,
16
- # returns the starting offset of this string in the original.
17
- attr_reader :offset
18
-
19
- def initialize(value, locale, options = {})
20
- @value = value.to_s || ''
21
- @locale = Translatomatic::Locale.parse(locale)
22
- @offset = options[:offset] || 0
23
- @parent = options[:parent]
24
- end
25
-
26
- # @return [String] The value of the string
27
- def to_s
28
- @value
29
- end
30
-
31
- # @return [Number] The length of the string
32
- def length
33
- @value.length
34
- end
35
-
36
- # @return [boolean] True if the string is empty
37
- def empty?
38
- @value.empty?
39
- end
40
-
41
- # Invokes value.match
42
- # @param pattern [Regexp,String] The regex pattern to match
43
- # @return [MatchData] Object describing the match, or nil if no match
44
- def match(pattern)
45
- @value.match(pattern)
46
- end
47
-
48
- # @return [boolean] true if this string is a substring of another string
49
- def substring?
50
- @parent ? true : false
51
- end
52
-
53
- # @return [Symbol] The type of string, corresponding to TMX segtype.
54
- # @see http://xml.coverpages.org/tmxSpec971212.html#SEGTYPE
55
- def type
56
- if sentences.length >= 2
57
- :paragraph
58
- else
59
- script = script_data
60
- @value.strip.match(/#{script.delimiter}\s*$/) ? :sentence : :phrase
61
- end
62
- end
63
-
64
- # Find all sentences in the string
65
- # @return [Array<Translatomatic::String] List of sentences
66
- def sentences
67
- substrings(sentence_regex)
68
- end
69
-
70
- # Find all substrings matching the given regex
71
- # @return [Array<Translatomatic::String] List of substrings
72
- def substrings(regex)
73
- matches = matches(@value, regex)
74
- strings = []
75
- matches.each do |match|
76
- substring = match.to_s
77
- # find leading and trailing whitespace
78
- next if substring.length == 0
79
-
80
- parts = substring.match(/\A(\s*)(.*?)(\s*)\z/m).to_a
81
- value = parts[2]
82
- offset = match.offset(0)[0]
83
- offset += parts[1].length # leading whitespace
84
- strings << self.class.new(value, locale, offset: offset, parent: self)
85
- end
86
-
87
- # return [self] if there's only one substring and it's equal to self
88
- strings.length == 1 && strings[0].eql?(self) ? [self] : strings
89
- end
90
-
91
- # @return [boolean] true if other is a {Translatomatic::String} with
92
- # the same value and locale.
93
- def eql?(other)
94
- other.kind_of?(Translatomatic::String) && other.hash == hash
95
- end
96
-
97
- # (see #eql?)
98
- def ==(other)
99
- eql?(other)
100
- end
101
-
102
- # @!visibility private
103
- def hash
104
- [value, locale].hash
105
- end
106
-
107
- private
108
-
109
- # @!visibility private
110
- class Script
111
- attr_reader :language
112
- attr_reader :delimiter # sentence delimiter
113
- attr_reader :trailing_space # delimiter requires trailing space or eol
114
- attr_reader :left_to_right # script direction
115
-
116
- def initialize(language:, delimiter:, trailing_space:, direction:)
117
- @language = language
118
- @delimiter = delimiter
119
- @trailing_space = trailing_space
120
- @left_to_right = direction == :ltr
121
- raise "invalid direction" unless [:ltr, :rtl].include?(direction)
122
- end
123
- end
124
-
125
- SCRIPT_DATA = [
126
- # [language, delimiter, trailing space, direction]
127
- # japanese, no space after
128
- ["ja", "\u3002", false, :ltr],
129
- # chinese, no space after
130
- ["zh", "\u3002", false, :ltr], # can be written any direction
131
- # armenian, space after
132
- ["hy", ":", true, :ltr],
133
- # hindi, space after
134
- ["hi", "।", true, :ltr],
135
- # urdu, space after, right to left
136
- ["ur", "\u06d4", true, :rtl],
137
- # thai, spaces used to separate sentences
138
- ["th", "\\s", false, :ltr],
139
- # arabic, right to left
140
- ["ar", "\\.", true, :rtl],
141
- # hebrew, right to left
142
- ["he", "\\.", true, :rtl],
143
- # all other languages
144
- ["default", "\\.", true, :ltr],
145
- ]
146
-
147
- class << self
148
- attr_reader :script_data
149
- end
150
-
151
- begin
152
- script_data = {}
153
- SCRIPT_DATA.each do |lang, delimiter, trailing, ltr|
154
- script = Script.new(language: lang, delimiter: delimiter,
155
- trailing_space: trailing, direction: ltr)
156
- script_data[lang] = script
157
- end
158
- @script_data = script_data
159
- end
160
-
161
- def matches(s, re)
162
- start_at = 0
163
- matches = []
164
- while(m = s.match(re, start_at))
165
- break if m.to_s.empty?
166
- matches.push(m)
167
- start_at = m.end(0)
168
- end
169
- matches
170
- end
171
-
172
- def sentence_regex
173
- script = script_data
174
- if script.trailing_space
175
- regex = /.*?(?:#{script.delimiter}\s+|\z)/m
176
- else
177
- # no trailing space after delimiter
178
- regex = /.*?(?:#{script.delimiter}|\z)/m
179
- end
180
- end
181
-
182
- def script_data
183
- data = self.class.script_data
184
- data[locale.language] || data["default"]
185
- end
186
-
187
- end
188
- end
1
+ module Translatomatic
2
+ # A string object with an associated locale.
3
+ class String
4
+
5
+ # @return [String] The string
6
+ attr_reader :value
7
+
8
+ # @return [Translatomatic::Locale] The string's locale
9
+ attr_reader :locale
10
+
11
+ # @return [Translatomatic::String] If this string is a substring of
12
+ # another string, returns the original string. Otherwise, returns nil.
13
+ attr_reader :parent
14
+
15
+ # @return [Number] If this string is a substring of another string,
16
+ # returns the starting offset of this string in the original.
17
+ attr_reader :offset
18
+
19
+ def initialize(value, locale, options = {})
20
+ @value = value.to_s || ''
21
+ @locale = Translatomatic::Locale.parse(locale)
22
+ @offset = options[:offset] || 0
23
+ @parent = options[:parent]
24
+ end
25
+
26
+ # @return [String] The value of the string
27
+ def to_s
28
+ @value
29
+ end
30
+
31
+ # @return [Number] The length of the string
32
+ def length
33
+ @value.length
34
+ end
35
+
36
+ # @return [boolean] True if the string is empty
37
+ def empty?
38
+ @value.empty?
39
+ end
40
+
41
+ # Invokes value.match
42
+ # @param pattern [Regexp,String] The regex pattern to match
43
+ # @return [MatchData] Object describing the match, or nil if no match
44
+ def match(pattern)
45
+ @value.match(pattern)
46
+ end
47
+
48
+ # @return [boolean] true if this string is a substring of another string
49
+ def substring?
50
+ @parent ? true : false
51
+ end
52
+
53
+ # @return [Symbol] The type of string, corresponding to TMX segtype.
54
+ # @see http://xml.coverpages.org/tmxSpec971212.html#SEGTYPE
55
+ def type
56
+ if sentences.length >= 2
57
+ :paragraph
58
+ else
59
+ script = script_data
60
+ @value.strip.match(/#{script.delimiter}\s*$/) ? :sentence : :phrase
61
+ end
62
+ end
63
+
64
+ # Find all sentences in the string
65
+ # @return [Array<Translatomatic::String] List of sentences
66
+ def sentences
67
+ substrings(sentence_regex)
68
+ end
69
+
70
+ # Find all substrings matching the given regex
71
+ # @return [Array<Translatomatic::String] List of substrings
72
+ def substrings(regex)
73
+ matches = matches(@value, regex)
74
+ strings = []
75
+ matches.each do |match|
76
+ substring = match.to_s
77
+ # find leading and trailing whitespace
78
+ next if substring.length == 0
79
+
80
+ parts = substring.match(/\A(\s*)(.*?)(\s*)\z/m).to_a
81
+ value = parts[2]
82
+ offset = match.offset(0)[0]
83
+ offset += parts[1].length # leading whitespace
84
+ strings << self.class.new(value, locale, offset: offset, parent: self)
85
+ end
86
+
87
+ # return [self] if there's only one substring and it's equal to self
88
+ strings.length == 1 && strings[0].eql?(self) ? [self] : strings
89
+ end
90
+
91
+ # @return [boolean] true if other is a {Translatomatic::String} with
92
+ # the same value and locale.
93
+ def eql?(other)
94
+ other.kind_of?(Translatomatic::String) && other.hash == hash
95
+ end
96
+
97
+ # (see #eql?)
98
+ def ==(other)
99
+ eql?(other)
100
+ end
101
+
102
+ # @!visibility private
103
+ def hash
104
+ [value, locale].hash
105
+ end
106
+
107
+ private
108
+
109
+ # @!visibility private
110
+ class Script
111
+ attr_reader :language
112
+ attr_reader :delimiter # sentence delimiter
113
+ attr_reader :trailing_space # delimiter requires trailing space or eol
114
+ attr_reader :left_to_right # script direction
115
+
116
+ def initialize(language:, delimiter:, trailing_space:, direction:)
117
+ @language = language
118
+ @delimiter = delimiter
119
+ @trailing_space = trailing_space
120
+ @left_to_right = direction == :ltr
121
+ raise "invalid direction" unless [:ltr, :rtl].include?(direction)
122
+ end
123
+ end
124
+
125
+ SCRIPT_DATA = [
126
+ # [language, delimiter, trailing space, direction]
127
+ # japanese, no space after
128
+ ["ja", "\u3002", false, :ltr],
129
+ # chinese, no space after
130
+ ["zh", "\u3002", false, :ltr], # can be written any direction
131
+ # armenian, space after
132
+ ["hy", ":", true, :ltr],
133
+ # hindi, space after
134
+ ["hi", "।", true, :ltr],
135
+ # urdu, space after, right to left
136
+ ["ur", "\u06d4", true, :rtl],
137
+ # thai, spaces used to separate sentences
138
+ ["th", "\\s", false, :ltr],
139
+ # arabic, right to left
140
+ ["ar", "\\.", true, :rtl],
141
+ # hebrew, right to left
142
+ ["he", "\\.", true, :rtl],
143
+ # all other languages
144
+ ["default", "\\.", true, :ltr],
145
+ ]
146
+
147
+ class << self
148
+ attr_reader :script_data
149
+ end
150
+
151
+ begin
152
+ script_data = {}
153
+ SCRIPT_DATA.each do |lang, delimiter, trailing, ltr|
154
+ script = Script.new(language: lang, delimiter: delimiter,
155
+ trailing_space: trailing, direction: ltr)
156
+ script_data[lang] = script
157
+ end
158
+ @script_data = script_data
159
+ end
160
+
161
+ def matches(s, re)
162
+ start_at = 0
163
+ matches = []
164
+ while(m = s.match(re, start_at))
165
+ break if m.to_s.empty?
166
+ matches.push(m)
167
+ start_at = m.end(0)
168
+ end
169
+ matches
170
+ end
171
+
172
+ def sentence_regex
173
+ script = script_data
174
+ if script.trailing_space
175
+ regex = /.*?(?:#{script.delimiter}\s+|\z)/m
176
+ else
177
+ # no trailing space after delimiter
178
+ regex = /.*?(?:#{script.delimiter}|\z)/m
179
+ end
180
+ end
181
+
182
+ def script_data
183
+ data = self.class.script_data
184
+ data[locale.language] || data["default"]
185
+ end
186
+
187
+ end
188
+ end
@@ -1,99 +1,99 @@
1
- module Translatomatic::TMX
2
- # Translation Memory Exchange document
3
- class Document
4
-
5
- # Create a new instance
6
- # @param units [Array<TranslationUnit>] A list of translation units
7
- # @param source_locale [Locale] Source locale
8
- # @return [Translatomatic::TMX::Document] a new TMX object
9
- def initialize(units, source_locale)
10
- units = [units] unless units.kind_of?(Array)
11
- @units = units
12
- @source_locale = source_locale
13
- end
14
-
15
- # @return [String] An XML string
16
- def to_xml(options = {})
17
- builder = Nokogiri::XML::Builder.new do |xml|
18
- dtd = options[:dtd] || TMX_DTD
19
- xml.doc.create_internal_subset('tmx', nil, dtd)
20
- xml.tmx(version: "1.4") do
21
- xml.header(creationtool: "Translatomatic",
22
- creationtoolversion: Translatomatic::VERSION,
23
- datatype: "PlainText",
24
- segtype: "phrase", # default segtype
25
- adminlang: @source_locale.to_s,
26
- srclang: @source_locale.to_s,
27
- "o-tmf": DEFAULT_OTMF
28
- )
29
- xml.body { tmx_body(xml) }
30
- end
31
- end
32
- builder.to_xml
33
- end
34
-
35
- # Create a TMX document from the given converter
36
- # @param texts [Array<Translatomatic::Model::Text>] List of texts
37
- # @return [Translatomatic::TMX::Document] TMX document
38
- def self.from_texts(texts)
39
- # group texts by from_text_id to create units
40
- # source_locale: use from_text.locale
41
- # origin: use text.translator
42
- sources = texts.select { |i| i.from_text.nil? }
43
- source_locales = sources.collect { |i| i.locale }.uniq
44
- raise t("tmx.multiple_locales") if source_locales.length > 1
45
- units = units_from_texts(texts)
46
-
47
- return new(units, source_locales[0])
48
- end
49
-
50
- def self.valid?(xml)
51
- options = Nokogiri::XML::ParseOptions::DTDVALID
52
- doc = Nokogiri::XML::Document.parse(xml, nil, nil, options)
53
- doc.internal_subset.validate(doc)
54
- end
55
-
56
- private
57
-
58
- class << self
59
- include Translatomatic::Util
60
- end
61
-
62
- TMX_DTD = "http://www.ttt.org/oscarstandards/tmx/tmx14.dtd"
63
- DEFAULT_OTMF = "Translatomatic"
64
-
65
- def tmx_body(xml)
66
- @units.each do |unit|
67
- xml.tu("segtype": unit.strings[0].type) do
68
- unit.strings.each do |string|
69
- xml.tuv("xml:lang": string.locale.to_s) do
70
- xml.seg string.value
71
- end
72
- end
73
- end
74
- end
75
- end
76
-
77
- # @return [Array<Translatomatic::TMX::TranslationUnit] translation unit list
78
- def self.units_from_texts(texts)
79
- # group texts by from_text_id
80
- texts_by_from_id = {}
81
- texts.each do |text|
82
- id = text.from_text_id || text.id
83
- list = (texts_by_from_id[id] ||= [])
84
- list << text
85
- end
86
-
87
- # create list of Translation Units
88
- texts_by_from_id.values.collect do |list|
89
- strings = list.uniq.collect { |i| string(i.value, i.locale) }
90
- tmx_unit(strings)
91
- end
92
- end
93
-
94
- def self.tmx_unit(strings)
95
- Translatomatic::TMX::TranslationUnit.new(strings)
96
- end
97
-
98
- end # class
99
- end # module
1
+ module Translatomatic::TMX
2
+ # Translation Memory Exchange document
3
+ class Document
4
+
5
+ # Create a new instance
6
+ # @param units [Array<TranslationUnit>] A list of translation units
7
+ # @param source_locale [Locale] Source locale
8
+ # @return [Translatomatic::TMX::Document] a new TMX object
9
+ def initialize(units, source_locale)
10
+ units = [units] unless units.kind_of?(Array)
11
+ @units = units
12
+ @source_locale = source_locale
13
+ end
14
+
15
+ # @return [String] An XML string
16
+ def to_xml(options = {})
17
+ builder = Nokogiri::XML::Builder.new do |xml|
18
+ dtd = options[:dtd] || TMX_DTD
19
+ xml.doc.create_internal_subset('tmx', nil, dtd)
20
+ xml.tmx(version: "1.4") do
21
+ xml.header(creationtool: "Translatomatic",
22
+ creationtoolversion: Translatomatic::VERSION,
23
+ datatype: "PlainText",
24
+ segtype: "phrase", # default segtype
25
+ adminlang: @source_locale.to_s,
26
+ srclang: @source_locale.to_s,
27
+ "o-tmf": DEFAULT_OTMF
28
+ )
29
+ xml.body { tmx_body(xml) }
30
+ end
31
+ end
32
+ builder.to_xml
33
+ end
34
+
35
+ # Create a TMX document from the given converter
36
+ # @param texts [Array<Translatomatic::Model::Text>] List of texts
37
+ # @return [Translatomatic::TMX::Document] TMX document
38
+ def self.from_texts(texts)
39
+ # group texts by from_text_id to create units
40
+ # source_locale: use from_text.locale
41
+ # origin: use text.translator
42
+ sources = texts.select { |i| i.from_text.nil? }
43
+ source_locales = sources.collect { |i| i.locale }.uniq
44
+ raise t("tmx.multiple_locales") if source_locales.length > 1
45
+ units = units_from_texts(texts)
46
+
47
+ return new(units, source_locales[0])
48
+ end
49
+
50
+ def self.valid?(xml)
51
+ options = Nokogiri::XML::ParseOptions::DTDVALID
52
+ doc = Nokogiri::XML::Document.parse(xml, nil, nil, options)
53
+ doc.internal_subset.validate(doc)
54
+ end
55
+
56
+ private
57
+
58
+ class << self
59
+ include Translatomatic::Util
60
+ end
61
+
62
+ TMX_DTD = "http://www.ttt.org/oscarstandards/tmx/tmx14.dtd"
63
+ DEFAULT_OTMF = "Translatomatic"
64
+
65
+ def tmx_body(xml)
66
+ @units.each do |unit|
67
+ xml.tu("segtype": unit.strings[0].type) do
68
+ unit.strings.each do |string|
69
+ xml.tuv("xml:lang": string.locale.to_s) do
70
+ xml.seg string.value
71
+ end
72
+ end
73
+ end
74
+ end
75
+ end
76
+
77
+ # @return [Array<Translatomatic::TMX::TranslationUnit] translation unit list
78
+ def self.units_from_texts(texts)
79
+ # group texts by from_text_id
80
+ texts_by_from_id = {}
81
+ texts.each do |text|
82
+ id = text.from_text_id || text.id
83
+ list = (texts_by_from_id[id] ||= [])
84
+ list << text
85
+ end
86
+
87
+ # create list of Translation Units
88
+ texts_by_from_id.values.collect do |list|
89
+ strings = list.uniq.collect { |i| string(i.value, i.locale) }
90
+ tmx_unit(strings)
91
+ end
92
+ end
93
+
94
+ def self.tmx_unit(strings)
95
+ Translatomatic::TMX::TranslationUnit.new(strings)
96
+ end
97
+
98
+ end # class
99
+ end # module