translatomatic 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. checksums.yaml +5 -5
  2. data/.gitattributes +20 -20
  3. data/.gitignore +19 -15
  4. data/.rspec +3 -3
  5. data/.rubocop.yml +28 -0
  6. data/.translatomatic/config.yml +4 -0
  7. data/.travis.yml +4 -6
  8. data/.yardopts +9 -9
  9. data/Gemfile +8 -4
  10. data/Guardfile +4 -5
  11. data/README.de.md +55 -50
  12. data/README.en.md +177 -0
  13. data/README.es.md +53 -48
  14. data/README.fr.md +53 -48
  15. data/README.it.md +54 -49
  16. data/README.ja.md +63 -58
  17. data/README.ko.md +59 -54
  18. data/README.md +17 -13
  19. data/README.ms.md +50 -45
  20. data/README.pt.md +54 -49
  21. data/README.ru.md +57 -52
  22. data/README.sv.md +51 -46
  23. data/README.zh.md +60 -55
  24. data/Rakefile +3 -3
  25. data/TODO.txt +6 -0
  26. data/bin/console +3 -3
  27. data/bin/translatomatic +4 -2
  28. data/config/i18n-tasks.yml +130 -0
  29. data/config/locales/translatomatic/de.yml +141 -99
  30. data/config/locales/translatomatic/en.yml +129 -89
  31. data/config/locales/translatomatic/es.yml +136 -99
  32. data/config/locales/translatomatic/fr.yml +139 -100
  33. data/config/locales/translatomatic/it.yml +135 -97
  34. data/config/locales/translatomatic/ja.yml +137 -98
  35. data/config/locales/translatomatic/ko.yml +138 -98
  36. data/config/locales/translatomatic/ms.yml +138 -100
  37. data/config/locales/translatomatic/pt.yml +137 -101
  38. data/config/locales/translatomatic/ru.yml +136 -98
  39. data/config/locales/translatomatic/sv.yml +134 -96
  40. data/config/locales/translatomatic/zh.yml +136 -97
  41. data/db/migrate/201712170000_initial.rb +2 -3
  42. data/lib/translatomatic.rb +40 -25
  43. data/lib/translatomatic/cli.rb +5 -1
  44. data/lib/translatomatic/cli/base.rb +61 -58
  45. data/lib/translatomatic/cli/common_options.rb +14 -11
  46. data/lib/translatomatic/cli/config.rb +96 -91
  47. data/lib/translatomatic/cli/database.rb +85 -23
  48. data/lib/translatomatic/cli/main.rb +158 -104
  49. data/lib/translatomatic/cli/thor.rb +29 -0
  50. data/lib/translatomatic/cli/translate.rb +134 -157
  51. data/lib/translatomatic/config.rb +10 -301
  52. data/lib/translatomatic/config/display.rb +78 -0
  53. data/lib/translatomatic/config/files.rb +60 -0
  54. data/lib/translatomatic/config/location_settings.rb +133 -0
  55. data/lib/translatomatic/config/options.rb +68 -0
  56. data/lib/translatomatic/config/selector.rb +127 -0
  57. data/lib/translatomatic/config/settings.rb +148 -0
  58. data/lib/translatomatic/converter.rb +40 -28
  59. data/lib/translatomatic/database.rb +127 -110
  60. data/lib/translatomatic/define_options.rb +4 -5
  61. data/lib/translatomatic/escaped_unicode.rb +86 -76
  62. data/lib/translatomatic/extractor.rb +5 -2
  63. data/lib/translatomatic/extractor/base.rb +12 -12
  64. data/lib/translatomatic/extractor/ruby.rb +7 -6
  65. data/lib/translatomatic/file_translator.rb +101 -244
  66. data/lib/translatomatic/flattenation.rb +39 -0
  67. data/lib/translatomatic/http.rb +13 -0
  68. data/lib/translatomatic/http/client.rb +144 -0
  69. data/lib/translatomatic/http/exception.rb +43 -0
  70. data/lib/translatomatic/http/file_param.rb +27 -0
  71. data/lib/translatomatic/http/param.rb +37 -0
  72. data/lib/translatomatic/http/request.rb +91 -0
  73. data/lib/translatomatic/i18n.rb +43 -0
  74. data/lib/translatomatic/locale.rb +71 -59
  75. data/lib/translatomatic/logger.rb +43 -28
  76. data/lib/translatomatic/metadata.rb +58 -0
  77. data/lib/translatomatic/model.rb +4 -2
  78. data/lib/translatomatic/model/locale.rb +5 -5
  79. data/lib/translatomatic/model/text.rb +5 -5
  80. data/lib/translatomatic/option.rb +57 -34
  81. data/lib/translatomatic/path_utils.rb +126 -0
  82. data/lib/translatomatic/progress_updater.rb +13 -16
  83. data/lib/translatomatic/provider.rb +101 -0
  84. data/lib/translatomatic/provider/base.rb +136 -0
  85. data/lib/translatomatic/provider/frengly.rb +55 -0
  86. data/lib/translatomatic/provider/google.rb +78 -0
  87. data/lib/translatomatic/provider/google_web.rb +50 -0
  88. data/lib/translatomatic/provider/microsoft.rb +144 -0
  89. data/lib/translatomatic/provider/my_memory.rb +75 -0
  90. data/lib/translatomatic/provider/yandex.rb +61 -0
  91. data/lib/translatomatic/resource_file.rb +59 -53
  92. data/lib/translatomatic/resource_file/base.rb +171 -237
  93. data/lib/translatomatic/resource_file/csv.rb +176 -24
  94. data/lib/translatomatic/resource_file/html.rb +21 -42
  95. data/lib/translatomatic/resource_file/key_value_support.rb +117 -0
  96. data/lib/translatomatic/resource_file/markdown.rb +36 -38
  97. data/lib/translatomatic/resource_file/plist.rb +121 -126
  98. data/lib/translatomatic/resource_file/po.rb +104 -82
  99. data/lib/translatomatic/resource_file/properties.rb +48 -77
  100. data/lib/translatomatic/resource_file/properties.treetop +87 -0
  101. data/lib/translatomatic/resource_file/resw.rb +56 -41
  102. data/lib/translatomatic/resource_file/subtitle.rb +86 -54
  103. data/lib/translatomatic/resource_file/text.rb +18 -18
  104. data/lib/translatomatic/resource_file/xcode_strings.rb +32 -63
  105. data/lib/translatomatic/resource_file/xcode_strings.treetop +85 -0
  106. data/lib/translatomatic/resource_file/xml.rb +94 -81
  107. data/lib/translatomatic/resource_file/yaml.rb +54 -68
  108. data/lib/translatomatic/retry_executor.rb +37 -0
  109. data/lib/translatomatic/slurp.rb +32 -0
  110. data/lib/translatomatic/string_batcher.rb +50 -0
  111. data/lib/translatomatic/string_escaping.rb +61 -0
  112. data/lib/translatomatic/text.rb +263 -0
  113. data/lib/translatomatic/text_collection.rb +66 -0
  114. data/lib/translatomatic/tmx.rb +5 -3
  115. data/lib/translatomatic/tmx/document.rb +107 -82
  116. data/lib/translatomatic/tmx/translation_unit.rb +19 -18
  117. data/lib/translatomatic/translation.rb +8 -28
  118. data/lib/translatomatic/translation/collection.rb +199 -0
  119. data/lib/translatomatic/translation/fetcher.rb +123 -0
  120. data/lib/translatomatic/translation/munging.rb +112 -0
  121. data/lib/translatomatic/translation/result.rb +50 -0
  122. data/lib/translatomatic/translation/sharer.rb +32 -0
  123. data/lib/translatomatic/translation/stats.rb +44 -0
  124. data/lib/translatomatic/translator.rb +91 -88
  125. data/lib/translatomatic/type_cast.rb +63 -0
  126. data/lib/translatomatic/util.rb +37 -33
  127. data/lib/translatomatic/version.rb +2 -2
  128. data/translatomatic.gemspec +57 -46
  129. metadata +136 -59
  130. data/lib/translatomatic/http_request.rb +0 -162
  131. data/lib/translatomatic/string.rb +0 -188
  132. data/lib/translatomatic/translation_result.rb +0 -86
  133. data/lib/translatomatic/translation_stats.rb +0 -31
  134. data/lib/translatomatic/translator/base.rb +0 -128
  135. data/lib/translatomatic/translator/frengly.rb +0 -62
  136. data/lib/translatomatic/translator/google.rb +0 -37
  137. data/lib/translatomatic/translator/microsoft.rb +0 -41
  138. data/lib/translatomatic/translator/my_memory.rb +0 -68
  139. data/lib/translatomatic/translator/yandex.rb +0 -56
@@ -0,0 +1,37 @@
1
module Translatomatic
  # Runs a block of code, re-running it when it raises a retriable exception.
  class RetryExecutor
    include Util

    # @private
    DEFAULT_RETRIES = 3

    # @param options [Hash] Retry settings
    # @option options [Integer] :max_retries Maximum number of attempts
    #   (defaults to DEFAULT_RETRIES)
    # @option options [Array<Class>] :retriable Exception classes that may
    #   be retried (defaults to [StandardError])
    # @option options [Numeric] :retry_delay Seconds to sleep before each
    #   retry; no sleep when absent
    def initialize(options = {})
      @max_retries = options[:max_retries] || DEFAULT_RETRIES
      @retriable = options[:retriable] || [StandardError]
      @delay = options[:retry_delay]
    end

    # Yields to the given block, repeating the call after a retriable
    # failure until the block has failed max_retries times in total.
    # The exception is re-raised once the attempts are used up, or
    # immediately when it is not one of the retriable classes.
    # @return [Object] the return value of the block
    def run
      failures = 0
      begin
        yield
      rescue StandardError => error
        failures += 1
        # give up when out of attempts or when the error is not retriable
        raise error unless failures < @max_retries && retriable?(error)
        sleep @delay if @delay
        retry
      end
    end

    # @param exception [Exception] The exception to test
    # @return [boolean] true if the exception is an instance of one of the
    #   configured retriable classes
    def retriable?(exception)
      @retriable.any? { |klass| exception.is_a?(klass) }
    end
  end
end
@@ -0,0 +1,32 @@
1
module Translatomatic
  # Reads entire files into memory as UTF-8 strings
  class Slurp
    class << self
      # Read the whole file at path and return its contents as UTF-8.
      # When an encoding is detected, the raw data is tagged with it
      # before being transcoded. A leading UTF-8 byte order mark is removed.
      # @param path [String] Path to a file
      # @return [String] file contents in UTF-8
      def read(path)
        contents = File.read(path)
        detected = detect_encoding(contents)
        contents.force_encoding(detected) if detected
        contents.encode!(Encoding::UTF_8)
        contents.gsub!(/\A\xEF\xBB\xBF/, '') # strip byte order mark
        contents
      end

      private

      # Ask CharDet for the encoding of the data.
      # @param data [String] Raw file contents
      # @return [Encoding, nil] The detected encoding, or nil when CharDet
      #   reports no encoding or a confidence below 0.5
      def detect_encoding(data)
        guess = CharDet.detect(data)
        name = guess['encoding']
        return nil unless name && guess['confidence'] >= 0.5
        Encoding.find(name)
      end
    end
  end
end
@@ -0,0 +1,50 @@
1
module Translatomatic
  # each_slice with a limit on number of strings and also an optional
  # character limit.
  class StringBatcher
    include Util

    # @param strings [Array<String>] A list of strings to return in batches
    # @param max_count [Number] The maximum number of strings per batch,
    #   or nil for no count limit
    # @param max_length [Number] The limit on the total length of the
    #   strings in a batch, or nil for no length limit
    def initialize(strings, max_count:, max_length:)
      @strings = strings
      @max_count = max_count
      @max_length = max_length
      @batch = []
      @length = 0
    end

    # Yields lists of strings within the size constraints given to the
    # constructor. Strings are yielded in their original order.
    # @yield [batch] Called once per batch
    # @yieldparam batch [Array<String>] List of strings
    # @return [Enumerator] when called without a block
    # @raise [RuntimeError] if a single string reaches the length limit
    #   on its own
    def each_batch(&block)
      return enum_for(:each_batch) unless block_given?

      @strings.each { |string| process_string(string, &block) }
      yield_batch(&block) # send remaining strings
    end

    private

    # Add one string to the pending batch, flushing the batch to the block
    # whenever a limit is hit.
    def process_string(string, &block)
      # A total that reaches max_length is treated as over the limit.
      if @max_length && @length + string.length >= @max_length
        yield_batch(&block)
        # The batch is now empty, so a string that still reaches the limit
        # can never be sent. Raise regardless of whether a batch was
        # pending, rather than only when the string came first.
        # NOTE(review): t is not defined in this class; presumably it is
        # the translation helper provided by Util.
        raise t('translator.string_too_long') if string.length >= @max_length
      end

      # add string to batch
      @batch << string
      @length += string.length

      yield_batch(&block) if @max_count && @batch.length >= @max_count
    end

    # Yield the pending batch (if it has any strings) and start a new one.
    def yield_batch
      yield @batch unless @batch.empty?
      @batch = []
      @length = 0
    end
  end
end
@@ -0,0 +1,61 @@
1
module Translatomatic
  # String escaping/unescaping code from syck/encoding.rb
  module StringEscaping
    # Escape sequences for the characters 0x00..0x1f, indexed by character
    # code. %w does not interpret backslash escapes, so each entry is the
    # literal text of the sequence (a backslash followed by letters/digits).
    ESCAPES = %w[\x00 \x01 \x02 \x03 \x04 \x05 \x06 \a
                 \x08 \t \n \v \f
                 \r \x0e \x0f
                 \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17
                 \x18 \x19 \x1a \e \x1c \x1d \x1e \x1f].freeze

    # Maps the letter of a single-character escape to the character it
    # stands for.
    UNESCAPES = {
      'a' => "\x07", 'b' => "\x08", 't' => "\x09",
      'n' => "\x0a", 'v' => "\x0b", 'f' => "\x0c",
      'r' => "\x0d", 'e' => "\x1b", '\\' => '\\'
    }.freeze

    # Recognised escapes: group 1 is a single-letter escape, group 2 the two
    # hex digits of \xNN (or \0xNN), group 3 the four hex digits of \uNNNN.
    UNESCAPE_PATTERN =
      /\\(?:([nevfbart\\])|0?x([0-9a-fA-F]{2})|u([0-9a-fA-F]{4}))/

    # As UNESCAPE_PATTERN, plus group 4: any other backslash-prefixed
    # character.
    UNESCAPE_ALL_PATTERN =
      /\\(?:([nevfbart\\])|0?x([0-9a-fA-F]{2})|u([0-9a-fA-F]{4})|(.))/

    private_constant :ESCAPES
    private_constant :UNESCAPES
    private_constant :UNESCAPE_PATTERN
    private_constant :UNESCAPE_ALL_PATTERN

    class << self
      # Escape unprintable characters such as newlines.
      # Backslashes are doubled first, then each character listed in
      # include is prefixed with a backslash, then characters 0x00..0x1f
      # are replaced by their escape sequences.
      # @param value [String] The string to escape
      # @param include [String] Extra characters to escape. Every character
      #   is taken literally. Nothing extra is escaped when this is nil,
      #   empty or whitespace only.
      # @return [String] The string with special characters escaped, or nil
      #   if value is nil.
      def escape(value, include = '"')
        return nil if value.nil?
        new_value = value.dup
        # block form: the result is used verbatim, without the backslash
        # processing applied to replacement strings
        new_value.gsub!(/\\/) { '\\\\' }

        extra = include.to_s
        unless extra =~ /\A[[:space:]]*\z/
          # Regexp.escape stops characters such as "-", "^" and "]" from
          # being read as character class syntax
          new_value.gsub!(/[#{Regexp.escape(extra)}]/) do
            "\\#{Regexp.last_match(0)}"
          end
        end

        new_value.gsub!(/[\x00-\x1f]/) { ESCAPES[Regexp.last_match(0).ord] }
        new_value
      end

      # Unescape character escapes such as "\n" to their character equivalents.
      # Backslash sequences that are not recognised are left untouched.
      # @param value [String] The string to unescape
      # @return [String] The string with special characters unescaped, or
      #   nil if value is nil.
      def unescape(value)
        return nil if value.nil?
        value.gsub(UNESCAPE_PATTERN) { decode_escape(Regexp.last_match) }
      end

      # Unescape as above, and also convert all occurrences of \$char to $char
      # The string is processed in a single pass, so a backslash produced by
      # an escaped backslash is never combined with the character after it.
      # @param value [String] The string to unescape
      # @return [String] The string with all characters unescaped, or nil
      #   if value is nil.
      def unescape_all(value)
        return nil if value.nil?
        value.gsub(UNESCAPE_ALL_PATTERN) { decode_escape(Regexp.last_match) }
      end

      private

      # Convert one matched escape sequence to the text it stands for.
      # @param match [MatchData] A match of one of the unescape patterns
      # @return [String] The replacement text
      def decode_escape(match)
        if match[3]
          [match[3].hex].pack('U*')
        elsif match[2]
          # NOTE(review): this produces a single raw byte; for values above
          # 0x7f that byte is not a complete UTF-8 character. Confirm that
          # callers only rely on this for ASCII values.
          [match[2]].pack('H2')
        elsif match[1]
          UNESCAPES[match[1]]
        else
          match[4]
        end
      end
    end
  end
end
@@ -0,0 +1,263 @@
1
module Translatomatic
  # A text string with an associated locale and other attributes.
  # Methods that are not defined here are forwarded to the underlying
  # String (see #method_missing).
  class Text
    # @return [String] The text content
    attr_reader :value

    # @return [Translatomatic::Locale] The text locale
    attr_reader :locale

    # @return [Translatomatic::Text] If this text is a substring of
    #   another text, returns the original text. Otherwise, returns nil.
    attr_reader :parent

    # @return [Number] If this text is a substring of another text,
    #   returns the starting offset of this text in the original.
    attr_reader :offset

    # @return [Array<String>] Disambiguating context string(s)
    attr_accessor :context

    # @return [Regexp] Regexp that matches parts of the text to preserve
    attr_accessor :preserve_regex

    # Create a new text. Returns value if value is already a
    # Translatomatic::Text object with the same locale.
    # @param value [String, Translatomatic::Text] The text content
    # @param locale [Object] A locale, or anything accepted by
    #   Translatomatic::Locale.parse
    # @return [Translatomatic::Text] The text object
    def self.[](value, locale)
      locale = Translatomatic::Locale.parse(locale)
      return value if value.is_a?(Translatomatic::Text) &&
                      value.locale == locale
      new(value, locale)
    end

    # Creates a new text
    # @param value [String] A string (converted with to_s, so nil becomes '')
    # @param locale [String] A locale
    # @param options [Hash] Optional attributes
    # @option options [Number] :offset Offset in the parent text (default 0)
    # @option options [Translatomatic::Text] :parent The text this one is a
    #   substring of
    # @option options [Array<String>] :context Context string(s)
    def initialize(value, locale, options = {})
      @value = value.to_s
      @locale = Translatomatic::Locale.parse(locale)
      @offset = options[:offset] || 0
      @parent = options[:parent]
      @context = options[:context]
      @options = options
    end

    # @return [Text] A copy of this text
    def dup
      copy_self_with_value(value)
    end

    # Invokes value.match
    # @param pattern [Regexp,String] The regex pattern to match
    # @return [MatchData] Object describing the match, or nil if no match
    def match(pattern)
      @value.match(pattern)
    end

    # @return [boolean] true if this text is a substring of another text
    def substring?
      @parent ? true : false
    end

    # @return [String] The value of the text
    def to_s
      @value
    end

    # @return [String] value.to_str
    def to_str
      @value.to_str
    end

    # @param pattern [Regexp,String] The pattern to search for
    # @param replacement [String,Hash] The replacement; ignored when a
    #   block is given
    # @yieldparam match [MatchData] The current match
    # @yieldreturn [String] The text to substitute for the match
    # @return [Text] A copy of this text with all occurrences of pattern
    #   substituted for the replacement text.
    def gsub(pattern, replacement = nil)
      new_value = if block_given?
                    @value.gsub(pattern) { yield Regexp.last_match }
                  else
                    @value.gsub(pattern, replacement)
                  end
      copy_self_with_value(new_value)
    end

    # @return [Symbol] The type of text, corresponding to TMX segtype.
    #   :paragraph when the text contains two or more sentences, :sentence
    #   when it ends with the sentence delimiter of its language, and
    #   :phrase otherwise.
    # @see http://xml.coverpages.org/tmxSpec971212.html#SEGTYPE
    def type
      return :paragraph if sentences.length >= 2
      script = script_data
      @value.strip =~ /#{script.delimiter}\s*$/ ? :sentence : :phrase
    end

    # Find all sentences in the text
    # @return [Array<Translatomatic::Text>] List of sentences
    def sentences
      substrings(sentence_regex)
    end

    # Find all substrings matching the given regex. Leading and trailing
    # whitespace is removed from each substring.
    # @param regex [Regexp] The pattern describing one substring
    # @return [Array<Translatomatic::Text>] List of substrings
    def substrings(regex)
      found = matches(@value, regex)
      strings = found.collect { |i| match_to_substring(i) }.compact
      # return [self] if there's only one substring and it's equal to self
      strings.length == 1 && strings[0].eql?(self) ? [self] : strings
    end

    # @param other [Object] The object to compare with
    # @return [boolean] true if other is a {Translatomatic::Text} or a
    #   String with the same value. The locale is not compared.
    def eql?(other)
      return false unless other.is_a?(Translatomatic::Text) ||
                          other.is_a?(::String)
      # compare the values themselves; equal hash codes do not guarantee
      # equal strings
      other.to_s == @value
    end

    # (see #eql?)
    def ==(other)
      eql?(other)
    end

    # The hash is derived from the value only.
    # @!visibility private
    def hash
      value.hash
      # [value, locale].hash
    end

    # Escape unprintable characters such as newlines.
    # @param skip [String] Passed to StringEscaping.escape as the extra
    #   characters to escape
    # @return [Translatomatic::Text] The text with
    #   special characters escaped.
    def escape(skip = '')
      self.class.new(StringEscaping.escape(@value, skip), locale)
    end

    # Unescape character escapes such as "\n" to their character equivalents.
    # @return [Translatomatic::Text] The text with
    #   escaped characters replaced with actual characters.
    def unescape
      self.class.new(StringEscaping.unescape(@value), locale)
    end

    private

    # Sentence splitting rules for one language.
    # @!visibility private
    class Script
      attr_reader :language
      attr_reader :delimiter # sentence delimiter
      attr_reader :trailing_space # delimiter requires trailing space or eol
      attr_reader :left_to_right # script direction

      # @param language [String] Language code
      # @param delimiter [String] Regex source matching the sentence delimiter
      # @param trailing_space [boolean] true if whitespace must follow the
      #   delimiter
      # @param direction [Symbol] :ltr or :rtl
      def initialize(language:, delimiter:, trailing_space:, direction:)
        raise 'invalid direction' unless %i[ltr rtl].include?(direction)
        @language = language
        @delimiter = delimiter
        @trailing_space = trailing_space
        @left_to_right = direction == :ltr
      end
    end

    SCRIPT_DATA = [
      # [language, delimiter, trailing space, direction]
      # japanese, no space after
      ['ja', "\u3002", false, :ltr],
      # chinese, no space after
      ['zh', "\u3002", false, :ltr], # can be written any direction
      # armenian, space after
      ['hy', ':', true, :ltr],
      # hindi, space after
      ['hi', '।', true, :ltr],
      # urdu, space after, right to left
      ['ur', "\u06d4", true, :rtl],
      # thai, spaces used to separate sentences
      ['th', '\\s', false, :ltr],
      # arabic, right to left
      ['ar', '\\.', true, :rtl],
      # hebrew, right to left
      ['he', '\\.', true, :rtl],
      # all other languages
      ['default', '\\.', true, :ltr]
    ].freeze

    class << self
      # @return [Hash<String,Script>] Script rules keyed by language code
      attr_reader :script_data
    end

    # Build the language => Script lookup table once, when the class loads.
    @script_data = SCRIPT_DATA.each_with_object({}) do |row, table|
      lang, delimiter, trailing, direction = row
      table[lang] = Script.new(language: lang, delimiter: delimiter,
                               trailing_space: trailing, direction: direction)
    end

    # Create a text with a new value and the attributes of this text.
    def copy_self_with_value(new_value)
      copy = self.class.new(new_value, @locale, @options)
      copy.preserve_regex = preserve_regex
      copy.context = context
      copy
    end

    # Convert a match within this text to a substring text object.
    # @return [Translatomatic::Text, nil] The substring with surrounding
    #   whitespace removed, or nil if the match is empty
    def match_to_substring(match)
      substring = match.to_s
      return nil if substring.empty?

      # find leading and trailing whitespace
      parts = substring.match(/\A(\s*)(.*?)(\s*)\z/m).to_a
      value = parts[2]
      offset = match.offset(0)[0]
      offset += parts[1].length # leading whitespace
      string = self.class.new(value, locale, offset: offset, parent: self)
      string.preserve_regex = preserve_regex
      string.context = context
      string
    end

    # Collect successive matches of re in s, each search starting where
    # the previous match ended. Stops at the first empty match.
    def matches(s, re)
      start_at = 0
      found = []
      while (m = s.match(re, start_at))
        break if m.to_s.empty?
        found.push(m)
        start_at = m.end(0)
      end
      found
    end

    # @return [Regexp] Pattern matching one sentence: text up to and
    #   including the delimiter, a newline, or the end of the string
    def sentence_regex
      script = script_data
      if script.trailing_space
        /.*?(?:#{script.delimiter}\s+|\z|\n)/m
      else
        # no trailing space after delimiter
        /.*?(?:#{script.delimiter}|\z|\n)/m
      end
    end

    # @return [Script] Rules for the language of this text, falling back
    #   to the 'default' entry
    def script_data
      data = self.class.script_data
      data[locale.language] || data['default']
    end

    def respond_to_missing?(name, include_private = false)
      @value.respond_to?(name) || super
    end

    # Forward unknown methods to the underlying string, including any
    # block. String results are converted to text objects carrying the
    # attributes of this text; other results are returned as they are.
    def method_missing(name, *args, &block)
      return super unless @value.respond_to?(name)
      result = @value.public_send(name, *args, &block)
      # convert to text object
      result.is_a?(String) ? copy_self_with_value(result) : result
    end
    # keep keyword arguments intact when forwarding on Ruby 2.7 and later
    ruby2_keywords(:method_missing) if respond_to?(:ruby2_keywords, true)
  end
end