translatomatic 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +5 -5
  2. data/.gitattributes +1 -0
  3. data/.gitignore +15 -12
  4. data/.rspec +3 -3
  5. data/.travis.yml +32 -50
  6. data/CODE_OF_CONDUCT.md +74 -74
  7. data/Gemfile +29 -5
  8. data/Guardfile +48 -0
  9. data/LICENSE.txt +21 -21
  10. data/README.de.md +92 -0
  11. data/README.es.md +92 -0
  12. data/README.fr.md +92 -0
  13. data/README.it.md +92 -0
  14. data/README.ja.md +92 -0
  15. data/README.md +96 -74
  16. data/Rakefile +6 -6
  17. data/bin/setup +8 -8
  18. data/bin/translatomatic +6 -6
  19. data/bin/travis +26 -0
  20. data/db/database.yml +9 -9
  21. data/db/migrate/201712170000_initial.rb +24 -23
  22. data/lib/translatomatic/cli.rb +204 -80
  23. data/lib/translatomatic/config.rb +12 -26
  24. data/lib/translatomatic/converter.rb +206 -142
  25. data/lib/translatomatic/converter_stats.rb +27 -27
  26. data/lib/translatomatic/database.rb +139 -99
  27. data/lib/translatomatic/escaped_unicode.rb +90 -90
  28. data/lib/translatomatic/extractor/base.rb +14 -0
  29. data/lib/translatomatic/extractor/ruby.rb +5 -0
  30. data/lib/translatomatic/extractor.rb +4 -0
  31. data/lib/translatomatic/http_request.rb +133 -0
  32. data/lib/translatomatic/locale.rb +52 -0
  33. data/lib/translatomatic/logger.rb +28 -0
  34. data/lib/translatomatic/model/locale.rb +21 -22
  35. data/lib/translatomatic/model/text.rb +17 -13
  36. data/lib/translatomatic/model.rb +4 -4
  37. data/lib/translatomatic/option.rb +24 -24
  38. data/lib/translatomatic/progress_updater.rb +15 -0
  39. data/lib/translatomatic/resource_file/base.rb +169 -137
  40. data/lib/translatomatic/resource_file/html.rb +46 -28
  41. data/lib/translatomatic/resource_file/markdown.rb +54 -0
  42. data/lib/translatomatic/resource_file/plist.rb +30 -29
  43. data/lib/translatomatic/resource_file/properties.rb +72 -60
  44. data/lib/translatomatic/resource_file/resw.rb +30 -0
  45. data/lib/translatomatic/resource_file/text.rb +29 -28
  46. data/lib/translatomatic/resource_file/xcode_strings.rb +71 -65
  47. data/lib/translatomatic/resource_file/xml.rb +79 -59
  48. data/lib/translatomatic/resource_file/yaml.rb +82 -80
  49. data/lib/translatomatic/resource_file.rb +76 -74
  50. data/lib/translatomatic/string.rb +160 -0
  51. data/lib/translatomatic/tmx/document.rb +100 -0
  52. data/lib/translatomatic/tmx/translation_unit.rb +19 -0
  53. data/lib/translatomatic/tmx.rb +4 -0
  54. data/lib/translatomatic/translation_result.rb +75 -57
  55. data/lib/translatomatic/translator/base.rb +83 -47
  56. data/lib/translatomatic/translator/frengly.rb +57 -64
  57. data/lib/translatomatic/translator/google.rb +31 -30
  58. data/lib/translatomatic/translator/microsoft.rb +33 -32
  59. data/lib/translatomatic/translator/my_memory.rb +64 -55
  60. data/lib/translatomatic/translator/yandex.rb +39 -37
  61. data/lib/translatomatic/translator.rb +63 -63
  62. data/lib/translatomatic/util.rb +15 -24
  63. data/lib/translatomatic/version.rb +4 -3
  64. data/lib/translatomatic.rb +32 -27
  65. data/translatomatic.gemspec +43 -45
  66. metadata +52 -18
  67. data/Gemfile.lock +0 -137
@@ -1,74 +1,76 @@
1
-
2
- module Translatomatic
3
- module ResourceFile
4
- class << self
5
- include Translatomatic::Util
6
- end
7
-
8
- # Load a resource file. If locale is not specified, the locale of the
9
- # file will be determined from the filename, or else the current default
10
- # locale will be used.
11
- # @param [String] path Path to the resource file
12
- # @param [String] locale Locale of the resource file
13
- # @return [Translatomatic::ResourceFile::Base] The resource file, or nil
14
- # if the file type is unsupported.
15
- def self.load(path, locale = nil)
16
- path = path.kind_of?(Pathname) ? path : Pathname.new(path)
17
- modules.each do |mod|
18
- # match on entire filename to support extensions containing locales
19
- if extension_match(mod, path)
20
- log.debug("attempting to load #{path.to_s} using #{mod.name.demodulize}")
21
- file = mod.new(path, locale)
22
- return file if file.valid?
23
- end
24
- end
25
- nil
26
- end
27
-
28
- # Find all resource files under the given directory. Follows symlinks.
29
- # @param [String, Pathname] path The path to search from
30
- # @return [Array<Translatomatic::ResourceFile>] Resource files found
31
- def self.find(path, options = {})
32
- files = []
33
- include_dot_directories = options[:include_dot_directories]
34
- path = Pathname.new(path) unless path.kind_of?(Pathname)
35
- path.find do |file|
36
- if !include_dot_directories && file.basename.to_s[0] == ?.
37
- Find.prune
38
- else
39
- resource = load(file)
40
- files << resource if resource
41
- end
42
- end
43
- files
44
- end
45
-
46
- # Find all configured resource file classes
47
- # @return [Array<Class>] Available resource file classes
48
- def self.modules
49
- self.constants.map { |c| self.const_get(c) }.select do |klass|
50
- klass.is_a?(Class) && klass != Base
51
- end
52
- end
53
-
54
- private
55
-
56
- def self.extension_match(mod, path)
57
- filename = path.basename.to_s.downcase
58
- mod.extensions.each do |extension|
59
- # don't match end of line in case file has locale extension
60
- return true if filename.match(/\.#{extension}/)
61
- end
62
- false
63
- end
64
- end
65
- end
66
-
67
- require 'translatomatic/resource_file/base'
68
- require 'translatomatic/resource_file/yaml'
69
- require 'translatomatic/resource_file/properties'
70
- require 'translatomatic/resource_file/text'
71
- require 'translatomatic/resource_file/xml'
72
- require 'translatomatic/resource_file/html'
73
- require 'translatomatic/resource_file/plist'
74
- require 'translatomatic/resource_file/xcode_strings'
1
+
2
+ module Translatomatic
3
+ module ResourceFile
4
+ class << self
5
+ include Translatomatic::Util
6
+ end
7
+
8
+ # Load a resource file. If locale is not specified, the locale of the
9
+ # file will be determined from the filename, or else the current default
10
+ # locale will be used.
11
+ # @param [String] path Path to the resource file
12
+ # @param [String] locale Locale of the resource file
13
+ # @return [Translatomatic::ResourceFile::Base] The resource file, or nil
14
+ # if the file type is unsupported.
15
+ def self.load(path, locale = nil)
16
+ path = path.kind_of?(Pathname) ? path : Pathname.new(path)
17
+ modules.each do |mod|
18
+ # match on entire filename to support extensions containing locales
19
+ if extension_match(mod, path)
20
+ log.debug("attempting to load #{path.to_s} using #{mod.name.demodulize}")
21
+ file = mod.new(path, locale)
22
+ return file if file.valid?
23
+ end
24
+ end
25
+ nil
26
+ end
27
+
28
+ # Find all resource files under the given directory. Follows symlinks.
29
+ # @param [String, Pathname] path The path to search from
30
+ # @return [Array<Translatomatic::ResourceFile>] Resource files found
31
+ def self.find(path, options = {})
32
+ files = []
33
+ include_dot_directories = options[:include_dot_directories]
34
+ path = Pathname.new(path) unless path.kind_of?(Pathname)
35
+ path.find do |file|
36
+ if !include_dot_directories && file.basename.to_s[0] == ?.
37
+ Find.prune
38
+ else
39
+ resource = load(file)
40
+ files << resource if resource
41
+ end
42
+ end
43
+ files
44
+ end
45
+
46
+ # Find all configured resource file classes
47
+ # @return [Array<Class>] Available resource file classes
48
+ def self.modules
49
+ self.constants.map { |c| self.const_get(c) }.select do |klass|
50
+ klass.is_a?(Class) && klass != Base
51
+ end
52
+ end
53
+
54
+ private
55
+
56
+ def self.extension_match(mod, path)
57
+ filename = path.basename.to_s.downcase
58
+ mod.extensions.each do |extension|
59
+ # don't match end of line in case file has locale extension
60
+ return true if filename.match(/\.#{extension}/)
61
+ end
62
+ false
63
+ end
64
+ end
65
+ end
66
+
67
+ require 'translatomatic/resource_file/base'
68
+ require 'translatomatic/resource_file/yaml'
69
+ require 'translatomatic/resource_file/properties'
70
+ require 'translatomatic/resource_file/text'
71
+ require 'translatomatic/resource_file/xml'
72
+ require 'translatomatic/resource_file/html'
73
+ require 'translatomatic/resource_file/markdown'
74
+ require 'translatomatic/resource_file/plist'
75
+ require 'translatomatic/resource_file/resw'
76
+ require 'translatomatic/resource_file/xcode_strings'
@@ -0,0 +1,160 @@
1
+ module Translatomatic
2
+ class String
3
+
4
+ # @return [String] The string
5
+ attr_reader :value
6
+
7
+ # @return [Translatomatic::Locale] The string's locale
8
+ attr_reader :locale
9
+
10
+ # @return [Translatomatic::String] If this string is a substring of
11
+ # another string, returns the original string. Otherwise, returns nil.
12
+ attr_reader :parent
13
+
14
+ # @return [Number] If this string is a substring of another string,
15
+ # returns the starting offset of this string in the original.
16
+ attr_reader :offset
17
+
18
+ def initialize(value, locale, options = {})
19
+ @value = value || ''
20
+ @locale = Translatomatic::Locale.parse(locale)
21
+ @offset = options[:offset] || 0
22
+ @parent = options[:parent]
23
+ end
24
+
25
+ # @return [String] The value of the string
26
+ def to_s
27
+ @value
28
+ end
29
+
30
+ def length
31
+ @value.length
32
+ end
33
+
34
+ def empty?
35
+ @value.empty?
36
+ end
37
+
38
+ def match(regex)
39
+ @value.match(regex)
40
+ end
41
+
42
+ # @return [boolean] true if this string is a substring of another string
43
+ def substring?
44
+ @parent ? true : false
45
+ end
46
+
47
+ # @return [Symbol] The type of string, corresponding to TMX segtype.
48
+ # @see http://xml.coverpages.org/tmxSpec971212.html#SEGTYPE
49
+ def type
50
+ if sentences.length >= 2
51
+ :paragraph
52
+ else
53
+ script = script_data
54
+ @value.strip.match(/#{script.delimiter}\s*$/) ? :sentence : :phrase
55
+ end
56
+ end
57
+
58
+ # Find all sentences in the string
59
+ # @return [Array<Translatomatic::String] List of sentences
60
+ def sentences
61
+ sentences = @value.scan(sentence_regex)
62
+ strings = []
63
+ offset = 0
64
+ sentences.each do |sentence|
65
+ # find leading and trailing whitespace
66
+ next if sentence.length == 0
67
+
68
+ parts = sentence.match(/^(\s*)(.*?)(\s*)$/).to_a
69
+ value = parts[2]
70
+ offset += parts[1].length # leading whitespace
71
+ strings << self.class.new(value, locale, offset: offset, parent: self)
72
+ offset += value.length + parts[3].length
73
+ end
74
+
75
+ # return [self] if there's only one sentence and it's equal to self
76
+ strings.length == 1 && strings[0].eql?(self) ? [self] : strings
77
+ end
78
+
79
+ def eql?(other)
80
+ other.kind_of?(Translatomatic::String) && other.hash == hash
81
+ end
82
+
83
+ def ==(other)
84
+ eql?(other)
85
+ end
86
+
87
+ def hash
88
+ [value, locale].hash
89
+ end
90
+
91
+ private
92
+
93
+ class Script
94
+ attr_reader :language
95
+ attr_reader :delimiter # sentence delimiter
96
+ attr_reader :trailing_space # delimiter requires trailing space or eol
97
+ attr_reader :left_to_right # script direction
98
+
99
+ def initialize(language:, delimiter:, trailing_space:, direction:)
100
+ @language = language
101
+ @delimiter = delimiter
102
+ @trailing_space = trailing_space
103
+ @left_to_right = direction == :ltr
104
+ raise "invalid direction" unless [:ltr, :rtl].include?(direction)
105
+ end
106
+ end
107
+
108
+ SCRIPT_DATA = [
109
+ # [language, delimiter, trailing space, direction]
110
+ # japanese, no space after
111
+ ["ja", "\u3002", false, :ltr],
112
+ # chinese, no space after
113
+ ["zh", "\u3002", false, :ltr], # can be written any direction
114
+ # armenian, space after
115
+ ["hy", ":", true, :ltr],
116
+ # hindi, space after
117
+ ["hi", "।", true, :ltr],
118
+ # urdu, space after, right to left
119
+ ["ur", "\u06d4", true, :rtl],
120
+ # thai, spaces used to separate sentences
121
+ ["th", "\\s", false, :ltr],
122
+ # arabic, right to left
123
+ ["ar", "\\.", true, :rtl],
124
+ # hebrew, right to left
125
+ ["he", "\\.", true, :rtl],
126
+ # all other languages
127
+ ["default", "\\.", true, :ltr],
128
+ ]
129
+
130
+ class << self
131
+ attr_reader :script_data
132
+ end
133
+
134
+ begin
135
+ script_data = {}
136
+ SCRIPT_DATA.each do |lang, delimiter, trailing, ltr|
137
+ script = Script.new(language: lang, delimiter: delimiter,
138
+ trailing_space: trailing, direction: ltr)
139
+ script_data[lang] = script
140
+ end
141
+ @script_data = script_data
142
+ end
143
+
144
+ def sentence_regex
145
+ script = script_data
146
+ if script.trailing_space
147
+ regex = /.*?(?:#{script.delimiter}\s+|$)/
148
+ else
149
+ # no trailing space after delimiter
150
+ regex = /.*?(?:#{script.delimiter}|$)/
151
+ end
152
+ end
153
+
154
+ def script_data
155
+ data = self.class.script_data
156
+ data[locale.language] || data["default"]
157
+ end
158
+
159
+ end
160
+ end
@@ -0,0 +1,100 @@
1
+ module Translatomatic::TMX
2
+ # Translation Memory Exchange document
3
+ class Document
4
+
5
+ # Create a new instance
6
+ # @param [Array<TranslationUnit>] A list of translation units
7
+ # @param [Locale] Source locale
8
+ # @return A new TMX object
9
+ def initialize(units, source_locale, origin)
10
+ units = [units] unless units.kind_of?(Array)
11
+ @units = units
12
+ @source_locale = source_locale
13
+ @origin = origin
14
+ end
15
+
16
+ # @return [String] An XML string
17
+ def to_xml(options = {})
18
+ builder = Nokogiri::XML::Builder.new do |xml|
19
+ dtd = options[:dtd] || TMX_DTD
20
+ xml.doc.create_internal_subset('tmx', nil, dtd)
21
+ xml.tmx(version: "1.4") do
22
+ xml.header(creationtool: "Translatomatic",
23
+ creationtoolversion: Translatomatic::VERSION,
24
+ datatype: "PlainText",
25
+ segtype: "phrase", # default segtype
26
+ adminlang: @source_locale.to_s,
27
+ srclang: @source_locale.to_s,
28
+ "o-tmx": @origin
29
+ )
30
+ xml.body { tmx_body(xml) }
31
+ end
32
+ end
33
+ builder.to_xml
34
+ end
35
+
36
+ # Create a TMX document from the given converter
37
+ # @param [Array<Translatomatic::Model::Text>] List of texts
38
+ # @return [Translatomatic::TMX::Document] TMX document
39
+ def self.from_texts(texts)
40
+ # group texts by from_text_id to create units
41
+ # source_locale: use from_text.locale
42
+ # origin: use text.translator
43
+ origins = texts.collect { |i| i.translator }.compact.uniq
44
+ raise "Multiple origins in texts" if origins.length > 1
45
+ sources = texts.select { |i| i.from_text.nil? }
46
+ source_locales = sources.collect { |i| i.locale }.uniq
47
+ raise "Multiple source locales in texts" if source_locales.length > 1
48
+ units = units_from_texts(texts)
49
+
50
+ return new(units, source_locales[0], origins[0])
51
+ end
52
+
53
+ def self.valid?(xml)
54
+ options = Nokogiri::XML::ParseOptions::DTDVALID
55
+ doc = Nokogiri::XML::Document.parse(xml, nil, nil, options)
56
+ doc.internal_subset.validate(doc)
57
+ end
58
+
59
+ private
60
+
61
+ class << self
62
+ include Translatomatic::Util
63
+ end
64
+
65
+ TMX_DTD = "http://www.ttt.org/oscarstandards/tmx/tmx14.dtd"
66
+
67
+ def tmx_body(xml)
68
+ @units.each do |unit|
69
+ xml.tu("segtype": unit.strings[0].type) do
70
+ unit.strings.each do |string|
71
+ xml.tuv("xml:lang": string.locale.to_s) do
72
+ xml.seg string.value
73
+ end
74
+ end
75
+ end
76
+ end
77
+ end
78
+
79
+ # @return [Array<Translatomatic::TMX::TranslationUnit] translation unit list
80
+ def self.units_from_texts(texts)
81
+ # group texts by from_text_id
82
+ texts_by_from_id = {}
83
+ texts.each do |text|
84
+ id = text.from_text_id || text.id
85
+ list = (texts_by_from_id[id] ||= [])
86
+ list << text
87
+ end
88
+
89
+ # create list of Translation Units
90
+ texts_by_from_id.values.collect do |list|
91
+ tmx_unit(list.uniq.collect { |i| string(i.value, i.locale) })
92
+ end
93
+ end
94
+
95
+ def self.tmx_unit(strings)
96
+ Translatomatic::TMX::TranslationUnit.new(strings)
97
+ end
98
+
99
+ end # class
100
+ end # module
@@ -0,0 +1,19 @@
1
+ module Translatomatic::TMX
2
+ class TranslationUnit
3
+
4
+ # @return [Array<Translatomatic::String>] Strings in this translation unit
5
+ attr_reader :strings
6
+
7
+ # @param [Array<Translatomatic::String>] list of strings
8
+ def initialize(strings)
9
+ @strings = strings || []
10
+ end
11
+
12
+ # Test translation unit validity.
13
+ # A translation unit must contain at least two strings.
14
+ # @return [boolean] true if this translation unit is valid
15
+ def valid?
16
+ @strings.length >= 2
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,4 @@
1
+ module Translatomatic::TMX; end
2
+
3
+ require 'translatomatic/tmx/translation_unit'
4
+ require 'translatomatic/tmx/document'
@@ -1,68 +1,86 @@
1
- require 'set'
2
-
3
- module Translatomatic
4
- class TranslationResult
5
-
6
- # Translation results
7
- # @return [Hash<String,String>] Translation results
8
- attr_reader :properties
9
-
10
- # @return [Locale] The locale of the original strings
11
- attr_reader :from_locale
12
-
13
- # @return [Locale] The target locale
14
- attr_reader :to_locale
15
-
16
- # @return [Set<String>] Untranslated strings
17
- attr_reader :untranslated
18
-
19
- # Create a translation result
20
- # @param [Hash<String,String>] properties Untranslated properties
21
- # @param [Locale] from_locale The locale of the untranslated strings
22
- # @param [Locale] to_locale The target locale
23
- def initialize(properties, from_locale, to_locale)
24
- @properties = properties.dup
25
- @value_to_keys = {}
26
- @untranslated = Set.new
27
- properties.each do |key, value|
28
- @untranslated << value
29
- keylist = (@value_to_keys[value] ||= [])
30
- keylist << key
31
- end
1
+ require 'set'
2
+
3
+ module Translatomatic
4
+ class TranslationResult
5
+
6
+ # Translation results
7
+ # @return [Hash<String,String>] Translation results
8
+ attr_reader :properties
9
+
10
+ # @return [Locale] The locale of the original strings
11
+ attr_reader :from_locale
12
+
13
+ # @return [Locale] The target locale
14
+ attr_reader :to_locale
15
+
16
+ # @return [Set<String>] Untranslated strings
17
+ attr_reader :untranslated
18
+
19
+ # Create a translation result
20
+ # @param [Hash<String,String>] properties Untranslated properties
21
+ # @param [Locale] from_locale The locale of the untranslated strings
22
+ # @param [Locale] to_locale The target locale
23
+ def initialize(properties, from_locale, to_locale)
24
+ @value_to_keys = {}
25
+ @untranslated = Set.new
32
26
  @from_locale = from_locale
33
27
  @to_locale = to_locale
34
- end
35
28
 
36
- # Update result with a list of translated strings.
37
- # @param [Array<String>] original Original strings
38
- # @param [Array<String>] translated Translated strings
39
- # @return [void]
40
- def update_strings(original, translated)
29
+ # duplicate strings
30
+ @properties = properties.transform_values { |i| i.dup }
31
+
32
+ properties.each do |key, value|
33
+ # split property value into sentences
34
+ string = string(value, from_locale)
35
+ string.sentences.each do |sentence|
36
+ @untranslated << sentence
37
+ keylist = (@value_to_keys[sentence.to_s] ||= [])
38
+ keylist << key
39
+ end
40
+ end
41
+ end
42
+
43
+ # Update result with a list of translated strings.
44
+ # @param [Array<String>] original Original strings
45
+ # @param [Array<String>] translated Translated strings
46
+ # @return [void]
47
+ def update_strings(original, translated)
41
48
  raise "strings length mismatch" unless original.length == translated.length
42
- original.zip(translated).each do |text1, text2|
43
- update(text1, text2)
49
+
50
+ # create list of [from, to] text conversions
51
+ conversions = []
52
+ original.zip(translated).each do |text1, text2|
53
+ conversions << [text1, text2]
44
54
  end
45
- end
46
55
 
47
- # Update result with texts from the database.
48
- # @param [Array<Translatomatic::Model::Text>] list Texts from database
49
- # @return [void]
50
- def update_db_strings(list)
51
- list.each do |t|
52
- original = t.from_text.value
53
- translated = t.value
54
- update(original, translated)
56
+ # sort conversion list by largest offset first so that we replace
57
+ # from the end of the string to the front, so substring offsets
58
+ # are correct in the target string.
59
+ conversions.sort_by! do |t1, t2|
60
+ t1.respond_to?(:offset) ? -t1.offset : 0
55
61
  end
56
- end
57
62
 
63
+ conversions.each do |text1, text2|
64
+ update(text1, text2)
65
+ end
66
+ end
67
+
58
68
  private
59
69
 
60
- def update(original, translated)
61
- keys = @value_to_keys[original]
70
+ include Translatomatic::Util
71
+
72
+ def update(original, translated)
73
+ keys = @value_to_keys[original.to_s]
62
74
  raise "no key mapping for text '#{original}'" unless keys
63
- keys.each { |key| @properties[key] = translated }
64
-
65
- @untranslated.delete(original)
66
- end
67
- end
68
- end
75
+ keys.each do |key|
76
+ if original.kind_of?(Translatomatic::String) && original.substring?
77
+ @properties[key][original.offset, original.length] = translated
78
+ else
79
+ @properties[key] = translated
80
+ end
81
+ end
82
+
83
+ @untranslated.delete(original)
84
+ end
85
+ end
86
+ end