translatomatic 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +5 -5
  2. data/.gitattributes +1 -0
  3. data/.gitignore +15 -12
  4. data/.rspec +3 -3
  5. data/.travis.yml +32 -50
  6. data/CODE_OF_CONDUCT.md +74 -74
  7. data/Gemfile +29 -5
  8. data/Guardfile +48 -0
  9. data/LICENSE.txt +21 -21
  10. data/README.de.md +92 -0
  11. data/README.es.md +92 -0
  12. data/README.fr.md +92 -0
  13. data/README.it.md +92 -0
  14. data/README.ja.md +92 -0
  15. data/README.md +96 -74
  16. data/Rakefile +6 -6
  17. data/bin/setup +8 -8
  18. data/bin/translatomatic +6 -6
  19. data/bin/travis +26 -0
  20. data/db/database.yml +9 -9
  21. data/db/migrate/201712170000_initial.rb +24 -23
  22. data/lib/translatomatic/cli.rb +204 -80
  23. data/lib/translatomatic/config.rb +12 -26
  24. data/lib/translatomatic/converter.rb +206 -142
  25. data/lib/translatomatic/converter_stats.rb +27 -27
  26. data/lib/translatomatic/database.rb +139 -99
  27. data/lib/translatomatic/escaped_unicode.rb +90 -90
  28. data/lib/translatomatic/extractor/base.rb +14 -0
  29. data/lib/translatomatic/extractor/ruby.rb +5 -0
  30. data/lib/translatomatic/extractor.rb +4 -0
  31. data/lib/translatomatic/http_request.rb +133 -0
  32. data/lib/translatomatic/locale.rb +52 -0
  33. data/lib/translatomatic/logger.rb +28 -0
  34. data/lib/translatomatic/model/locale.rb +21 -22
  35. data/lib/translatomatic/model/text.rb +17 -13
  36. data/lib/translatomatic/model.rb +4 -4
  37. data/lib/translatomatic/option.rb +24 -24
  38. data/lib/translatomatic/progress_updater.rb +15 -0
  39. data/lib/translatomatic/resource_file/base.rb +169 -137
  40. data/lib/translatomatic/resource_file/html.rb +46 -28
  41. data/lib/translatomatic/resource_file/markdown.rb +54 -0
  42. data/lib/translatomatic/resource_file/plist.rb +30 -29
  43. data/lib/translatomatic/resource_file/properties.rb +72 -60
  44. data/lib/translatomatic/resource_file/resw.rb +30 -0
  45. data/lib/translatomatic/resource_file/text.rb +29 -28
  46. data/lib/translatomatic/resource_file/xcode_strings.rb +71 -65
  47. data/lib/translatomatic/resource_file/xml.rb +79 -59
  48. data/lib/translatomatic/resource_file/yaml.rb +82 -80
  49. data/lib/translatomatic/resource_file.rb +76 -74
  50. data/lib/translatomatic/string.rb +160 -0
  51. data/lib/translatomatic/tmx/document.rb +100 -0
  52. data/lib/translatomatic/tmx/translation_unit.rb +19 -0
  53. data/lib/translatomatic/tmx.rb +4 -0
  54. data/lib/translatomatic/translation_result.rb +75 -57
  55. data/lib/translatomatic/translator/base.rb +83 -47
  56. data/lib/translatomatic/translator/frengly.rb +57 -64
  57. data/lib/translatomatic/translator/google.rb +31 -30
  58. data/lib/translatomatic/translator/microsoft.rb +33 -32
  59. data/lib/translatomatic/translator/my_memory.rb +64 -55
  60. data/lib/translatomatic/translator/yandex.rb +39 -37
  61. data/lib/translatomatic/translator.rb +63 -63
  62. data/lib/translatomatic/util.rb +15 -24
  63. data/lib/translatomatic/version.rb +4 -3
  64. data/lib/translatomatic.rb +32 -27
  65. data/translatomatic.gemspec +43 -45
  66. metadata +52 -18
  67. data/Gemfile.lock +0 -137
@@ -1,74 +1,76 @@
1
-
2
- module Translatomatic
3
- module ResourceFile
4
- class << self
5
- include Translatomatic::Util
6
- end
7
-
8
- # Load a resource file. If locale is not specified, the locale of the
9
- # file will be determined from the filename, or else the current default
10
- # locale will be used.
11
- # @param [String] path Path to the resource file
12
- # @param [String] locale Locale of the resource file
13
- # @return [Translatomatic::ResourceFile::Base] The resource file, or nil
14
- # if the file type is unsupported.
15
- def self.load(path, locale = nil)
16
- path = path.kind_of?(Pathname) ? path : Pathname.new(path)
17
- modules.each do |mod|
18
- # match on entire filename to support extensions containing locales
19
- if extension_match(mod, path)
20
- log.debug("attempting to load #{path.to_s} using #{mod.name.demodulize}")
21
- file = mod.new(path, locale)
22
- return file if file.valid?
23
- end
24
- end
25
- nil
26
- end
27
-
28
- # Find all resource files under the given directory. Follows symlinks.
29
- # @param [String, Pathname] path The path to search from
30
- # @return [Array<Translatomatic::ResourceFile>] Resource files found
31
- def self.find(path, options = {})
32
- files = []
33
- include_dot_directories = options[:include_dot_directories]
34
- path = Pathname.new(path) unless path.kind_of?(Pathname)
35
- path.find do |file|
36
- if !include_dot_directories && file.basename.to_s[0] == ?.
37
- Find.prune
38
- else
39
- resource = load(file)
40
- files << resource if resource
41
- end
42
- end
43
- files
44
- end
45
-
46
- # Find all configured resource file classes
47
- # @return [Array<Class>] Available resource file classes
48
- def self.modules
49
- self.constants.map { |c| self.const_get(c) }.select do |klass|
50
- klass.is_a?(Class) && klass != Base
51
- end
52
- end
53
-
54
- private
55
-
56
- def self.extension_match(mod, path)
57
- filename = path.basename.to_s.downcase
58
- mod.extensions.each do |extension|
59
- # don't match end of line in case file has locale extension
60
- return true if filename.match(/\.#{extension}/)
61
- end
62
- false
63
- end
64
- end
65
- end
66
-
67
- require 'translatomatic/resource_file/base'
68
- require 'translatomatic/resource_file/yaml'
69
- require 'translatomatic/resource_file/properties'
70
- require 'translatomatic/resource_file/text'
71
- require 'translatomatic/resource_file/xml'
72
- require 'translatomatic/resource_file/html'
73
- require 'translatomatic/resource_file/plist'
74
- require 'translatomatic/resource_file/xcode_strings'
1
+
2
module Translatomatic
  # Entry points for loading a single resource file and for discovering
  # resource files under a directory. The available file formats are the
  # classes defined as constants of this module (other than Base).
  module ResourceFile
    class << self
      include Translatomatic::Util
    end

    # Load a resource file. If locale is not specified, the locale of the
    # file will be determined from the filename, or else the current default
    # locale will be used.
    # @param [String, Pathname] path Path to the resource file
    # @param [String] locale Locale of the resource file
    # @return [Translatomatic::ResourceFile::Base] The resource file, or nil
    #   if the file type is unsupported.
    def self.load(path, locale = nil)
      path = Pathname.new(path) unless path.kind_of?(Pathname)
      modules.each do |mod|
        # match on entire filename to support extensions containing locales
        next unless extension_match(mod, path)

        log.debug("attempting to load #{path.to_s} using #{mod.name.demodulize}")
        file = mod.new(path, locale)
        # the first class that accepts the file wins; otherwise keep trying
        return file if file.valid?
      end
      nil
    end

    # Find all resource files under the given directory.
    # @param [String, Pathname] path The path to search from
    # @param [Hash] options Search options
    # @option options [boolean] :include_dot_directories When true, entries
    #   whose name starts with a dot are searched as well
    # @return [Array<Translatomatic::ResourceFile::Base>] Resource files found
    def self.find(path, options = {})
      files = []
      include_dot_directories = options[:include_dot_directories]
      path = Pathname.new(path) unless path.kind_of?(Pathname)
      path.find do |file|
        # The search root is yielded first. It must never be pruned, or a
        # search starting at "." (or any root whose name begins with a dot)
        # would skip the whole tree and return nothing.
        hidden = file != path && file.basename.to_s.start_with?('.')
        if hidden && !include_dot_directories
          Find.prune
        else
          resource = load(file)
          files << resource if resource
        end
      end
      files
    end

    # Find all configured resource file classes
    # @return [Array<Class>] Available resource file classes
    def self.modules
      constants.map { |c| const_get(c) }.select do |klass|
        klass.is_a?(Class) && klass != Base
      end
    end

    # Test whether the filename of path contains one of the extensions
    # declared by the given resource file class.
    #
    # NOTE: intended as an internal helper. A bare `private` does not apply
    # to methods defined with `def self.`, so this method is (and always
    # was) publicly callable; its visibility is deliberately left unchanged.
    # @param [Class] mod Resource file class responding to extensions
    # @param [Pathname] path Path of the candidate file
    # @return [boolean] true if any extension matches
    def self.extension_match(mod, path)
      filename = path.basename.to_s.downcase
      # don't match end of line in case file has locale extension
      mod.extensions.any? { |extension| filename.match(/\.#{extension}/) }
    end
  end
end
66
+
67
+ require 'translatomatic/resource_file/base'
68
+ require 'translatomatic/resource_file/yaml'
69
+ require 'translatomatic/resource_file/properties'
70
+ require 'translatomatic/resource_file/text'
71
+ require 'translatomatic/resource_file/xml'
72
+ require 'translatomatic/resource_file/html'
73
+ require 'translatomatic/resource_file/markdown'
74
+ require 'translatomatic/resource_file/plist'
75
+ require 'translatomatic/resource_file/resw'
76
+ require 'translatomatic/resource_file/xcode_strings'
@@ -0,0 +1,160 @@
1
module Translatomatic
  # A string value together with its locale. A string may be a substring
  # (sentence) of another string, in which case it records its parent and
  # its starting offset within the parent's value.
  class String

    # @return [String] The string
    attr_reader :value

    # @return [Translatomatic::Locale] The string's locale
    attr_reader :locale

    # @return [Translatomatic::String] If this string is a substring of
    #   another string, returns the original string. Otherwise, returns nil.
    attr_reader :parent

    # @return [Integer] If this string is a substring of another string,
    #   returns the starting offset of this string in the original.
    attr_reader :offset

    # @param [String] value The text; nil is treated as an empty string
    # @param [Object] locale Locale, passed to Translatomatic::Locale.parse
    # @param [Hash] options
    # @option options [Integer] :offset Offset within the parent (default 0)
    # @option options [Translatomatic::String] :parent The parent string
    def initialize(value, locale, options = {})
      @value = value || ''
      @locale = Translatomatic::Locale.parse(locale)
      @offset = options[:offset] || 0
      @parent = options[:parent]
    end

    # @return [String] The value of the string
    def to_s
      @value
    end

    # @return [Integer] Length of the string value
    def length
      @value.length
    end

    # @return [boolean] true if the string value is empty
    def empty?
      @value.empty?
    end

    # Match the string value against a regular expression.
    # @param [Regexp] regex The pattern
    # @return [MatchData] The match, or nil if there is none
    def match(regex)
      @value.match(regex)
    end

    # @return [boolean] true if this string is a substring of another string
    def substring?
      @parent ? true : false
    end

    # @return [Symbol] The type of string, corresponding to TMX segtype.
    # @see http://xml.coverpages.org/tmxSpec971212.html#SEGTYPE
    def type
      return :paragraph if sentences.length >= 2

      # a single segment is a sentence only if it ends with the delimiter
      delimiter = script_data.delimiter
      @value.strip.match(/#{delimiter}\s*$/) ? :sentence : :phrase
    end

    # Find all sentences in the string
    # @return [Array<Translatomatic::String>] List of sentences. Each
    #   sentence has this string as parent and an offset such that
    #   value[offset, length] of this string equals the sentence value.
    def sentences
      strings = []
      @value.scan(sentence_regex) do
        # capture the match first: the inner match below overwrites $~
        found = Regexp.last_match
        sentence = found[0]
        next if sentence.empty?

        # separate leading and trailing whitespace from the sentence text
        parts = sentence.match(/\A(\s*)(.*?)(\s*)\z/m)
        # Use the real position of the match rather than a running total.
        # scan skips characters between matches (a newline that does not
        # follow a delimiter), so matches are not always contiguous.
        start = found.begin(0) + parts[1].length
        strings << self.class.new(parts[2], locale, offset: start, parent: self)
      end

      # return [self] if there's only one sentence and it's equal to self
      strings.length == 1 && strings[0].eql?(self) ? [self] : strings
    end

    # Strings are considered equal when their hashes (value and locale) match.
    def eql?(other)
      other.kind_of?(Translatomatic::String) && other.hash == hash
    end

    def ==(other)
      eql?(other)
    end

    # @return [Integer] Hash of the value and locale
    def hash
      [value, locale].hash
    end

    # Sentence splitting rules for one language.
    class Script
      attr_reader :language
      attr_reader :delimiter # sentence delimiter
      attr_reader :trailing_space # delimiter requires trailing space or eol
      attr_reader :left_to_right # script direction

      def initialize(language:, delimiter:, trailing_space:, direction:)
        @language = language
        @delimiter = delimiter
        @trailing_space = trailing_space
        @left_to_right = direction == :ltr
        raise "invalid direction" unless [:ltr, :rtl].include?(direction)
      end
    end

    # Delimiters are regular expression source fragments: they are
    # interpolated into the sentence patterns below.
    SCRIPT_DATA = [
      # [language, delimiter, trailing space, direction]
      # japanese, no space after
      ["ja", "\u3002", false, :ltr],
      # chinese, no space after
      ["zh", "\u3002", false, :ltr], # can be written any direction
      # armenian, space after
      ["hy", ":", true, :ltr],
      # hindi, space after
      ["hi", "।", true, :ltr],
      # urdu, space after, right to left
      ["ur", "\u06d4", true, :rtl],
      # thai, spaces used to separate sentences
      ["th", "\\s", false, :ltr],
      # arabic, right to left
      ["ar", "\\.", true, :rtl],
      # hebrew, right to left
      ["he", "\\.", true, :rtl],
      # all other languages
      ["default", "\\.", true, :ltr],
    ]

    class << self
      # @return [Hash<String,Script>] Script rules keyed by language
      attr_reader :script_data
    end

    # build the language => Script lookup table once, at load time
    @script_data = SCRIPT_DATA.each_with_object({}) do |(lang, delimiter, trailing, direction), table|
      table[lang] = Script.new(language: lang, delimiter: delimiter,
                               trailing_space: trailing, direction: direction)
    end

    private

    # @return [Regexp] Pattern matching one sentence, including the
    #   delimiter and any whitespace that follows it
    def sentence_regex
      script = script_data
      if script.trailing_space
        /.*?(?:#{script.delimiter}\s+|$)/
      else
        # no trailing space after delimiter
        /.*?(?:#{script.delimiter}|$)/
      end
    end

    # @return [Script] Rules for this string's language, or the default rules
    def script_data
      data = self.class.script_data
      data[locale.language] || data["default"]
    end

  end
end
@@ -0,0 +1,100 @@
1
module Translatomatic::TMX
  # Translation Memory Exchange document
  class Document

    # DTD referenced by generated documents unless overridden in to_xml
    TMX_DTD = "http://www.ttt.org/oscarstandards/tmx/tmx14.dtd"

    class << self
      include Translatomatic::Util
    end

    # Create a new instance
    # @param [Array<TranslationUnit>] units A list of translation units.
    #   A single unit is wrapped in an array.
    # @param [Locale] source_locale Source locale
    # @param [String] origin Value written to the header's o-tmx attribute
    # @return A new TMX object
    def initialize(units, source_locale, origin)
      units = [units] unless units.kind_of?(Array)
      @units = units
      @source_locale = source_locale
      @origin = origin
    end

    # @param [Hash] options
    # @option options [String] :dtd DTD system identifier (default TMX_DTD)
    # @return [String] An XML string
    def to_xml(options = {})
      builder = Nokogiri::XML::Builder.new do |xml|
        dtd = options[:dtd] || TMX_DTD
        xml.doc.create_internal_subset('tmx', nil, dtd)
        xml.tmx(version: "1.4") do
          xml.header(creationtool: "Translatomatic",
            creationtoolversion: Translatomatic::VERSION,
            datatype: "PlainText",
            segtype: "phrase", # default segtype
            adminlang: @source_locale.to_s,
            srclang: @source_locale.to_s,
            "o-tmx": @origin
          )
          xml.body { tmx_body(xml) }
        end
      end
      builder.to_xml
    end

    # Create a TMX document from the given texts
    # @param [Array<Translatomatic::Model::Text>] texts List of texts
    # @return [Translatomatic::TMX::Document] TMX document
    # @raise [RuntimeError] if the texts have more than one translator or
    #   more than one source locale
    def self.from_texts(texts)
      # group texts by from_text_id to create units
      # source_locale: use from_text.locale
      # origin: use text.translator
      origins = texts.map { |text| text.translator }.compact.uniq
      raise "Multiple origins in texts" if origins.length > 1
      # texts without a from_text are the untranslated originals
      sources = texts.select { |text| text.from_text.nil? }
      source_locales = sources.map { |text| text.locale }.uniq
      raise "Multiple source locales in texts" if source_locales.length > 1
      units = units_from_texts(texts)

      new(units, source_locales[0], origins[0])
    end

    # Validate an XML string against the DTD named in its own DOCTYPE.
    # @param [String] xml The XML to check
    # @return [boolean] true if validation reports no errors; false if it
    #   reports errors or the document declares no DTD
    def self.valid?(xml)
      options = Nokogiri::XML::ParseOptions::DTDVALID
      doc = Nokogiri::XML::Document.parse(xml, nil, nil, options)
      dtd = doc.internal_subset
      return false unless dtd

      # validate returns the list of errors; an Array is always truthy, so
      # it must be tested for emptiness rather than returned directly
      dtd.validate(doc).empty?
    end

    # NOTE: a bare `private` does not apply to methods defined with
    # `def self.`, so the two class-level helpers below are (and always
    # were) publicly callable; their visibility is deliberately unchanged.

    # @return [Array<Translatomatic::TMX::TranslationUnit>] translation unit list
    def self.units_from_texts(texts)
      # group texts by from_text_id; originals are grouped under their own id
      texts_by_from_id = texts.group_by { |text| text.from_text_id || text.id }

      # create list of Translation Units
      texts_by_from_id.values.map do |list|
        tmx_unit(list.uniq.map { |text| string(text.value, text.locale) })
      end
    end

    # @param [Array<Translatomatic::String>] strings Strings for the unit
    # @return [Translatomatic::TMX::TranslationUnit] A new translation unit
    def self.tmx_unit(strings)
      Translatomatic::TMX::TranslationUnit.new(strings)
    end

    private

    # Write one tu element per translation unit, with one tuv per string.
    def tmx_body(xml)
      @units.each do |unit|
        # a unit without strings has no segtype and nothing to write
        next if unit.strings.empty?

        xml.tu("segtype": unit.strings[0].type) do
          unit.strings.each do |string|
            xml.tuv("xml:lang": string.locale.to_s) do
              xml.seg string.value
            end
          end
        end
      end
    end

  end # class
end # module
@@ -0,0 +1,19 @@
1
module Translatomatic::TMX
  # A list of strings that together form one TMX translation unit.
  class TranslationUnit

    # @return [Array<Translatomatic::String>] Strings in this translation unit
    attr_reader :strings

    # @param [Array<Translatomatic::String>] strings List of strings; a
    #   missing list is treated as empty
    def initialize(strings)
      @strings = strings ? strings : []
    end

    # Test translation unit validity.
    # A translation unit must contain at least two strings.
    # @return [boolean] true if this translation unit is valid
    def valid?
      @strings.length > 1
    end
  end
end
@@ -0,0 +1,4 @@
1
+ module Translatomatic::TMX; end
2
+
3
+ require 'translatomatic/tmx/translation_unit'
4
+ require 'translatomatic/tmx/document'
@@ -1,68 +1,86 @@
1
- require 'set'
2
-
3
- module Translatomatic
4
- class TranslationResult
5
-
6
- # Translation results
7
- # @return [Hash<String,String>] Translation results
8
- attr_reader :properties
9
-
10
- # @return [Locale] The locale of the original strings
11
- attr_reader :from_locale
12
-
13
- # @return [Locale] The target locale
14
- attr_reader :to_locale
15
-
16
- # @return [Set<String>] Untranslated strings
17
- attr_reader :untranslated
18
-
19
- # Create a translation result
20
- # @param [Hash<String,String>] properties Untranslated properties
21
- # @param [Locale] from_locale The locale of the untranslated strings
22
- # @param [Locale] to_locale The target locale
23
- def initialize(properties, from_locale, to_locale)
24
- @properties = properties.dup
25
- @value_to_keys = {}
26
- @untranslated = Set.new
27
- properties.each do |key, value|
28
- @untranslated << value
29
- keylist = (@value_to_keys[value] ||= [])
30
- keylist << key
31
- end
1
+ require 'set'
2
+
3
+ module Translatomatic
4
+ class TranslationResult
5
+
6
+ # Translation results
7
+ # @return [Hash<String,String>] Translation results
8
+ attr_reader :properties
9
+
10
+ # @return [Locale] The locale of the original strings
11
+ attr_reader :from_locale
12
+
13
+ # @return [Locale] The target locale
14
+ attr_reader :to_locale
15
+
16
+ # @return [Set<String>] Untranslated strings
17
+ attr_reader :untranslated
18
+
19
+ # Create a translation result
20
+ # @param [Hash<String,String>] properties Untranslated properties
21
+ # @param [Locale] from_locale The locale of the untranslated strings
22
+ # @param [Locale] to_locale The target locale
23
+ def initialize(properties, from_locale, to_locale)
24
+ @value_to_keys = {}
25
+ @untranslated = Set.new
32
26
  @from_locale = from_locale
33
27
  @to_locale = to_locale
34
- end
35
28
 
36
- # Update result with a list of translated strings.
37
- # @param [Array<String>] original Original strings
38
- # @param [Array<String>] translated Translated strings
39
- # @return [void]
40
- def update_strings(original, translated)
29
+ # duplicate strings
30
+ @properties = properties.transform_values { |i| i.dup }
31
+
32
+ properties.each do |key, value|
33
+ # split property value into sentences
34
+ string = string(value, from_locale)
35
+ string.sentences.each do |sentence|
36
+ @untranslated << sentence
37
+ keylist = (@value_to_keys[sentence.to_s] ||= [])
38
+ keylist << key
39
+ end
40
+ end
41
+ end
42
+
43
+ # Update result with a list of translated strings.
44
+ # @param [Array<String>] original Original strings
45
+ # @param [Array<String>] translated Translated strings
46
+ # @return [void]
47
+ def update_strings(original, translated)
41
48
  raise "strings length mismatch" unless original.length == translated.length
42
- original.zip(translated).each do |text1, text2|
43
- update(text1, text2)
49
+
50
+ # create list of [from, to] text conversions
51
+ conversions = []
52
+ original.zip(translated).each do |text1, text2|
53
+ conversions << [text1, text2]
44
54
  end
45
- end
46
55
 
47
- # Update result with texts from the database.
48
- # @param [Array<Translatomatic::Model::Text>] list Texts from database
49
- # @return [void]
50
- def update_db_strings(list)
51
- list.each do |t|
52
- original = t.from_text.value
53
- translated = t.value
54
- update(original, translated)
56
+ # sort conversion list by largest offset first so that we replace
57
+ # from the end of the string to the front, so substring offsets
58
+ # are correct in the target string.
59
+ conversions.sort_by! do |t1, t2|
60
+ t1.respond_to?(:offset) ? -t1.offset : 0
55
61
  end
56
- end
57
62
 
63
+ conversions.each do |text1, text2|
64
+ update(text1, text2)
65
+ end
66
+ end
67
+
58
68
  private
59
69
 
60
- def update(original, translated)
61
- keys = @value_to_keys[original]
70
+ include Translatomatic::Util
71
+
72
+ def update(original, translated)
73
+ keys = @value_to_keys[original.to_s]
62
74
  raise "no key mapping for text '#{original}'" unless keys
63
- keys.each { |key| @properties[key] = translated }
64
-
65
- @untranslated.delete(original)
66
- end
67
- end
68
- end
75
+ keys.each do |key|
76
+ if original.kind_of?(Translatomatic::String) && original.substring?
77
+ @properties[key][original.offset, original.length] = translated
78
+ else
79
+ @properties[key] = translated
80
+ end
81
+ end
82
+
83
+ @untranslated.delete(original)
84
+ end
85
+ end
86
+ end