translatomatic 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (79) hide show
  1. checksums.yaml +4 -4
  2. data/.translatomatic/config.yml +18 -0
  3. data/.travis.yml +33 -33
  4. data/Gemfile +6 -4
  5. data/README.de.md +53 -18
  6. data/README.es.md +55 -20
  7. data/README.fr.md +54 -19
  8. data/README.it.md +58 -23
  9. data/README.ja.md +54 -19
  10. data/README.ko.md +58 -23
  11. data/README.md +167 -141
  12. data/README.ms.md +51 -16
  13. data/README.pt.md +58 -23
  14. data/README.ru.md +53 -18
  15. data/README.sv.md +53 -18
  16. data/README.zh.md +53 -18
  17. data/bin/translatomatic +6 -6
  18. data/bin/travis +24 -26
  19. data/config/locales/translatomatic/de.yml +22 -11
  20. data/config/locales/translatomatic/en.yml +21 -12
  21. data/config/locales/translatomatic/es.yml +22 -11
  22. data/config/locales/translatomatic/fr.yml +22 -12
  23. data/config/locales/translatomatic/it.yml +22 -11
  24. data/config/locales/translatomatic/ja.yml +22 -11
  25. data/config/locales/translatomatic/ko.yml +22 -11
  26. data/config/locales/translatomatic/ms.yml +22 -11
  27. data/config/locales/translatomatic/pt.yml +22 -11
  28. data/config/locales/translatomatic/ru.yml +22 -11
  29. data/config/locales/translatomatic/sv.yml +22 -11
  30. data/config/locales/translatomatic/zh.yml +22 -11
  31. data/db/migrate/201712170000_initial.rb +25 -25
  32. data/lib/translatomatic/cli/base.rb +81 -73
  33. data/lib/translatomatic/cli/config.rb +110 -81
  34. data/lib/translatomatic/cli/main.rb +85 -72
  35. data/lib/translatomatic/cli/translate.rb +141 -106
  36. data/lib/translatomatic/cli.rb +8 -8
  37. data/lib/translatomatic/config.rb +302 -155
  38. data/lib/translatomatic/converter.rb +28 -260
  39. data/lib/translatomatic/database.rb +134 -134
  40. data/lib/translatomatic/define_options.rb +22 -0
  41. data/lib/translatomatic/escaped_unicode.rb +0 -0
  42. data/lib/translatomatic/extractor/base.rb +16 -16
  43. data/lib/translatomatic/extractor/ruby.rb +6 -6
  44. data/lib/translatomatic/extractor.rb +5 -5
  45. data/lib/translatomatic/file_translator.rb +269 -0
  46. data/lib/translatomatic/http_request.rb +162 -162
  47. data/lib/translatomatic/locale.rb +76 -76
  48. data/lib/translatomatic/logger.rb +23 -23
  49. data/lib/translatomatic/model/locale.rb +25 -25
  50. data/lib/translatomatic/model/text.rb +19 -19
  51. data/lib/translatomatic/model.rb +1 -1
  52. data/lib/translatomatic/option.rb +37 -41
  53. data/lib/translatomatic/progress_updater.rb +13 -13
  54. data/lib/translatomatic/resource_file/base.rb +269 -192
  55. data/lib/translatomatic/resource_file/csv.rb +37 -0
  56. data/lib/translatomatic/resource_file/html.rb +54 -47
  57. data/lib/translatomatic/resource_file/markdown.rb +50 -55
  58. data/lib/translatomatic/resource_file/plist.rb +153 -19
  59. data/lib/translatomatic/resource_file/po.rb +107 -0
  60. data/lib/translatomatic/resource_file/properties.rb +91 -90
  61. data/lib/translatomatic/resource_file/resw.rb +50 -30
  62. data/lib/translatomatic/resource_file/subtitle.rb +75 -0
  63. data/lib/translatomatic/resource_file/text.rb +24 -30
  64. data/lib/translatomatic/resource_file/xcode_strings.rb +75 -80
  65. data/lib/translatomatic/resource_file/xml.rb +98 -91
  66. data/lib/translatomatic/resource_file/yaml.rb +94 -116
  67. data/lib/translatomatic/resource_file.rb +87 -78
  68. data/lib/translatomatic/string.rb +188 -188
  69. data/lib/translatomatic/tmx/document.rb +99 -99
  70. data/lib/translatomatic/translation_result.rb +63 -63
  71. data/lib/translatomatic/{converter_stats.rb → translation_stats.rb} +17 -17
  72. data/lib/translatomatic/translator/base.rb +1 -1
  73. data/lib/translatomatic/translator/google.rb +2 -0
  74. data/lib/translatomatic/translator.rb +10 -2
  75. data/lib/translatomatic/util.rb +45 -45
  76. data/lib/translatomatic/version.rb +7 -7
  77. data/lib/translatomatic.rb +52 -49
  78. data/translatomatic.gemspec +3 -2
  79. metadata +25 -5
@@ -1,188 +1,188 @@
1
- module Translatomatic
2
- # A string object with an associated locale.
3
- class String
4
-
5
- # @return [String] The string
6
- attr_reader :value
7
-
8
- # @return [Translatomatic::Locale] The string's locale
9
- attr_reader :locale
10
-
11
- # @return [Translatomatic::String] If this string is a substring of
12
- # another string, returns the original string. Otherwise, returns nil.
13
- attr_reader :parent
14
-
15
- # @return [Number] If this string is a substring of another string,
16
- # returns the starting offset of this string in the original.
17
- attr_reader :offset
18
-
19
- def initialize(value, locale, options = {})
20
- @value = value.to_s || ''
21
- @locale = Translatomatic::Locale.parse(locale)
22
- @offset = options[:offset] || 0
23
- @parent = options[:parent]
24
- end
25
-
26
- # @return [String] The value of the string
27
- def to_s
28
- @value
29
- end
30
-
31
- # @return [Number] The length of the string
32
- def length
33
- @value.length
34
- end
35
-
36
- # @return [boolean] True if the string is empty
37
- def empty?
38
- @value.empty?
39
- end
40
-
41
- # Invokes value.match
42
- # @param pattern [Regexp,String] The regex pattern to match
43
- # @return [MatchData] Object describing the match, or nil if no match
44
- def match(pattern)
45
- @value.match(pattern)
46
- end
47
-
48
- # @return [boolean] true if this string is a substring of another string
49
- def substring?
50
- @parent ? true : false
51
- end
52
-
53
- # @return [Symbol] The type of string, corresponding to TMX segtype.
54
- # @see http://xml.coverpages.org/tmxSpec971212.html#SEGTYPE
55
- def type
56
- if sentences.length >= 2
57
- :paragraph
58
- else
59
- script = script_data
60
- @value.strip.match(/#{script.delimiter}\s*$/) ? :sentence : :phrase
61
- end
62
- end
63
-
64
- # Find all sentences in the string
65
- # @return [Array<Translatomatic::String] List of sentences
66
- def sentences
67
- substrings(sentence_regex)
68
- end
69
-
70
- # Find all substrings matching the given regex
71
- # @return [Array<Translatomatic::String] List of substrings
72
- def substrings(regex)
73
- matches = matches(@value, regex)
74
- strings = []
75
- matches.each do |match|
76
- substring = match.to_s
77
- # find leading and trailing whitespace
78
- next if substring.length == 0
79
-
80
- parts = substring.match(/\A(\s*)(.*?)(\s*)\z/m).to_a
81
- value = parts[2]
82
- offset = match.offset(0)[0]
83
- offset += parts[1].length # leading whitespace
84
- strings << self.class.new(value, locale, offset: offset, parent: self)
85
- end
86
-
87
- # return [self] if there's only one substring and it's equal to self
88
- strings.length == 1 && strings[0].eql?(self) ? [self] : strings
89
- end
90
-
91
- # @return [boolean] true if other is a {Translatomatic::String} with
92
- # the same value and locale.
93
- def eql?(other)
94
- other.kind_of?(Translatomatic::String) && other.hash == hash
95
- end
96
-
97
- # (see #eql?)
98
- def ==(other)
99
- eql?(other)
100
- end
101
-
102
- # @!visibility private
103
- def hash
104
- [value, locale].hash
105
- end
106
-
107
- private
108
-
109
- # @!visibility private
110
- class Script
111
- attr_reader :language
112
- attr_reader :delimiter # sentence delimiter
113
- attr_reader :trailing_space # delimiter requires trailing space or eol
114
- attr_reader :left_to_right # script direction
115
-
116
- def initialize(language:, delimiter:, trailing_space:, direction:)
117
- @language = language
118
- @delimiter = delimiter
119
- @trailing_space = trailing_space
120
- @left_to_right = direction == :ltr
121
- raise "invalid direction" unless [:ltr, :rtl].include?(direction)
122
- end
123
- end
124
-
125
- SCRIPT_DATA = [
126
- # [language, delimiter, trailing space, direction]
127
- # japanese, no space after
128
- ["ja", "\u3002", false, :ltr],
129
- # chinese, no space after
130
- ["zh", "\u3002", false, :ltr], # can be written any direction
131
- # armenian, space after
132
- ["hy", ":", true, :ltr],
133
- # hindi, space after
134
- ["hi", "।", true, :ltr],
135
- # urdu, space after, right to left
136
- ["ur", "\u06d4", true, :rtl],
137
- # thai, spaces used to separate sentences
138
- ["th", "\\s", false, :ltr],
139
- # arabic, right to left
140
- ["ar", "\\.", true, :rtl],
141
- # hebrew, right to left
142
- ["he", "\\.", true, :rtl],
143
- # all other languages
144
- ["default", "\\.", true, :ltr],
145
- ]
146
-
147
- class << self
148
- attr_reader :script_data
149
- end
150
-
151
- begin
152
- script_data = {}
153
- SCRIPT_DATA.each do |lang, delimiter, trailing, ltr|
154
- script = Script.new(language: lang, delimiter: delimiter,
155
- trailing_space: trailing, direction: ltr)
156
- script_data[lang] = script
157
- end
158
- @script_data = script_data
159
- end
160
-
161
- def matches(s, re)
162
- start_at = 0
163
- matches = []
164
- while(m = s.match(re, start_at))
165
- break if m.to_s.empty?
166
- matches.push(m)
167
- start_at = m.end(0)
168
- end
169
- matches
170
- end
171
-
172
- def sentence_regex
173
- script = script_data
174
- if script.trailing_space
175
- regex = /.*?(?:#{script.delimiter}\s+|\z)/m
176
- else
177
- # no trailing space after delimiter
178
- regex = /.*?(?:#{script.delimiter}|\z)/m
179
- end
180
- end
181
-
182
- def script_data
183
- data = self.class.script_data
184
- data[locale.language] || data["default"]
185
- end
186
-
187
- end
188
- end
1
+ module Translatomatic
2
+ # A string object with an associated locale.
3
+ class String
4
+
5
+ # @return [String] The string
6
+ attr_reader :value
7
+
8
+ # @return [Translatomatic::Locale] The string's locale
9
+ attr_reader :locale
10
+
11
+ # @return [Translatomatic::String] If this string is a substring of
12
+ # another string, returns the original string. Otherwise, returns nil.
13
+ attr_reader :parent
14
+
15
+ # @return [Number] If this string is a substring of another string,
16
+ # returns the starting offset of this string in the original.
17
+ attr_reader :offset
18
+
19
+ def initialize(value, locale, options = {})
20
+ @value = value.to_s || ''
21
+ @locale = Translatomatic::Locale.parse(locale)
22
+ @offset = options[:offset] || 0
23
+ @parent = options[:parent]
24
+ end
25
+
26
+ # @return [String] The value of the string
27
+ def to_s
28
+ @value
29
+ end
30
+
31
+ # @return [Number] The length of the string
32
+ def length
33
+ @value.length
34
+ end
35
+
36
+ # @return [boolean] True if the string is empty
37
+ def empty?
38
+ @value.empty?
39
+ end
40
+
41
+ # Invokes value.match
42
+ # @param pattern [Regexp,String] The regex pattern to match
43
+ # @return [MatchData] Object describing the match, or nil if no match
44
+ def match(pattern)
45
+ @value.match(pattern)
46
+ end
47
+
48
+ # @return [boolean] true if this string is a substring of another string
49
+ def substring?
50
+ @parent ? true : false
51
+ end
52
+
53
+ # @return [Symbol] The type of string, corresponding to TMX segtype.
54
+ # @see http://xml.coverpages.org/tmxSpec971212.html#SEGTYPE
55
+ def type
56
+ if sentences.length >= 2
57
+ :paragraph
58
+ else
59
+ script = script_data
60
+ @value.strip.match(/#{script.delimiter}\s*$/) ? :sentence : :phrase
61
+ end
62
+ end
63
+
64
+ # Find all sentences in the string
65
+ # @return [Array<Translatomatic::String] List of sentences
66
+ def sentences
67
+ substrings(sentence_regex)
68
+ end
69
+
70
+ # Find all substrings matching the given regex
71
+ # @return [Array<Translatomatic::String] List of substrings
72
+ def substrings(regex)
73
+ matches = matches(@value, regex)
74
+ strings = []
75
+ matches.each do |match|
76
+ substring = match.to_s
77
+ # find leading and trailing whitespace
78
+ next if substring.length == 0
79
+
80
+ parts = substring.match(/\A(\s*)(.*?)(\s*)\z/m).to_a
81
+ value = parts[2]
82
+ offset = match.offset(0)[0]
83
+ offset += parts[1].length # leading whitespace
84
+ strings << self.class.new(value, locale, offset: offset, parent: self)
85
+ end
86
+
87
+ # return [self] if there's only one substring and it's equal to self
88
+ strings.length == 1 && strings[0].eql?(self) ? [self] : strings
89
+ end
90
+
91
+ # @return [boolean] true if other is a {Translatomatic::String} with
92
+ # the same value and locale.
93
+ def eql?(other)
94
+ other.kind_of?(Translatomatic::String) && other.hash == hash
95
+ end
96
+
97
+ # (see #eql?)
98
+ def ==(other)
99
+ eql?(other)
100
+ end
101
+
102
+ # @!visibility private
103
+ def hash
104
+ [value, locale].hash
105
+ end
106
+
107
+ private
108
+
109
+ # @!visibility private
110
+ class Script
111
+ attr_reader :language
112
+ attr_reader :delimiter # sentence delimiter
113
+ attr_reader :trailing_space # delimiter requires trailing space or eol
114
+ attr_reader :left_to_right # script direction
115
+
116
+ def initialize(language:, delimiter:, trailing_space:, direction:)
117
+ @language = language
118
+ @delimiter = delimiter
119
+ @trailing_space = trailing_space
120
+ @left_to_right = direction == :ltr
121
+ raise "invalid direction" unless [:ltr, :rtl].include?(direction)
122
+ end
123
+ end
124
+
125
+ SCRIPT_DATA = [
126
+ # [language, delimiter, trailing space, direction]
127
+ # japanese, no space after
128
+ ["ja", "\u3002", false, :ltr],
129
+ # chinese, no space after
130
+ ["zh", "\u3002", false, :ltr], # can be written any direction
131
+ # armenian, space after
132
+ ["hy", ":", true, :ltr],
133
+ # hindi, space after
134
+ ["hi", "।", true, :ltr],
135
+ # urdu, space after, right to left
136
+ ["ur", "\u06d4", true, :rtl],
137
+ # thai, spaces used to separate sentences
138
+ ["th", "\\s", false, :ltr],
139
+ # arabic, right to left
140
+ ["ar", "\\.", true, :rtl],
141
+ # hebrew, right to left
142
+ ["he", "\\.", true, :rtl],
143
+ # all other languages
144
+ ["default", "\\.", true, :ltr],
145
+ ]
146
+
147
+ class << self
148
+ attr_reader :script_data
149
+ end
150
+
151
+ begin
152
+ script_data = {}
153
+ SCRIPT_DATA.each do |lang, delimiter, trailing, ltr|
154
+ script = Script.new(language: lang, delimiter: delimiter,
155
+ trailing_space: trailing, direction: ltr)
156
+ script_data[lang] = script
157
+ end
158
+ @script_data = script_data
159
+ end
160
+
161
+ def matches(s, re)
162
+ start_at = 0
163
+ matches = []
164
+ while(m = s.match(re, start_at))
165
+ break if m.to_s.empty?
166
+ matches.push(m)
167
+ start_at = m.end(0)
168
+ end
169
+ matches
170
+ end
171
+
172
+ def sentence_regex
173
+ script = script_data
174
+ if script.trailing_space
175
+ regex = /.*?(?:#{script.delimiter}\s+|\z)/m
176
+ else
177
+ # no trailing space after delimiter
178
+ regex = /.*?(?:#{script.delimiter}|\z)/m
179
+ end
180
+ end
181
+
182
+ def script_data
183
+ data = self.class.script_data
184
+ data[locale.language] || data["default"]
185
+ end
186
+
187
+ end
188
+ end
@@ -1,99 +1,99 @@
1
- module Translatomatic::TMX
2
- # Translation Memory Exchange document
3
- class Document
4
-
5
- # Create a new instance
6
- # @param units [Array<TranslationUnit>] A list of translation units
7
- # @param source_locale [Locale] Source locale
8
- # @return [Translatomatic::TMX::Document] a new TMX object
9
- def initialize(units, source_locale)
10
- units = [units] unless units.kind_of?(Array)
11
- @units = units
12
- @source_locale = source_locale
13
- end
14
-
15
- # @return [String] An XML string
16
- def to_xml(options = {})
17
- builder = Nokogiri::XML::Builder.new do |xml|
18
- dtd = options[:dtd] || TMX_DTD
19
- xml.doc.create_internal_subset('tmx', nil, dtd)
20
- xml.tmx(version: "1.4") do
21
- xml.header(creationtool: "Translatomatic",
22
- creationtoolversion: Translatomatic::VERSION,
23
- datatype: "PlainText",
24
- segtype: "phrase", # default segtype
25
- adminlang: @source_locale.to_s,
26
- srclang: @source_locale.to_s,
27
- "o-tmf": DEFAULT_OTMF
28
- )
29
- xml.body { tmx_body(xml) }
30
- end
31
- end
32
- builder.to_xml
33
- end
34
-
35
- # Create a TMX document from the given converter
36
- # @param texts [Array<Translatomatic::Model::Text>] List of texts
37
- # @return [Translatomatic::TMX::Document] TMX document
38
- def self.from_texts(texts)
39
- # group texts by from_text_id to create units
40
- # source_locale: use from_text.locale
41
- # origin: use text.translator
42
- sources = texts.select { |i| i.from_text.nil? }
43
- source_locales = sources.collect { |i| i.locale }.uniq
44
- raise t("tmx.multiple_locales") if source_locales.length > 1
45
- units = units_from_texts(texts)
46
-
47
- return new(units, source_locales[0])
48
- end
49
-
50
- def self.valid?(xml)
51
- options = Nokogiri::XML::ParseOptions::DTDVALID
52
- doc = Nokogiri::XML::Document.parse(xml, nil, nil, options)
53
- doc.internal_subset.validate(doc)
54
- end
55
-
56
- private
57
-
58
- class << self
59
- include Translatomatic::Util
60
- end
61
-
62
- TMX_DTD = "http://www.ttt.org/oscarstandards/tmx/tmx14.dtd"
63
- DEFAULT_OTMF = "Translatomatic"
64
-
65
- def tmx_body(xml)
66
- @units.each do |unit|
67
- xml.tu("segtype": unit.strings[0].type) do
68
- unit.strings.each do |string|
69
- xml.tuv("xml:lang": string.locale.to_s) do
70
- xml.seg string.value
71
- end
72
- end
73
- end
74
- end
75
- end
76
-
77
- # @return [Array<Translatomatic::TMX::TranslationUnit] translation unit list
78
- def self.units_from_texts(texts)
79
- # group texts by from_text_id
80
- texts_by_from_id = {}
81
- texts.each do |text|
82
- id = text.from_text_id || text.id
83
- list = (texts_by_from_id[id] ||= [])
84
- list << text
85
- end
86
-
87
- # create list of Translation Units
88
- texts_by_from_id.values.collect do |list|
89
- strings = list.uniq.collect { |i| string(i.value, i.locale) }
90
- tmx_unit(strings)
91
- end
92
- end
93
-
94
- def self.tmx_unit(strings)
95
- Translatomatic::TMX::TranslationUnit.new(strings)
96
- end
97
-
98
- end # class
99
- end # module
1
+ module Translatomatic::TMX
2
+ # Translation Memory Exchange document
3
+ class Document
4
+
5
+ # Create a new instance
6
+ # @param units [Array<TranslationUnit>] A list of translation units
7
+ # @param source_locale [Locale] Source locale
8
+ # @return [Translatomatic::TMX::Document] a new TMX object
9
+ def initialize(units, source_locale)
10
+ units = [units] unless units.kind_of?(Array)
11
+ @units = units
12
+ @source_locale = source_locale
13
+ end
14
+
15
+ # @return [String] An XML string
16
+ def to_xml(options = {})
17
+ builder = Nokogiri::XML::Builder.new do |xml|
18
+ dtd = options[:dtd] || TMX_DTD
19
+ xml.doc.create_internal_subset('tmx', nil, dtd)
20
+ xml.tmx(version: "1.4") do
21
+ xml.header(creationtool: "Translatomatic",
22
+ creationtoolversion: Translatomatic::VERSION,
23
+ datatype: "PlainText",
24
+ segtype: "phrase", # default segtype
25
+ adminlang: @source_locale.to_s,
26
+ srclang: @source_locale.to_s,
27
+ "o-tmf": DEFAULT_OTMF
28
+ )
29
+ xml.body { tmx_body(xml) }
30
+ end
31
+ end
32
+ builder.to_xml
33
+ end
34
+
35
+ # Create a TMX document from the given converter
36
+ # @param texts [Array<Translatomatic::Model::Text>] List of texts
37
+ # @return [Translatomatic::TMX::Document] TMX document
38
+ def self.from_texts(texts)
39
+ # group texts by from_text_id to create units
40
+ # source_locale: use from_text.locale
41
+ # origin: use text.translator
42
+ sources = texts.select { |i| i.from_text.nil? }
43
+ source_locales = sources.collect { |i| i.locale }.uniq
44
+ raise t("tmx.multiple_locales") if source_locales.length > 1
45
+ units = units_from_texts(texts)
46
+
47
+ return new(units, source_locales[0])
48
+ end
49
+
50
+ def self.valid?(xml)
51
+ options = Nokogiri::XML::ParseOptions::DTDVALID
52
+ doc = Nokogiri::XML::Document.parse(xml, nil, nil, options)
53
+ doc.internal_subset.validate(doc)
54
+ end
55
+
56
+ private
57
+
58
+ class << self
59
+ include Translatomatic::Util
60
+ end
61
+
62
+ TMX_DTD = "http://www.ttt.org/oscarstandards/tmx/tmx14.dtd"
63
+ DEFAULT_OTMF = "Translatomatic"
64
+
65
+ def tmx_body(xml)
66
+ @units.each do |unit|
67
+ xml.tu("segtype": unit.strings[0].type) do
68
+ unit.strings.each do |string|
69
+ xml.tuv("xml:lang": string.locale.to_s) do
70
+ xml.seg string.value
71
+ end
72
+ end
73
+ end
74
+ end
75
+ end
76
+
77
+ # @return [Array<Translatomatic::TMX::TranslationUnit] translation unit list
78
+ def self.units_from_texts(texts)
79
+ # group texts by from_text_id
80
+ texts_by_from_id = {}
81
+ texts.each do |text|
82
+ id = text.from_text_id || text.id
83
+ list = (texts_by_from_id[id] ||= [])
84
+ list << text
85
+ end
86
+
87
+ # create list of Translation Units
88
+ texts_by_from_id.values.collect do |list|
89
+ strings = list.uniq.collect { |i| string(i.value, i.locale) }
90
+ tmx_unit(strings)
91
+ end
92
+ end
93
+
94
+ def self.tmx_unit(strings)
95
+ Translatomatic::TMX::TranslationUnit.new(strings)
96
+ end
97
+
98
+ end # class
99
+ end # module