tml 4.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +22 -0
- data/README.md +243 -0
- data/Rakefile +9 -0
- data/lib/tml.rb +56 -0
- data/lib/tml/api/client.rb +206 -0
- data/lib/tml/api/post_office.rb +71 -0
- data/lib/tml/application.rb +254 -0
- data/lib/tml/base.rb +116 -0
- data/lib/tml/cache.rb +143 -0
- data/lib/tml/cache_adapters/file.rb +89 -0
- data/lib/tml/cache_adapters/memcache.rb +104 -0
- data/lib/tml/cache_adapters/memory.rb +85 -0
- data/lib/tml/cache_adapters/redis.rb +108 -0
- data/lib/tml/config.rb +410 -0
- data/lib/tml/decorators/base.rb +52 -0
- data/lib/tml/decorators/default.rb +43 -0
- data/lib/tml/decorators/html.rb +102 -0
- data/lib/tml/exception.rb +35 -0
- data/lib/tml/ext/array.rb +86 -0
- data/lib/tml/ext/date.rb +99 -0
- data/lib/tml/ext/fixnum.rb +47 -0
- data/lib/tml/ext/hash.rb +99 -0
- data/lib/tml/ext/string.rb +56 -0
- data/lib/tml/ext/time.rb +89 -0
- data/lib/tml/generators/cache/base.rb +117 -0
- data/lib/tml/generators/cache/file.rb +159 -0
- data/lib/tml/language.rb +175 -0
- data/lib/tml/language_case.rb +105 -0
- data/lib/tml/language_case_rule.rb +76 -0
- data/lib/tml/language_context.rb +117 -0
- data/lib/tml/language_context_rule.rb +56 -0
- data/lib/tml/languages/en.json +1363 -0
- data/lib/tml/logger.rb +109 -0
- data/lib/tml/rules_engine/evaluator.rb +162 -0
- data/lib/tml/rules_engine/parser.rb +65 -0
- data/lib/tml/session.rb +199 -0
- data/lib/tml/source.rb +106 -0
- data/lib/tml/tokenizers/data.rb +96 -0
- data/lib/tml/tokenizers/decoration.rb +204 -0
- data/lib/tml/tokenizers/dom.rb +346 -0
- data/lib/tml/tokens/data.rb +403 -0
- data/lib/tml/tokens/method.rb +61 -0
- data/lib/tml/tokens/transform.rb +223 -0
- data/lib/tml/translation.rb +67 -0
- data/lib/tml/translation_key.rb +178 -0
- data/lib/tml/translator.rb +47 -0
- data/lib/tml/utils.rb +130 -0
- data/lib/tml/version.rb +34 -0
- metadata +121 -0
data/lib/tml/source.rb
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#--
|
3
|
+
# Copyright (c) 2015 Translation Exchange, Inc
|
4
|
+
#
|
5
|
+
# _______ _ _ _ ______ _
|
6
|
+
# |__ __| | | | | (_) | ____| | |
|
7
|
+
# | |_ __ __ _ _ __ ___| | __ _| |_ _ ___ _ __ | |__ __ _____| |__ __ _ _ __ __ _ ___
|
8
|
+
# | | '__/ _` | '_ \/ __| |/ _` | __| |/ _ \| '_ \| __| \ \/ / __| '_ \ / _` | '_ \ / _` |/ _ \
|
9
|
+
# | | | | (_| | | | \__ \ | (_| | |_| | (_) | | | | |____ > < (__| | | | (_| | | | | (_| | __/
|
10
|
+
# |_|_| \__,_|_| |_|___/_|\__,_|\__|_|\___/|_| |_|______/_/\_\___|_| |_|\__,_|_| |_|\__, |\___|
|
11
|
+
# __/ |
|
12
|
+
# |___/
|
13
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
14
|
+
# a copy of this software and associated documentation files (the
|
15
|
+
# "Software"), to deal in the Software without restriction, including
|
16
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
17
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
18
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
19
|
+
# the following conditions:
|
20
|
+
#
|
21
|
+
# The above copyright notice and this permission notice shall be
|
22
|
+
# included in all copies or substantial portions of the Software.
|
23
|
+
#
|
24
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
25
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
27
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
28
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
29
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
30
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
31
|
+
#++
|
32
|
+
|
33
|
+
require 'digest/md5'
|
34
|
+
|
35
|
+
# A translation source registered with an application. Each source is
# identified by a key derived from its source string, and memoizes the
# translations fetched for it, grouped by locale and translation key.
class Tml::Source < Tml::Base
  belongs_to :application
  attributes :key, :source, :url, :name, :description
  has_many :translations

  # Reduces a URL to a normalized path.
  #
  # url - URL or path string.
  #
  # Returns nil for a nil/empty url, '/' when the URL has no path, otherwise
  # the path with a leading '/' and without a trailing '/'.
  # Errors raised by URI.parse for malformed input are not rescued here.
  def self.normalize(url)
    return nil if url.nil? or url == ''

    path = URI.parse(url).path
    return '/' if path.nil? or path == '' or path == '/'

    # always must start with /
    path = "/#{path}" unless path.start_with?('/')
    # should not end with /
    path = path[0..-2] if path.end_with?('/')
    path
  end

  # Returns the MD5 hex digest of the source string (nil is hashed as '').
  def self.generate_key(source)
    Digest::MD5.hexdigest(source.to_s)
  end

  # Builds the cache key "<locale>/sources/<source path segments>".
  def self.cache_key(locale, source)
    File.join(locale, 'sources', source.split('/'))
  end

  # attrs - attribute hash passed to Tml::Base; when it yields no key, one is
  #         generated from attrs[:source].
  def initialize(attrs = {})
    super
    self.key ||= Tml::Source.generate_key(attrs[:source])
  end

  # Loads the translations of this source for the given locale through the
  # application's API client and stores them under translations[locale].
  # A locale that already has an entry is not fetched again.
  #
  # Returns self in every case; a Tml::Exception raised while fetching is
  # swallowed (best effort) and leaves an empty entry for the locale.
  def fetch_translations(locale)
    self.translations ||= {}
    return self if self.translations[locale]

    self.translations[locale] = {}

    results = self.application.api_client.get(
      "sources/#{self.key}/translations",
      {:locale => locale, :per_page => 10000},
      {:cache_key => Tml::Source.cache_key(locale, self.source)}
    )

    # Nothing to index when the client hands back no payload.
    return self unless results

    results.each do |key, data|
      # Each entry is either a hash wrapping the list under 'translations'
      # or the list itself.
      translations_data = data.is_a?(Hash) ? data['translations'] : data
      self.translations[locale][key] = translations_data.map do |t|
        Tml::Translation.new(
          :locale => t['locale'] || locale,
          :label => t['label'],
          :context => t['context']
        )
      end
    end

    self
  rescue Tml::Exception
    self
  end

  # Returns the translations memoized for the locale and translation key,
  # or nil when none have been loaded.
  def cached_translations(locale, key)
    self.translations ||= {}
    self.translations[locale] ||= {}
    self.translations[locale][key]
  end

  # Deletes this source's cache entry for every language of the application.
  def reset_cache
    application.languages.each do |lang|
      Tml.cache.delete(Tml::Source.cache_key(lang.locale, self.source))
    end
  end
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#--
|
3
|
+
# Copyright (c) 2015 Translation Exchange, Inc
|
4
|
+
#
|
5
|
+
# _______ _ _ _ ______ _
|
6
|
+
# |__ __| | | | | (_) | ____| | |
|
7
|
+
# | |_ __ __ _ _ __ ___| | __ _| |_ _ ___ _ __ | |__ __ _____| |__ __ _ _ __ __ _ ___
|
8
|
+
# | | '__/ _` | '_ \/ __| |/ _` | __| |/ _ \| '_ \| __| \ \/ / __| '_ \ / _` | '_ \ / _` |/ _ \
|
9
|
+
# | | | | (_| | | | \__ \ | (_| | |_| | (_) | | | | |____ > < (__| | | | (_| | | | | (_| | __/
|
10
|
+
# |_|_| \__,_|_| |_|___/_|\__,_|\__|_|\___/|_| |_|______/_/\_\___|_| |_|\__,_|_| |_|\__, |\___|
|
11
|
+
# __/ |
|
12
|
+
# |___/
|
13
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
14
|
+
# a copy of this software and associated documentation files (the
|
15
|
+
# "Software"), to deal in the Software without restriction, including
|
16
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
17
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
18
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
19
|
+
# the following conditions:
|
20
|
+
#
|
21
|
+
# The above copyright notice and this permission notice shall be
|
22
|
+
# included in all copies or substantial portions of the Software.
|
23
|
+
#
|
24
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
25
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
27
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
28
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
29
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
30
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
31
|
+
#++
|
32
|
+
|
33
|
+
#######################################################################
|
34
|
+
#
|
35
|
+
# Decoration Token Forms:
|
36
|
+
#
|
37
|
+
# [link: click here]
|
38
|
+
# or
|
39
|
+
# [link] click here [/link]
|
40
|
+
#
|
41
|
+
# Decoration Tokens Allow Nesting:
|
42
|
+
#
|
43
|
+
# [link: {count} {_messages}]
|
44
|
+
# [link: {count||message}]
|
45
|
+
# [link: {count||person, people}]
|
46
|
+
# [link: {user.name}]
|
47
|
+
#
|
48
|
+
#######################################################################
|
49
|
+
|
50
|
+
module Tml
  module Tokenizers
    # Collects the data-style tokens ({...}) found in a label and substitutes
    # them using the supplied context.
    class Data

      attr_accessor :text, :context, :tokens, :opts

      # Token classes asked, in order, to parse the label.
      def self.supported_tokens
        [Tml::Tokens::Data, Tml::Tokens::Method, Tml::Tokens::Transform]
      end

      # Truthy (index of the first '{') when the label may contain data
      # tokens, nil otherwise.
      def self.required?(label)
        label.index("{")
      end

      # text    - label to tokenize
      # context - token values used during substitution
      # opts    - options; :allowed_tokens restricts which tokens are substituted
      def initialize(text, context={}, opts={})
        self.text = text
        self.context = context
        self.opts = opts
        self.tokens = []
        tokenize
      end

      # Rebuilds the token list from the text: every supported token class
      # parses the text and the results are flattened and de-duplicated.
      #
      # Returns the token array. Non-bang flatten/uniq are used on purpose:
      # the bang variants return nil when they change nothing, which made the
      # chained call fragile and the return value unreliable.
      def tokenize
        self.tokens = self.class.supported_tokens.map { |klass| klass.parse(self.text) }.flatten.uniq
      end

      # True when no :allowed_tokens option is given, or when the option has a
      # non-nil entry for the token's name.
      def token_allowed?(token)
        return true unless opts[:allowed_tokens]
        not opts[:allowed_tokens][token.name].nil?
      end

      # Lets every allowed token substitute itself into the label, in order.
      #
      # Returns the resulting label.
      def substitute(language, options = {})
        label = self.text
        tokens.each do |token|
          next unless token_allowed?(token)
          label = token.substitute(label, context, language, options)
        end
        label
      end

    end
  end
end
|
@@ -0,0 +1,204 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#--
|
3
|
+
# Copyright (c) 2015 Translation Exchange, Inc
|
4
|
+
#
|
5
|
+
# _______ _ _ _ ______ _
|
6
|
+
# |__ __| | | | | (_) | ____| | |
|
7
|
+
# | |_ __ __ _ _ __ ___| | __ _| |_ _ ___ _ __ | |__ __ _____| |__ __ _ _ __ __ _ ___
|
8
|
+
# | | '__/ _` | '_ \/ __| |/ _` | __| |/ _ \| '_ \| __| \ \/ / __| '_ \ / _` | '_ \ / _` |/ _ \
|
9
|
+
# | | | | (_| | | | \__ \ | (_| | |_| | (_) | | | | |____ > < (__| | | | (_| | | | | (_| | __/
|
10
|
+
# |_|_| \__,_|_| |_|___/_|\__,_|\__|_|\___/|_| |_|______/_/\_\___|_| |_|\__,_|_| |_|\__, |\___|
|
11
|
+
# __/ |
|
12
|
+
# |___/
|
13
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
14
|
+
# a copy of this software and associated documentation files (the
|
15
|
+
# "Software"), to deal in the Software without restriction, including
|
16
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
17
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
18
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
19
|
+
# the following conditions:
|
20
|
+
#
|
21
|
+
# The above copyright notice and this permission notice shall be
|
22
|
+
# included in all copies or substantial portions of the Software.
|
23
|
+
#
|
24
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
25
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
27
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
28
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
29
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
30
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
31
|
+
#++
|
32
|
+
|
33
|
+
#######################################################################
|
34
|
+
#
|
35
|
+
# Decoration Token Forms:
|
36
|
+
#
|
37
|
+
# [link: click here]
|
38
|
+
# or
|
39
|
+
# [link] click here [/link]
|
40
|
+
#
|
41
|
+
# Decoration Tokens Allow Nesting:
|
42
|
+
#
|
43
|
+
# [link: {count} {_messages}]
|
44
|
+
# [link: {count||message}]
|
45
|
+
# [link: {count||person, people}]
|
46
|
+
# [link: {user.name}]
|
47
|
+
#
|
48
|
+
#######################################################################
|
49
|
+
|
50
|
+
module Tml
  module Tokenizers
    # Parses decoration tokens in a label - short form "[name: value]" and
    # long form "[name]value[/name]" - into a tree and renders the tree by
    # applying the decoration found for each token in the context (or the
    # configured default decoration).
    class Decoration

      attr_reader :tokens, :fragments, :context, :text, :opts

      # Name of the synthetic token wrapped around the whole label so that
      # the parser always starts from a single root.
      RESERVED_TOKEN = 'tml'

      RE_SHORT_TOKEN_START = '\[[\w]*:'
      RE_SHORT_TOKEN_END = '\]'
      RE_LONG_TOKEN_START = '\[[\w]*\]'
      RE_LONG_TOKEN_END = '\[\/[\w]*\]'
      RE_TEXT = '[^\[\]]+' #'[\w\s!.:{}\(\)\|,?]*'

      # Truthy (index of the first '[') when the label may contain decoration
      # tokens, nil otherwise.
      def self.required?(label)
        label.index('[')
      end

      # text    - label to decorate; it is wrapped in the reserved root token
      # context - token name (symbol or string) => Proc, String, Hash or Array
      # opts    - options; :allowed_tokens lists the token names to apply
      def initialize(text, context = {}, opts = {})
        @text = "[#{RESERVED_TOKEN}]#{text}[/#{RESERVED_TOKEN}]"
        @context = context
        @opts = opts
        tokenize
      end

      # Splits the text into token delimiters and text runs and clears the
      # list of token names seen so far.
      def tokenize
        re = [RE_SHORT_TOKEN_START,
              RE_SHORT_TOKEN_END,
              RE_LONG_TOKEN_START,
              RE_LONG_TOKEN_END,
              RE_TEXT].join('|')
        @fragments = text.scan(/#{re}/)
        @tokens = []
      end

      # Consumes the next fragment. A token opener yields a nested tree
      # ([name, child, ...]); anything else is returned as a string.
      def parse
        return @text unless fragments
        token = fragments.shift

        if token.match(/#{RE_SHORT_TOKEN_START}/)
          return parse_tree(token.gsub(/[\[:]/, ''), :short)
        end

        if token.match(/#{RE_LONG_TOKEN_START}/)
          return parse_tree(token.gsub(/[\[\]]/, ''), :long)
        end

        token.to_s
      end

      # Builds the tree for the token whose opener was just consumed,
      # reading children until its terminator (or the end of the fragments)
      # and then discarding the terminator.
      #
      # name - token name without delimiters
      # type - :short or :long
      def parse_tree(name, type = :short)
        tree = [name]
        @tokens << name unless (@tokens.include?(name) or name == RESERVED_TOKEN)

        if type == :short
          # A short token ends at "]" (a long-form closer is tolerated as
          # well). The fragment must BE a terminator: merely containing "]"
          # is not enough, otherwise a nested long opener such as "[b]"
          # would end the token and be thrown away.
          short_end = /\A(?:#{RE_SHORT_TOKEN_END}|#{RE_LONG_TOKEN_END})\z/
          first = true
          until fragments.first.nil? or fragments.first.match(short_end)
            value = parse
            if first and value.is_a?(String)
              # drop the blank that follows "name:"
              value = value.lstrip
              first = false
            end
            tree << value
          end
        elsif type == :long
          long_end = /\A#{RE_LONG_TOKEN_END}\z/
          until fragments.first.nil? or fragments.first.match(long_end)
            tree << parse
          end
        end

        fragments.shift
        tree
      end

      # Renders the configured default decoration for the token.
      #
      # "{$0}" in the template is replaced with token_value; when the context
      # entry of the token is a Hash, each "{$key}" is replaced with the
      # corresponding value. Returns token_value unchanged (and logs an
      # error) when no default decoration is configured.
      def default_decoration(token_name, token_value)
        default_decoration = Tml.config.default_token_value(normalize_token(token_name), :decoration)

        unless default_decoration
          Tml.logger.error("Invalid decoration token value for #{token_name} in #{text}")
          return token_value
        end

        default_decoration = default_decoration.clone
        decoration_token_values = context[token_name.to_sym] || context[token_name.to_s]

        # Block form: the inserted text must be taken literally, a string
        # replacement would treat "\0", "\1", ... as backreferences.
        default_decoration.gsub!('{$0}') { token_value.to_s }

        if decoration_token_values.is_a?(Hash)
          decoration_token_values.keys.each do |key|
            default_decoration.gsub!("{$#{key}}") { decoration_token_values[key].to_s }
          end
        end

        default_decoration
      end

      # True when no :allowed_tokens option is given or it includes the token.
      def allowed_token?(token)
        return true if opts[:allowed_tokens].nil?
        opts[:allowed_tokens].include?(token)
      end

      # Applies the decoration of a single token to an already rendered value.
      #
      # Context entry handling: Proc - called with the value; Array/Hash -
      # default decoration; String - template whose "{$0}" receives the
      # value. Without a context entry the configured default decoration is
      # used. In every other case the value is returned undecorated and an
      # error is logged.
      def apply(token, value)
        return value if token == RESERVED_TOKEN
        return value unless allowed_token?(token)

        method = context[token.to_sym] || context[token.to_s]

        if method
          if method.is_a?(Proc)
            return method.call(value)
          end

          if method.is_a?(Array) or method.is_a?(Hash)
            return default_decoration(token, value)
          end

          if method.is_a?(String)
            # literal insertion, see default_decoration
            return method.to_s.gsub('{$0}') { value }
          end

          Tml.logger.error("Invalid decoration token value for #{token} in #{text}")
          return value
        end

        if Tml.config.default_token_value(normalize_token(token), :decoration)
          return default_decoration(token, value)
        end

        Tml.logger.error("Missing decoration token value for #{token} in #{text}")
        value
      end

      # Strips trailing digits, so "link1" and "link2" share the "link"
      # default decoration.
      def normalize_token(name)
        name.to_s.gsub(/(\d)*$/, '')
      end

      # Renders a parsed tree: strings are returned as they are, arrays are
      # rendered children-first and then decorated with their token.
      def evaluate(expr)
        unless expr.is_a?(Array)
          return expr
        end

        token = expr[0]
        args = expr.drop(1)
        value = args.map { |a| self.evaluate(a) }.join('')

        apply(token, value)
      end

      # Parses and renders the label. Parsing consumes the fragments, so they
      # are rebuilt first when a previous call has used them up.
      def substitute
        tokenize if fragments.nil? or fragments.empty?
        evaluate(parse)
      end

    end
  end
end
|
@@ -0,0 +1,346 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#--
|
3
|
+
# Copyright (c) 2015 Translation Exchange, Inc
|
4
|
+
#
|
5
|
+
# _______ _ _ _ ______ _
|
6
|
+
# |__ __| | | | | (_) | ____| | |
|
7
|
+
# | |_ __ __ _ _ __ ___| | __ _| |_ _ ___ _ __ | |__ __ _____| |__ __ _ _ __ __ _ ___
|
8
|
+
# | | '__/ _` | '_ \/ __| |/ _` | __| |/ _ \| '_ \| __| \ \/ / __| '_ \ / _` | '_ \ / _` |/ _ \
|
9
|
+
# | | | | (_| | | | \__ \ | (_| | |_| | (_) | | | | |____ > < (__| | | | (_| | | | | (_| | __/
|
10
|
+
# |_|_| \__,_|_| |_|___/_|\__,_|\__|_|\___/|_| |_|______/_/\_\___|_| |_|\__,_|_| |_|\__, |\___|
|
11
|
+
# __/ |
|
12
|
+
# |___/
|
13
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
14
|
+
# a copy of this software and associated documentation files (the
|
15
|
+
# "Software"), to deal in the Software without restriction, including
|
16
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
17
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
18
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
19
|
+
# the following conditions:
|
20
|
+
#
|
21
|
+
# The above copyright notice and this permission notice shall be
|
22
|
+
# included in all copies or substantial portions of the Software.
|
23
|
+
#
|
24
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
25
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
27
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
28
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
29
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
30
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
31
|
+
#++
|
32
|
+
|
33
|
+
require 'nokogiri'
|
34
|
+
|
35
|
+
module Tml
  module Tokenizers
    # Translates an HTML document by walking its node tree: text and inline
    # elements are collected into TML labels (inline elements become
    # decoration tokens), the labels are translated, and the surrounding
    # markup is re-emitted as HTML.
    #
    # Nodes are expected to respond to the Nokogiri node interface used
    # below (type, name, children, attributes, inner_text, parent,
    # previous_sibling, next_sibling). Node type 1 is treated as an element
    # and type 3 as a text node.
    class Dom

      HTML_SPECIAL_CHAR_REGEX = /(&[^;]*;)/
      INDEPENDENT_NUMBER_REGEX = /^(\d+)$|^(\d+[.,;\s])|(\s\d+)$|(\s\d+[,;\s])/
      # Month name followed by a day and optional ", year" / "at hh:mm UTC"
      # parts. (The escapes used to be doubled, which made the pattern look
      # for literal backslashes.)
      VERBOSE_DATE_REGEX = /(((Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)|(January|February|March|April|May|June|July|August|September|October|November|December))\s\d+(,\s\d+)*(,*\sat\s\d+:\d+(\sUTC))*)/

      attr_accessor :context, :tokens, :options

      # context - initial token values
      # options - translator options; looked up before the global config
      def initialize(context = {}, options = {})
        self.context = context
        self.options = options
        reset_context
      end

      # Translates an HTML string (parsed as a fragment) or a node.
      def translate(doc)
        translate_tree(doc.is_a?(String) ? Nokogiri::HTML.fragment(doc) : doc)
      end

      # Translates a node and returns the resulting markup.
      #
      # Text and qualifying inline children are accumulated into one TML
      # buffer; a separator or container child flushes (translates) the
      # buffer and is emitted around its own translated content.
      def translate_tree(node)
        if non_translatable_node?(node)
          return node.children.first.inner_text if node.children.count == 1
          return ''
        end

        return translate_tml(node.inner_text) if node.type == 3

        html = ''
        buffer = ''

        node.children.each do |child|
          if child.type == 3
            buffer += child.inner_text
          elsif inline_node?(child) && has_inline_or_text_siblings?(child) && !between_separators?(child)
            buffer += generate_tml_tags(child)
          elsif separator_node?(child)
            html += translate_tml(buffer) if buffer != ''
            html += generate_html_token(child)
            buffer = ''
          else
            html += translate_tml(buffer) if buffer != ''

            container_value = translate_tree(child)
            if ignored_node?(child)
              html += container_value
            else
              html += generate_html_token(child, container_value)
            end

            buffer = ''
          end
        end

        html += translate_tml(buffer) if buffer != ''
        html
      end

      # True for elements listed in 'nodes.scripts' and for elements with
      # neither children nor text.
      def non_translatable_node?(node)
        return false unless node
        return true if node.type == 1 && (option('nodes.scripts') || []).index(node.name.downcase)
        return true if node.type == 1 && node.children.length == 0 && node.inner_text == ''
        false
      end

      # Translates one TML label and resets the token context afterwards.
      #
      # Blank labels are returned untouched. With the 'split_sentences'
      # option every sentence is translated separately and replaced inside
      # the label; otherwise whitespace is collapsed and the label is
      # translated as a whole. With the 'debug' option the label is wrapped
      # in the debug format instead of being translated.
      def translate_tml(tml)
        return tml if empty_string?(tml)
        tml = generate_data_tokens(tml)

        if option('split_sentences')
          sentences = Tml::Utils.split_sentences(tml)
          translation = tml
          sentences.each do |sentence|
            # NOTE(review): this branch uses current_language while the
            # branch below uses target_language - confirm which is intended.
            sentence_translation = option('debug') ? debug_translation(sentence) : Tml.session.current_language.translate(sentence, tokens, options)
            # block form keeps backslashes in the translation literal
            translation = translation.gsub(sentence) { sentence_translation }
          end
          reset_context
          return translation
        end

        tml = tml.gsub(/[\n]/, '').gsub(/\s\s+/, ' ').strip

        translation = option('debug') ? debug_translation(tml) : Tml.session.target_language.translate(tml, tokens, options)
        reset_context
        translation
      end

      def has_child_nodes?(node)
        node.children && node.children.length > 0
      end

      # True when the node has a separator on one side and no non-blank
      # text node on the other.
      def between_separators?(node)
        (separator_node?(node.previous_sibling) && !valid_text_node?(node.next_sibling)) ||
          (separator_node?(node.next_sibling) && !valid_text_node?(node.previous_sibling))
      end

      # Converts an inline element (and its descendants) into TML and
      # registers the element's HTML as the value of the generated token.
      #
      # Returns "{token}" for childless elements, "[token: value]" for short
      # tokens and "[token]value[/token]" otherwise.
      def generate_tml_tags(node)
        buffer = ''
        node.children.each do |child|
          buffer += child.type == 3 ? child.inner_text : generate_tml_tags(child)
        end

        token_context = generate_html_token(node)
        token = contextualize(adjust_name(node), token_context)
        value = sanitize_value(buffer)

        return '{' + token + '}' if self_closing_node?(node)
        return '[' + token + ': ' + value + ']' if short_token?(token, value)

        '[' + token + ']' + value + '[/' + token + ']'
      end

      # Looks the option up in this tokenizer's options first and falls back
      # to the global translator options.
      def option(name)
        value = Tml::Utils.hash_value(self.options, name)
        value || Tml.config.translator_option(name)
      end

      # Wraps the text in the 'debug_format' template (its "{$0}" receives
      # the text, inserted literally).
      def debug_translation(translation)
        option('debug_format').gsub('{$0}') { translation }
      end

      def empty_string?(tml)
        tml.gsub(/[\s\n\r\t]/, '') == ''
      end

      # Restores the working token set to a copy of the initial context.
      def reset_context
        self.tokens = {}.merge(self.context)
      end

      # Truthy when the token is listed in 'nodes.short' or its value is
      # shorter than 20 characters.
      def short_token?(token, value)
        (option('nodes.short') || []).index(token.downcase) || value.length < 20
      end

      def only_child?(node)
        return false unless node.parent
        node.parent.children.count == 1
      end

      # True when any sibling is an inline element or a non-blank text node.
      def has_inline_or_text_siblings?(node)
        return false unless node.parent

        node.parent.children.any? do |child|
          child != node && (inline_node?(child) || valid_text_node?(child))
        end
      end

      # Truthy for elements listed in 'nodes.inline' that are not the only
      # child of their parent.
      def inline_node?(node)
        node.type == 1 &&
          (option('nodes.inline') || []).index(node.name.downcase) &&
          !only_child?(node)
      end

      def container_node?(node)
        node.type == 1 && !inline_node?(node)
      end

      def self_closing_node?(node)
        !node.children || !node.children.first
      end

      # Non-elements are always ignored; elements when listed in
      # 'nodes.ignored'.
      def ignored_node?(node)
        return true if node.type != 1
        (option('nodes.ignored') || []).index(node.name.downcase)
      end

      def valid_text_node?(node)
        return false unless node
        node.type == 3 && !empty_string?(node.inner_text)
      end

      def separator_node?(node)
        return false unless node
        node.type == 1 && (option('nodes.splitters') || []).index(node.name.downcase)
      end

      # Removes leading whitespace (at the start of every line).
      def sanitize_value(value)
        value.gsub(/^\s+/, '')
      end

      # Replaces HTML entities ("&name;") with "{name}" data tokens and
      # records the entity text in the context under the token name.
      #
      # Only active when the 'data_tokens.special' option is set - the same
      # convention generate_data_tokens follows for 'data_tokens.numeric'.
      def replace_special_characters(text)
        return text unless option('data_tokens.special')

        text.scan(HTML_SPECIAL_CHAR_REGEX).flatten.uniq.each do |match|
          token = match[1..-2] # entity name without "&" and ";"
          self.context[token] = match
          text = text.gsub(match) { "{#{token}}" }
        end

        text
      end

      # Replaces stand-alone numbers with "{name}" data tokens (name taken
      # from 'data_tokens.numeric_name', made unique per value) and records
      # the integer value in the token set. Surrounding whitespace and
      # punctuation are preserved.
      #
      # Only active when the 'data_tokens.numeric' option is set.
      def generate_data_tokens(text)
        return text unless option('data_tokens.numeric')

        token_name = option('data_tokens.numeric_name')

        # Replace each match where it occurs; replacing by the matched
        # string would also hit the tail of longer numbers ("5 " in "15 ").
        text.gsub(INDEPENDENT_NUMBER_REGEX) do |match|
          value = match.gsub(/[.,;\s]/, '')
          token = contextualize(token_name, value.to_i)
          match.sub(value) { "{#{token}}" }
        end
      end

      # Renders the element as HTML with its attributes sorted by name.
      #
      # value - inner content; defaults to the "{$0}" placeholder.
      #
      # Childless elements render without content (br/hr without attributes
      # as "<br/>" / "<hr/>"). Attribute values are single-quoted unless
      # they contain a single quote.
      def generate_html_token(node, value = nil)
        name = node.name.downcase
        attributes = node.attributes
        attributes_hash = {}
        value = (!value ? '{$0}' : value)

        if attributes.length == 0
          if self_closing_node?(node)
            return '<' + name + '/>' if %w(br hr).index(name)
            return '<' + name + '>' + '</' + name + '>'
          end
          return '<' + name + '>' + value + '</' + name + '>'
        end

        attributes.each do |attr_name, attribute|
          attributes_hash[attr_name] = attribute.value
        end

        keys = attributes_hash.keys.sort

        attr = []
        keys.each do |key|
          quote = attributes_hash[key].index("'") ? '"' : "'"
          attr << (key + '=' + quote + attributes_hash[key] + quote)
        end
        attr = attr.join(' ')

        return '<' + name + ' ' + attr + '>' + '</' + name + '>' if self_closing_node?(node)
        '<' + name + ' ' + attr + '>' + value + '</' + name + '>'
      end

      # Lower-cased element name, translated through 'name_mapping'
      # (symbol keys) when a mapping exists.
      def adjust_name(node)
        name = node.name.downcase
        map = option('name_mapping')
        map[name.to_sym] ? map[name.to_sym] : name
      end

      # Registers a token value and returns the token name actually used.
      #
      # When the name is already taken by a different value, the trailing
      # counter of the name is incremented (or 1 is appended) until a free
      # or matching name is found.
      def contextualize(name, context)
        if self.tokens[name] && self.tokens[name] != context
          index = 0
          matches = name.match(/\d+$/)
          if matches
            index = matches[0].to_i
            # strip the counter from the end only, digits elsewhere in the
            # name belong to the name
            name = name.chomp(index.to_s)
          end
          name += (index + 1).to_s
          return contextualize(name, context)
        end

        self.tokens[name] = context
        name
      end

      # Logs the structure of the tree rooted at doc.
      def debug(doc)
        debug_tree(doc, 0)
      end

      # Logs one line for the node and recurses into its children.
      def debug_tree(node, depth)
        padding = ('=' * (depth+1))

        Tml.logger.debug(padding + '=> ' + node.class.to_s + ': ' + node_info(node))

        (node.children || []).each do |child|
          debug_tree(child, depth+1)
        end
      end

      # Describes a node: type, element name and the classification flags
      # this tokenizer assigns to it; text nodes also show their text.
      def node_info(node)
        info = []
        info << node.type

        info << node.name if node.type == 1

        if inline_node?(node)
          info << 'inline'
          if has_inline_or_text_siblings?(node)
            info << 'sentence'
          else
            info << 'only translatable'
          end
        end

        info << 'self closing' if self_closing_node?(node)
        info << 'only child' if only_child?(node)

        return "[#{info.join(', ')}]: " + node.inner_text if node.type == 3
        "[#{info.join(', ')}]"
      end

    end
  end
end
|