tml 4.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +22 -0
- data/README.md +243 -0
- data/Rakefile +9 -0
- data/lib/tml.rb +56 -0
- data/lib/tml/api/client.rb +206 -0
- data/lib/tml/api/post_office.rb +71 -0
- data/lib/tml/application.rb +254 -0
- data/lib/tml/base.rb +116 -0
- data/lib/tml/cache.rb +143 -0
- data/lib/tml/cache_adapters/file.rb +89 -0
- data/lib/tml/cache_adapters/memcache.rb +104 -0
- data/lib/tml/cache_adapters/memory.rb +85 -0
- data/lib/tml/cache_adapters/redis.rb +108 -0
- data/lib/tml/config.rb +410 -0
- data/lib/tml/decorators/base.rb +52 -0
- data/lib/tml/decorators/default.rb +43 -0
- data/lib/tml/decorators/html.rb +102 -0
- data/lib/tml/exception.rb +35 -0
- data/lib/tml/ext/array.rb +86 -0
- data/lib/tml/ext/date.rb +99 -0
- data/lib/tml/ext/fixnum.rb +47 -0
- data/lib/tml/ext/hash.rb +99 -0
- data/lib/tml/ext/string.rb +56 -0
- data/lib/tml/ext/time.rb +89 -0
- data/lib/tml/generators/cache/base.rb +117 -0
- data/lib/tml/generators/cache/file.rb +159 -0
- data/lib/tml/language.rb +175 -0
- data/lib/tml/language_case.rb +105 -0
- data/lib/tml/language_case_rule.rb +76 -0
- data/lib/tml/language_context.rb +117 -0
- data/lib/tml/language_context_rule.rb +56 -0
- data/lib/tml/languages/en.json +1363 -0
- data/lib/tml/logger.rb +109 -0
- data/lib/tml/rules_engine/evaluator.rb +162 -0
- data/lib/tml/rules_engine/parser.rb +65 -0
- data/lib/tml/session.rb +199 -0
- data/lib/tml/source.rb +106 -0
- data/lib/tml/tokenizers/data.rb +96 -0
- data/lib/tml/tokenizers/decoration.rb +204 -0
- data/lib/tml/tokenizers/dom.rb +346 -0
- data/lib/tml/tokens/data.rb +403 -0
- data/lib/tml/tokens/method.rb +61 -0
- data/lib/tml/tokens/transform.rb +223 -0
- data/lib/tml/translation.rb +67 -0
- data/lib/tml/translation_key.rb +178 -0
- data/lib/tml/translator.rb +47 -0
- data/lib/tml/utils.rb +130 -0
- data/lib/tml/version.rb +34 -0
- metadata +121 -0
data/lib/tml/source.rb
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#--
|
3
|
+
# Copyright (c) 2015 Translation Exchange, Inc
|
4
|
+
#
|
5
|
+
# _______ _ _ _ ______ _
|
6
|
+
# |__ __| | | | | (_) | ____| | |
|
7
|
+
# | |_ __ __ _ _ __ ___| | __ _| |_ _ ___ _ __ | |__ __ _____| |__ __ _ _ __ __ _ ___
|
8
|
+
# | | '__/ _` | '_ \/ __| |/ _` | __| |/ _ \| '_ \| __| \ \/ / __| '_ \ / _` | '_ \ / _` |/ _ \
|
9
|
+
# | | | | (_| | | | \__ \ | (_| | |_| | (_) | | | | |____ > < (__| | | | (_| | | | | (_| | __/
|
10
|
+
# |_|_| \__,_|_| |_|___/_|\__,_|\__|_|\___/|_| |_|______/_/\_\___|_| |_|\__,_|_| |_|\__, |\___|
|
11
|
+
# __/ |
|
12
|
+
# |___/
|
13
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
14
|
+
# a copy of this software and associated documentation files (the
|
15
|
+
# "Software"), to deal in the Software without restriction, including
|
16
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
17
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
18
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
19
|
+
# the following conditions:
|
20
|
+
#
|
21
|
+
# The above copyright notice and this permission notice shall be
|
22
|
+
# included in all copies or substantial portions of the Software.
|
23
|
+
#
|
24
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
25
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
27
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
28
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
29
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
30
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
31
|
+
#++
|
32
|
+
|
33
|
+
require 'digest/md5'
|
34
|
+
|
35
|
+
# A translation source registered with an application, together with the
# translations fetched for it.
#
# Translations are kept in +translations+ as a two-level hash:
# locale => translation key => Array of Tml::Translation.
#
# NOTE(review): +belongs_to+, +attributes+ and +has_many+ are macros
# presumably provided by Tml::Base (not visible here) — confirm their
# exact semantics there.
class Tml::Source < Tml::Base
  belongs_to :application
  attributes :key, :source, :url, :name, :description
  has_many :translations

  # Reduces a URL to a canonical path that starts with "/" and has no
  # trailing "/".
  #
  # Returns nil for a nil or empty url, and "/" when the url has no path.
  # URI.parse may raise URI::InvalidURIError for malformed input.
  def self.normalize(url)
    return nil if url.nil? || url == ''

    path = URI.parse(url).path
    return '/' if path.nil? || path == ''
    return path if path == '/'

    # always must start with /
    path = "/#{path}" unless path.start_with?('/')
    # should not end with /
    path = path[0..-2] if path.end_with?('/')
    path
  end

  # Returns the MD5 hex digest of the string form of +source+.
  def self.generate_key(source)
    Digest::MD5.hexdigest(source.to_s)
  end

  # Builds the cache key "<locale>/sources/<source path segments>".
  def self.cache_key(locale, source)
    File.join(locale, 'sources', source.split('/'))
  end

  # Initializes the source through Tml::Base and, when no key was given,
  # derives one from the :source attribute.
  def initialize(attrs = {})
    super
    self.key ||= Tml::Source.generate_key(attrs[:source])
  end

  # Loads the translations of this source for +locale+ through the
  # application's API client, unless they were already loaded.
  #
  # Each result entry may be either a Hash carrying a 'translations' list
  # or the list itself. A nil response or a nil list is treated as empty
  # instead of raising NoMethodError.
  #
  # Always returns self. A Tml::Exception raised while fetching is
  # swallowed; the empty hash stored for the locale before the request
  # stays in place, so the fetch is not retried on the next call.
  def fetch_translations(locale)
    self.translations ||= {}
    return self if self.translations[locale]

    self.translations[locale] = {}

    results = self.application.api_client.get(
      "sources/#{self.key}/translations",
      {:locale => locale, :per_page => 10000},
      {:cache_key => Tml::Source.cache_key(locale, self.source)}
    ) || {}

    results.each do |translation_key, data|
      entries = data.is_a?(Hash) ? data['translations'] : data
      self.translations[locale][translation_key] = (entries || []).collect do |t|
        Tml::Translation.new(
          :locale  => t['locale'] || locale,
          :label   => t['label'],
          :context => t['context']
        )
      end
    end

    self
  rescue Tml::Exception
    self
  end

  # Returns the translations loaded for +locale+ and +key+, or nil when
  # none are loaded. Initializes the per-locale hash as a side effect.
  def cached_translations(locale, key)
    self.translations ||= {}
    self.translations[locale] ||= {}
    self.translations[locale][key]
  end

  # Deletes this source's cache entry for every language of the application.
  def reset_cache
    application.languages.each do |lang|
      Tml.cache.delete(Tml::Source.cache_key(lang.locale, self.source))
    end
  end
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#--
|
3
|
+
# Copyright (c) 2015 Translation Exchange, Inc
|
4
|
+
#
|
5
|
+
# _______ _ _ _ ______ _
|
6
|
+
# |__ __| | | | | (_) | ____| | |
|
7
|
+
# | |_ __ __ _ _ __ ___| | __ _| |_ _ ___ _ __ | |__ __ _____| |__ __ _ _ __ __ _ ___
|
8
|
+
# | | '__/ _` | '_ \/ __| |/ _` | __| |/ _ \| '_ \| __| \ \/ / __| '_ \ / _` | '_ \ / _` |/ _ \
|
9
|
+
# | | | | (_| | | | \__ \ | (_| | |_| | (_) | | | | |____ > < (__| | | | (_| | | | | (_| | __/
|
10
|
+
# |_|_| \__,_|_| |_|___/_|\__,_|\__|_|\___/|_| |_|______/_/\_\___|_| |_|\__,_|_| |_|\__, |\___|
|
11
|
+
# __/ |
|
12
|
+
# |___/
|
13
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
14
|
+
# a copy of this software and associated documentation files (the
|
15
|
+
# "Software"), to deal in the Software without restriction, including
|
16
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
17
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
18
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
19
|
+
# the following conditions:
|
20
|
+
#
|
21
|
+
# The above copyright notice and this permission notice shall be
|
22
|
+
# included in all copies or substantial portions of the Software.
|
23
|
+
#
|
24
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
25
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
27
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
28
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
29
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
30
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
31
|
+
#++
|
32
|
+
|
33
|
+
#######################################################################
|
34
|
+
#
|
35
|
+
# Decoration Token Forms:
|
36
|
+
#
|
37
|
+
# [link: click here]
|
38
|
+
# or
|
39
|
+
# [link] click here [/link]
|
40
|
+
#
|
41
|
+
# Decoration Tokens Allow Nesting:
|
42
|
+
#
|
43
|
+
# [link: {count} {_messages}]
|
44
|
+
# [link: {count||message}]
|
45
|
+
# [link: {count||person, people}]
|
46
|
+
# [link: {user.name}]
|
47
|
+
#
|
48
|
+
#######################################################################
|
49
|
+
|
50
|
+
module Tml
  module Tokenizers
    # Finds the data tokens in a label and substitutes them with values
    # taken from a context hash.
    #
    # The token syntax itself is defined by the classes returned from
    # +supported_tokens+; this class only collects their parse results and
    # delegates substitution to each token.
    class Data

      attr_accessor :text, :context, :tokens, :opts

      # Token classes asked, in order, to parse the label.
      def self.supported_tokens
        [Tml::Tokens::Data, Tml::Tokens::Method, Tml::Tokens::Transform]
      end

      # Truthy (the index of the first "{") when the label may contain
      # data tokens, nil otherwise.
      def self.required?(label)
        label.index("{")
      end

      # text    - label to tokenize
      # context - token values, passed through to each token's substitute
      # opts    - options; :allowed_tokens restricts which tokens are substituted
      def initialize(text, context={}, opts={})
        self.text = text
        self.context = context
        self.opts = opts
        self.tokens = []
        tokenize
      end

      # Collects the unique tokens found by every supported token class
      # and returns them.
      #
      # Uses the non-destructive flatten/uniq: the bang variants return nil
      # when nothing changed, which made the previous chained call raise
      # when no token class was registered.
      def tokenize
        parsed = self.class.supported_tokens.map { |klass| klass.parse(self.text) }
        self.tokens = parsed.flatten.uniq
      end

      # True when no :allowed_tokens option is set, or when the option has
      # a non-nil entry for the token's name.
      def token_allowed?(token)
        return true unless opts[:allowed_tokens]
        !opts[:allowed_tokens][token.name].nil?
      end

      # Applies every allowed token to the label, in order, and returns
      # the resulting string.
      def substitute(language, options = {})
        tokens.inject(self.text) do |label, token|
          token_allowed?(token) ? token.substitute(label, context, language, options) : label
        end
      end

    end
  end
end
|
@@ -0,0 +1,204 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#--
|
3
|
+
# Copyright (c) 2015 Translation Exchange, Inc
|
4
|
+
#
|
5
|
+
# _______ _ _ _ ______ _
|
6
|
+
# |__ __| | | | | (_) | ____| | |
|
7
|
+
# | |_ __ __ _ _ __ ___| | __ _| |_ _ ___ _ __ | |__ __ _____| |__ __ _ _ __ __ _ ___
|
8
|
+
# | | '__/ _` | '_ \/ __| |/ _` | __| |/ _ \| '_ \| __| \ \/ / __| '_ \ / _` | '_ \ / _` |/ _ \
|
9
|
+
# | | | | (_| | | | \__ \ | (_| | |_| | (_) | | | | |____ > < (__| | | | (_| | | | | (_| | __/
|
10
|
+
# |_|_| \__,_|_| |_|___/_|\__,_|\__|_|\___/|_| |_|______/_/\_\___|_| |_|\__,_|_| |_|\__, |\___|
|
11
|
+
# __/ |
|
12
|
+
# |___/
|
13
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
14
|
+
# a copy of this software and associated documentation files (the
|
15
|
+
# "Software"), to deal in the Software without restriction, including
|
16
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
17
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
18
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
19
|
+
# the following conditions:
|
20
|
+
#
|
21
|
+
# The above copyright notice and this permission notice shall be
|
22
|
+
# included in all copies or substantial portions of the Software.
|
23
|
+
#
|
24
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
25
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
27
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
28
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
29
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
30
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
31
|
+
#++
|
32
|
+
|
33
|
+
#######################################################################
|
34
|
+
#
|
35
|
+
# Decoration Token Forms:
|
36
|
+
#
|
37
|
+
# [link: click here]
|
38
|
+
# or
|
39
|
+
# [link] click here [/link]
|
40
|
+
#
|
41
|
+
# Decoration Tokens Allow Nesting:
|
42
|
+
#
|
43
|
+
# [link: {count} {_messages}]
|
44
|
+
# [link: {count||message}]
|
45
|
+
# [link: {count||person, people}]
|
46
|
+
# [link: {user.name}]
|
47
|
+
#
|
48
|
+
#######################################################################
|
49
|
+
|
50
|
+
module Tml
  module Tokenizers
    # Parses and evaluates decoration tokens in a label.
    #
    # Two forms are recognized:
    #
    #   [link: click here]
    #   [link] click here [/link]
    #
    # The label is wrapped in a reserved [tml]...[/tml] token, split into
    # fragments, parsed into a nested array (token name followed by its
    # children) and then evaluated bottom-up: each token's children are
    # joined into a string and passed to the decoration found in the
    # context or in the configured defaults.
    class Decoration

      attr_reader :tokens, :fragments, :context, :text, :opts

      # Name of the wrapper token added around every label.
      RESERVED_TOKEN = 'tml'

      RE_SHORT_TOKEN_START = '\[[\w]*:'
      RE_SHORT_TOKEN_END = '\]'
      RE_LONG_TOKEN_START = '\[[\w]*\]'
      RE_LONG_TOKEN_END = '\[\/[\w]*\]'
      RE_TEXT = '[^\[\]]+' #'[\w\s!.:{}\(\)\|,?]*'

      # Truthy (the index of the first "[") when the label may contain
      # decoration tokens, nil otherwise.
      def self.required?(label)
        label.index('[')
      end

      # text    - label to decorate
      # context - decorations by token name (Symbol or String keys)
      # opts    - options; :allowed_tokens restricts which tokens are applied
      def initialize(text, context = {}, opts = {})
        @text = "[#{RESERVED_TOKEN}]#{text}[/#{RESERVED_TOKEN}]"
        @context = context
        @opts = opts
        tokenize
      end

      # Splits the wrapped label into fragments (token delimiters and plain
      # text) and clears the list of token names found so far.
      def tokenize
        re = [RE_SHORT_TOKEN_START,
              RE_SHORT_TOKEN_END,
              RE_LONG_TOKEN_START,
              RE_LONG_TOKEN_END,
              RE_TEXT].join('|')
        @fragments = text.scan(/#{re}/)
        @tokens = []
      end

      # Consumes the next fragment and returns either a plain string or,
      # for a token start, the parsed subtree.
      #
      # Returns the wrapped text when there is nothing left to consume,
      # instead of failing on a nil fragment.
      def parse
        return @text if fragments.nil? || fragments.empty?

        token = fragments.shift
        return parse_tree(token.gsub(/[\[:]/, ''), :short) if token.match(/#{RE_SHORT_TOKEN_START}/)
        return parse_tree(token.gsub(/[\[\]]/, ''), :long) if token.match(/#{RE_LONG_TOKEN_START}/)

        token.to_s
      end

      # Builds the subtree [name, child, child, ...] for a token, consuming
      # fragments up to and including its closing delimiter.
      #
      # Records the name in +tokens+ (once, and never the reserved name).
      # For short tokens, leading whitespace is stripped from the first
      # child when it is plain text ("[link: click]" => "click").
      def parse_tree(name, type = :short)
        tree = [name]
        @tokens << name unless @tokens.include?(name) || name == RESERVED_TOKEN

        if type == :short
          first = true
          until fragments.first.nil? || fragments.first.match(/#{RE_SHORT_TOKEN_END}/)
            value = parse
            if first && value.is_a?(String)
              value = value.lstrip
              first = false
            end
            tree << value
          end
        elsif type == :long
          until fragments.first.nil? || fragments.first.match(/#{RE_LONG_TOKEN_END}/)
            tree << parse
          end
        end

        # drop the closing delimiter
        fragments.shift
        tree
      end

      # Renders +token_value+ with the configured default decoration for
      # the token.
      #
      # "{$0}" in the default is replaced with the value; when the context
      # holds a Hash for the token, every "{$key}" is replaced with the
      # corresponding entry. Returns the value unchanged (and logs an
      # error) when no default is configured.
      def default_decoration(token_name, token_value)
        default_decoration = Tml.config.default_token_value(normalize_token(token_name), :decoration)

        unless default_decoration
          Tml.logger.error("Invalid decoration token value for #{token_name} in #{text}")
          return token_value
        end

        # work on a copy so the configured default is not modified
        default_decoration = default_decoration.clone
        decoration_token_values = context[token_name.to_sym] || context[token_name.to_s]

        # Block form: a plain replacement string would have its backslash
        # sequences (\1, \\, ...) interpreted by gsub.
        default_decoration.gsub!('{$0}') { token_value.to_s }

        if decoration_token_values.is_a?(Hash)
          decoration_token_values.each do |key, replacement|
            default_decoration.gsub!("{$#{key}}") { replacement.to_s }
          end
        end

        default_decoration
      end

      # True when no :allowed_tokens option is set or it includes the token.
      def allowed_token?(token)
        return true if opts[:allowed_tokens].nil?
        opts[:allowed_tokens].include?(token)
      end

      # Decorates +value+ for +token+.
      #
      # The decoration is looked up in the context (Symbol key first, then
      # String key):
      #   Proc        - called with the value
      #   Array, Hash - rendered through the configured default decoration
      #   String      - used as a template, "{$0}" replaced with the value
      #   other       - error logged, value returned unchanged
      # Without a context entry the configured default is used when there
      # is one; otherwise an error is logged and the value is returned
      # unchanged. The reserved token and tokens that are not allowed
      # always return the value unchanged.
      def apply(token, value)
        return value if token == RESERVED_TOKEN
        return value unless allowed_token?(token)

        decorator = context[token.to_sym] || context[token.to_s]

        if decorator
          return decorator.call(value) if decorator.is_a?(Proc)
          return default_decoration(token, value) if decorator.is_a?(Array) || decorator.is_a?(Hash)
          # block form keeps backslashes in the value literal
          return decorator.gsub('{$0}') { value.to_s } if decorator.is_a?(String)

          Tml.logger.error("Invalid decoration token value for #{token} in #{text}")
          return value
        end

        if Tml.config.default_token_value(normalize_token(token), :decoration)
          return default_decoration(token, value)
        end

        Tml.logger.error("Missing decoration token value for #{token} in #{text}")
        value
      end

      # Strips trailing digits from a token name ("link1" => "link").
      def normalize_token(name)
        name.to_s.gsub(/(\d)*$/, '')
      end

      # Evaluates a parsed expression: plain values are returned as is;
      # an Array [token, *children] is evaluated recursively, its children
      # joined and decorated with +apply+.
      def evaluate(expr)
        return expr unless expr.is_a?(Array)

        token = expr[0]
        value = expr.drop(1).map { |arg| evaluate(arg) }.join('')

        apply(token, value)
      end

      # Returns the label with all decoration tokens applied.
      #
      # Re-tokenizes first, because parsing consumes the fragment list;
      # this keeps repeated calls working.
      def substitute
        tokenize
        evaluate(parse)
      end

    end
  end
end
|
@@ -0,0 +1,346 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#--
|
3
|
+
# Copyright (c) 2015 Translation Exchange, Inc
|
4
|
+
#
|
5
|
+
# _______ _ _ _ ______ _
|
6
|
+
# |__ __| | | | | (_) | ____| | |
|
7
|
+
# | |_ __ __ _ _ __ ___| | __ _| |_ _ ___ _ __ | |__ __ _____| |__ __ _ _ __ __ _ ___
|
8
|
+
# | | '__/ _` | '_ \/ __| |/ _` | __| |/ _ \| '_ \| __| \ \/ / __| '_ \ / _` | '_ \ / _` |/ _ \
|
9
|
+
# | | | | (_| | | | \__ \ | (_| | |_| | (_) | | | | |____ > < (__| | | | (_| | | | | (_| | __/
|
10
|
+
# |_|_| \__,_|_| |_|___/_|\__,_|\__|_|\___/|_| |_|______/_/\_\___|_| |_|\__,_|_| |_|\__, |\___|
|
11
|
+
# __/ |
|
12
|
+
# |___/
|
13
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
14
|
+
# a copy of this software and associated documentation files (the
|
15
|
+
# "Software"), to deal in the Software without restriction, including
|
16
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
17
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
18
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
19
|
+
# the following conditions:
|
20
|
+
#
|
21
|
+
# The above copyright notice and this permission notice shall be
|
22
|
+
# included in all copies or substantial portions of the Software.
|
23
|
+
#
|
24
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
25
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
26
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
27
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
28
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
29
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
30
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
31
|
+
#++
|
32
|
+
|
33
|
+
require 'nokogiri'
|
34
|
+
|
35
|
+
module Tml
  module Tokenizers
    # Walks an HTML node tree, converts inline markup into TML decoration
    # tokens and translates the resulting text segments.
    #
    # Nodes are expected to respond to the Nokogiri node interface used
    # below (type, name, children, parent, attributes, inner_text and the
    # sibling accessors). Behaviour is driven by options, looked up first
    # in the instance options and then in Tml.config.translator_option.
    class Dom

      # Node type codes compared against node.type.
      ELEMENT_NODE = 1
      TEXT_NODE = 3

      HTML_SPECIAL_CHAR_REGEX = /(&[^;]*;)/
      INDEPENDENT_NUMBER_REGEX = /^(\d+)$|^(\d+[.,;\s])|(\s\d+)$|(\s\d+[,;\s])/
      # Backslashes are single here: the previous doubled form matched a
      # literal backslash followed by "s"/"d" and therefore never a date.
      VERBOSE_DATE_REGEX = /(((Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)|(January|February|March|April|May|June|July|August|September|October|November|December))\s\d+(,\s\d+)*(,*\sat\s\d+:\d+(\sUTC))*)/

      attr_accessor :context, :tokens, :options

      # context - initial token values; copied into +tokens+ on every reset
      # options - per-instance options overriding the configured ones
      def initialize(context = {}, options = {})
        self.context = context
        self.options = options
        reset_context
      end

      # Translates an HTML string (parsed as a fragment) or an already
      # parsed node and returns the translated HTML.
      def translate(doc)
        translate_tree(doc.is_a?(String) ? Nokogiri::HTML.fragment(doc) : doc)
      end

      # Translates one node and its descendants.
      #
      # Text and qualifying inline children are accumulated into a buffer
      # that is translated as one segment; separator and container children
      # flush the buffer. Containers are translated recursively and, unless
      # ignored, re-wrapped in their own tag.
      def translate_tree(node)
        if non_translatable_node?(node)
          return node.children.first.inner_text if node.children.count == 1
          return ''
        end

        return translate_tml(node.inner_text) if node.type == TEXT_NODE

        html = ''
        buffer = ''

        node.children.each do |child|
          if child.type == TEXT_NODE
            buffer += child.inner_text
          elsif inline_node?(child) && has_inline_or_text_siblings?(child) && !between_separators?(child)
            buffer += generate_tml_tags(child)
          elsif separator_node?(child)
            html += translate_tml(buffer) if buffer != ''
            html += generate_html_token(child)
            buffer = ''
          else
            html += translate_tml(buffer) if buffer != ''

            container_value = translate_tree(child)
            html += ignored_node?(child) ? container_value : generate_html_token(child, container_value)

            buffer = ''
          end
        end

        html += translate_tml(buffer) if buffer != ''
        html
      end

      # True for elements listed in the 'nodes.scripts' option and for
      # elements without children and without text.
      def non_translatable_node?(node)
        return false unless node
        return true if node.type == ELEMENT_NODE && (option('nodes.scripts') || []).index(node.name.downcase)
        return true if node.type == ELEMENT_NODE && node.children.length === 0 && node.inner_text == ''
        false
      end

      # Translates one TML segment and resets the collected tokens.
      #
      # Blank segments are returned untouched. With the 'split_sentences'
      # option each sentence is translated separately and substituted back
      # into the segment; otherwise whitespace is collapsed and the whole
      # segment is translated. With the 'debug' option the debug format is
      # applied instead of translating.
      #
      # NOTE(review): the sentence branch translates with
      # Tml.session.current_language, the other with
      # Tml.session.target_language — confirm whether that is intended.
      def translate_tml(tml)
        return tml if empty_string?(tml)
        tml = generate_data_tokens(tml)

        if option('split_sentences')
          sentences = Tml::Utils.split_sentences(tml)
          translation = tml
          sentences.each do |sentence|
            sentence_translation = option('debug') ? debug_translation(sentence) : Tml.session.current_language.translate(sentence, tokens, options)
            # block form keeps backslashes in the translation literal
            translation = translation.gsub(sentence) { sentence_translation }
          end
          reset_context
          return translation
        end

        tml = tml.gsub(/[\n]/, '').gsub(/\s\s+/, ' ').strip

        translation = option('debug') ? debug_translation(tml) : Tml.session.target_language.translate(tml, tokens, options)
        reset_context
        translation
      end

      # Truthy when the node has at least one child.
      def has_child_nodes?(node)
        node.children && node.children.length > 0
      end

      # Truthy when the node sits next to a separator on one side and has
      # no non-blank text node on the other side.
      def between_separators?(node)
        (separator_node?(node.previous_sibling) && !valid_text_node?(node.next_sibling)) ||
          (separator_node?(node.next_sibling) && !valid_text_node?(node.previous_sibling))
      end

      # Converts an inline node (and, recursively, its children) into TML.
      #
      # The node's HTML template is registered as the token value. Result:
      #   "{token}"                for nodes without children
      #   "[token: value]"         for short tokens
      #   "[token]value[/token]"   otherwise
      def generate_tml_tags(node)
        buffer = ''
        node.children.each do |child|
          buffer += child.type == TEXT_NODE ? child.inner_text : generate_tml_tags(child)
        end

        token_context = generate_html_token(node)
        token = contextualize(adjust_name(node), token_context)
        value = sanitize_value(buffer)

        return "{#{token}}" if self_closing_node?(node)
        return "[#{token}: #{value}]" if short_token?(token, value)

        "[#{token}]#{value}[/#{token}]"
      end

      # Looks an option up in the instance options, falling back to the
      # configured translator option.
      def option(name)
        value = Tml::Utils.hash_value(self.options, name)
        value || Tml.config.translator_option(name)
      end

      # Wraps a string in the 'debug_format' option, replacing "{$0}".
      def debug_translation(translation)
        # block form keeps backslashes in the text literal
        option('debug_format').gsub('{$0}') { translation }
      end

      # True when the string contains nothing but whitespace.
      def empty_string?(tml)
        tml.gsub(/[\s\n\r\t]/, '') == ''
      end

      # Replaces the collected tokens with a fresh copy of the context.
      def reset_context
        self.tokens = {}.merge(self.context)
      end

      # Truthy when the token is listed in the 'nodes.short' option or its
      # value is shorter than 20 characters. A missing option is treated
      # as an empty list, like the other node-list options.
      def short_token?(token, value)
        (option('nodes.short') || []).index(token.downcase) || value.length < 20
      end

      # True when the node is the single child of its parent.
      def only_child?(node)
        return false unless node.parent
        node.parent.children.count == 1
      end

      # True when any sibling is an inline node or a non-blank text node.
      def has_inline_or_text_siblings?(node)
        return false unless node.parent

        node.parent.children.any? do |sibling|
          sibling != node && (inline_node?(sibling) || valid_text_node?(sibling))
        end
      end

      # Truthy for elements listed in the 'nodes.inline' option that are
      # not the only child of their parent.
      def inline_node?(node)
        node.type == ELEMENT_NODE &&
          (option('nodes.inline') || []).index(node.name.downcase) &&
          !only_child?(node)
      end

      # True for elements that are not inline nodes.
      def container_node?(node)
        node.type == ELEMENT_NODE && !inline_node?(node)
      end

      # True when the node has no children.
      def self_closing_node?(node)
        !node.children || !node.children.first
      end

      # Truthy for non-element nodes and for elements listed in the
      # 'nodes.ignored' option.
      def ignored_node?(node)
        return true if node.type != ELEMENT_NODE
        (option('nodes.ignored') || []).index(node.name.downcase)
      end

      # True for text nodes that are not blank.
      def valid_text_node?(node)
        return false unless node
        node.type == TEXT_NODE && !empty_string?(node.inner_text)
      end

      # Truthy for elements listed in the 'nodes.splitters' option.
      def separator_node?(node)
        return false unless node
        node.type == ELEMENT_NODE && (option('nodes.splitters') || []).index(node.name.downcase)
      end

      # Strips leading whitespace from every line of the value.
      def sanitize_value(value)
        value.gsub(/^\s+/, '')
      end

      # Replaces HTML entities such as "&amp;" with data tokens ("{amp}")
      # and stores the original entity in the context under the token name.
      #
      # The previous implementation iterated a MatchData object and sliced
      # with a negative length, so it could never succeed; every match is
      # now replaced in a single pass.
      #
      # NOTE(review): the guard skips the replacement when the
      # 'data_tokens.special' option is SET, which looks inverted compared
      # with generate_data_tokens — kept as is, confirm the intent.
      def replace_special_characters(text)
        return text if option('data_tokens.special')

        text.gsub(HTML_SPECIAL_CHAR_REGEX) do |entity|
          token = entity[1..-2]
          self.context[token] = entity
          "{#{token}}"
        end
      end

      # Replaces free-standing numbers with numeric data tokens when the
      # 'data_tokens.numeric' option is set.
      #
      # Every match of INDEPENDENT_NUMBER_REGEX has its digits replaced by
      # "{name}", keeping the surrounding punctuation and whitespace; the
      # integer value is registered under that token name, with a numeric
      # suffix added when the name is already taken by another value.
      #
      # The previous implementation iterated a MatchData object and called
      # String#replace with two arguments, so it raised as soon as the
      # option was enabled.
      def generate_data_tokens(text)
        return text unless option('data_tokens.numeric')

        token_name = option('data_tokens.numeric_name')

        text.gsub(INDEPENDENT_NUMBER_REGEX) do |match|
          value = match.gsub(/[.,;\s]/, '')
          token = contextualize(token_name, value.to_i)
          match.sub(value) { "{#{token}}" }
        end
      end

      # Renders the node's tag with its attributes (sorted by name) around
      # +value+, or around the "{$0}" placeholder when no value is given.
      #
      # Childless nodes render as an empty element; childless br/hr
      # without attributes render as "<br/>" / "<hr/>". Attribute values
      # are quoted with single quotes unless they contain one.
      def generate_html_token(node, value = nil)
        name = node.name.downcase
        attributes = node.attributes
        value ||= '{$0}'

        if attributes.length == 0
          if self_closing_node?(node)
            return "<#{name}/>" if %w(br hr).index(name)
            return "<#{name}></#{name}>"
          end
          return "<#{name}>#{value}</#{name}>"
        end

        attributes_hash = {}
        attributes.each do |attr_name, attribute|
          attributes_hash[attr_name] = attribute.value
        end

        attr = attributes_hash.keys.sort.map do |key|
          quote = attributes_hash[key].index("'") ? '"' : "'"
          "#{key}=#{quote}#{attributes_hash[key]}#{quote}"
        end.join(' ')

        return "<#{name} #{attr}></#{name}>" if self_closing_node?(node)
        "<#{name} #{attr}>#{value}</#{name}>"
      end

      # Returns the token name for a node: its lower-cased tag name, or the
      # entry of the 'name_mapping' option for it when there is one. A
      # missing mapping option is treated as empty.
      def adjust_name(node)
        name = node.name.downcase
        map = option('name_mapping') || {}
        map[name.to_sym] || name
      end

      # Registers +context+ under +name+ and returns the name actually used.
      #
      # When the name is already taken by a different value, the trailing
      # number of the name is incremented (or "1" is appended) until a free
      # or matching name is found. Only the trailing digits are touched;
      # digits elsewhere in the name are preserved.
      def contextualize(name, context)
        if self.tokens[name] && self.tokens[name] != context
          index = 0
          matches = name.match(/\d+$/)
          if matches
            index = matches[0].to_i
            name = name.sub(/\d+$/, '')
          end
          name += (index + 1).to_s
          return contextualize(name, context)
        end

        self.tokens[name] = context
        name
      end

      # Logs the structure of a node tree, one line per node.
      def debug(doc)
        debug_tree(doc, 0)
      end

      # Logs one node (class name and node info, prefixed according to its
      # depth) and then its children.
      #
      # NOTE(review): relies on Tml.logger responding to +log+ with a
      # single message argument — confirm against the logger class.
      def debug_tree(node, depth)
        padding = ('=' * (depth+1))

        Tml.logger.log(padding + '=> ' + node.class.name.to_s + ': ' + node_info(node))

        (node.children || []).each do |child|
          debug_tree(child, depth+1)
        end
      end

      # Describes a node for debugging: type, tag name for elements, its
      # classification flags and, for text nodes, the text itself.
      def node_info(node)
        info = [node.type]

        info << node.name if node.type == ELEMENT_NODE

        if inline_node?(node)
          info << 'inline'
          info << (has_inline_or_text_siblings?(node) ? 'sentence' : 'only translatable')
        end

        info << 'self closing' if self_closing_node?(node)
        info << 'only child' if only_child?(node)

        return "[#{info.join(', ')}]: " + node.inner_text if node.type == TEXT_NODE
        "[#{info.join(', ')}]"
      end

    end
  end
end
|