tml 5.0.1 → 5.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/tml/config.rb +32 -13
- data/lib/tml/tokenizers/dom.rb +56 -27
- data/lib/tml/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3bfad1df09bdc30e9dff8ae24cfc73dd044cc1b7
|
4
|
+
data.tar.gz: db29aecf3e0e719844ed5366262e843fa36cf194
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ec1a8dd025cd3b506f519e72550d8c891342bee71127531e8cbf00de7710f90706e2b679df58a8ce3b5a66c38f77d88a327a5fcbfa7c6063e47c663b8e4b3e90
|
7
|
+
data.tar.gz: b5f76143c7c34632dcd4c3193f822640916b62786b53c4a21fd8ef5dbea3fec83cbadedbf426b923e43df66e4a420a9f3adaa73f27ae91088812aa8801336efe
|
data/lib/tml/config.rb
CHANGED
@@ -114,25 +114,44 @@ module Tml
|
|
114
114
|
debug_format: '{{{{$0}}}}',
|
115
115
|
split_sentences: false,
|
116
116
|
nodes: {
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
117
|
+
ignored: %w(),
|
118
|
+
scripts: %w(style script code pre),
|
119
|
+
inline: %w(a span i b img strong s em u sub sup),
|
120
|
+
short: %w(i b),
|
121
|
+
splitters: %w(br hr)
|
122
122
|
},
|
123
123
|
attributes: {
|
124
|
-
|
124
|
+
labels: %w(title alt)
|
125
125
|
},
|
126
126
|
name_mapping: {
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
127
|
+
b: 'bold',
|
128
|
+
i: 'italic',
|
129
|
+
a: 'link',
|
130
|
+
img: 'picture'
|
131
131
|
},
|
132
132
|
data_tokens: {
|
133
|
-
|
134
|
-
|
135
|
-
|
133
|
+
special: {
|
134
|
+
enable: true,
|
135
|
+
regex: /(&[^;]*;)/
|
136
|
+
},
|
137
|
+
date: {
|
138
|
+
enabled: true,
|
139
|
+
formats: [
|
140
|
+
[/((Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+\d+,\s+\d+)/, "{month} {day}, {year}"],
|
141
|
+
[/((January|February|March|April|May|June|July|August|September|October|November|December)\s+\d+,\s+\d+)/, "{month} {day}, {year}"],
|
142
|
+
[/(\d+\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec),\s+\d+)/, "{day} {month}, {year}"],
|
143
|
+
[/(\d+\s+(January|February|March|April|May|June|July|August|September|October|November|December),\s+\d+)/, "{day} {month}, {year}"]
|
144
|
+
],
|
145
|
+
name: 'date'
|
146
|
+
},
|
147
|
+
rules: [
|
148
|
+
{enabled: true, name: 'time', regex: /(\d{1,2}:\d{1,2}\s+([A-Z]{2,3}|am|pm|AM|PM)?)/},
|
149
|
+
{enabled: true, name: 'phone', regex: /((\d{1}-)?\d{3}-\d{3}-\d{4}|\d?\(\d{3}\)\s*\d{3}-\d{4}|(\d.)?\d{3}.\d{3}.\d{4})/},
|
150
|
+
{enabled: true, name: 'email', regex: /([-a-z0-9~!$%^&*_=+}{\'?]+(\.[-a-z0-9~!$%^&*_=+}{\'?]+)*@([a-z0-9_][-a-z0-9_]*(\.[-a-z0-9_]+)*\.(aero|arpa|biz|com|coop|edu|gov|info|int|mil|museum|name|net|org|pro|travel|io|mobi|[a-z][a-z])|([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}))(:[0-9]{1,5})?)/},
|
151
|
+
{enabled: true, name: 'price', regex: /(\$\d*(,\d*)*(\.\d*)?)/},
|
152
|
+
{enabled: true, name: 'fraction', regex: /(\d+\/\d+)/},
|
153
|
+
{enabled: true, name: 'num', regex: /(\b\d*(,\d*)*(\.\d*)?%?\b)/}
|
154
|
+
]
|
136
155
|
}
|
137
156
|
}
|
138
157
|
|
data/lib/tml/tokenizers/dom.rb
CHANGED
@@ -36,10 +36,6 @@ module Tml
|
|
36
36
|
module Tokenizers
|
37
37
|
class Dom
|
38
38
|
|
39
|
-
HTML_SPECIAL_CHAR_REGEX = /(&[^;]*;)/
|
40
|
-
INDEPENDENT_NUMBER_REGEX = /^(\d+)$|^(\d+[.,;\s])|(\s\d+)$|(\s\d+[,;\s])/
|
41
|
-
VERBOSE_DATE_REGEX = /(((Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)|(January|February|March|April|May|June|July|August|September|October|November|December))\\s\\d+(,\\s\\d+)*(,*\\sat\\s\\d+:\\d+(\\sUTC))*)/
|
42
|
-
|
43
39
|
attr_accessor :context, :tokens, :options
|
44
40
|
|
45
41
|
def initialize(context = {}, options = {})
|
@@ -54,8 +50,7 @@ module Tml
|
|
54
50
|
|
55
51
|
def translate_tree(node)
|
56
52
|
if non_translatable_node?(node)
|
57
|
-
return node.
|
58
|
-
return ''
|
53
|
+
return node.inner_html
|
59
54
|
end
|
60
55
|
|
61
56
|
return translate_tml(node.inner_text) if node.type == 3
|
@@ -90,10 +85,19 @@ module Tml
|
|
90
85
|
html
|
91
86
|
end
|
92
87
|
|
88
|
+
def no_translate_node?(node)
|
89
|
+
return unless node && node.type == 1 && node.attributes
|
90
|
+
node.attributes.each do |name, attribute|
|
91
|
+
return true if name == 'notranslate' or attribute.value.index('notranslate')
|
92
|
+
end
|
93
|
+
false
|
94
|
+
end
|
95
|
+
|
93
96
|
def non_translatable_node?(node)
|
94
97
|
return false unless node
|
95
98
|
return true if node.type == 1 && (option('nodes.scripts') || []).index(node.name.downcase)
|
96
99
|
return true if node.type == 1 && node.children.length === 0 && node.inner_text == ''
|
100
|
+
return true if no_translate_node?(node)
|
97
101
|
false
|
98
102
|
end
|
99
103
|
|
@@ -222,30 +226,55 @@ module Tml
|
|
222
226
|
value.gsub(/^\s+/, '')
|
223
227
|
end
|
224
228
|
|
225
|
-
def replace_special_characters(text)
|
226
|
-
return text if option('data_tokens.special')
|
227
|
-
|
228
|
-
matches = text.match(HTML_SPECIAL_CHAR_REGEX)
|
229
|
-
matches.each do |match|
|
230
|
-
token = match[1, - 2]
|
231
|
-
self.context[token] = match
|
232
|
-
text = text.gsub(match, "{#{token}}")
|
233
|
-
end
|
234
|
-
|
235
|
-
text
|
236
|
-
end
|
237
|
-
|
238
229
|
def generate_data_tokens(text)
|
239
|
-
|
230
|
+
if option('data_tokens.special.enabled')
|
231
|
+
matches = text.scan(option('data_tokens.special.regex'))
|
232
|
+
matches.each do |match|
|
233
|
+
token = match[1, - 2]
|
234
|
+
self.context[token] = match
|
235
|
+
text = text.gsub(match, "{#{token}}")
|
236
|
+
end
|
237
|
+
end
|
240
238
|
|
241
|
-
|
242
|
-
|
239
|
+
if option('data_tokens.date.enabled')
|
240
|
+
token_name = option('data_tokens.date.name')
|
241
|
+
formats = option('data_tokens.date.formats')
|
242
|
+
formats.each do |format|
|
243
|
+
regex = format[0]
|
244
|
+
# date_format = format[1]
|
245
|
+
|
246
|
+
matches = text.scan(regex)
|
247
|
+
if matches
|
248
|
+
matches.each do |match|
|
249
|
+
next if match.first.nil? or match.first == ''
|
250
|
+
date = match.first
|
251
|
+
token = self.contextualize(token_name, date)
|
252
|
+
replacement = "{#{token}}"
|
253
|
+
text = text.gsub(date, replacement)
|
254
|
+
end
|
255
|
+
end
|
256
|
+
end
|
257
|
+
end
|
243
258
|
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
259
|
+
rules = option('data_tokens.rules')
|
260
|
+
if rules
|
261
|
+
rules.each do |rule|
|
262
|
+
if rule[:enabled]
|
263
|
+
matches = text.scan(rule[:regex])
|
264
|
+
|
265
|
+
if matches
|
266
|
+
matches.each do |match|
|
267
|
+
next if match.first.nil? or match.first == ''
|
268
|
+
value = match.first.strip
|
269
|
+
|
270
|
+
unless value == ''
|
271
|
+
token = contextualize(rule[:name], value.gsub(/[.,;\s]/, '').to_i)
|
272
|
+
text = text.gsub(value, value.gsub(value, "{#{token}}"))
|
273
|
+
end
|
274
|
+
end
|
275
|
+
end
|
276
|
+
end
|
277
|
+
end
|
249
278
|
end
|
250
279
|
|
251
280
|
text
|
data/lib/tml/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.0.1
|
4
|
+
version: 5.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Berkovich
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-09-
|
11
|
+
date: 2015-09-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: faraday
|