tml 5.0.1 → 5.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5a514613d1a01ea544a896e526f38e5f9843df78
4
- data.tar.gz: b1dceccffd886b61d390dbf323da3578b181835f
3
+ metadata.gz: 3bfad1df09bdc30e9dff8ae24cfc73dd044cc1b7
4
+ data.tar.gz: db29aecf3e0e719844ed5366262e843fa36cf194
5
5
  SHA512:
6
- metadata.gz: f86dea985be31eaf6fcfc391615bc1287dd932704c736781834ba7d5acb029ff2f7e251893ff0593d5a1502442e62f392bc472d2f54279712661a4290e8267ed
7
- data.tar.gz: 47cb409b52c980688d033f7e5b1588d1a4424d1e501f12bf564e17813b4fca668b2065ba23ccda4c24286a3a703c758c0386b1fc1b1fd45ec264c9a727e26dd1
6
+ metadata.gz: ec1a8dd025cd3b506f519e72550d8c891342bee71127531e8cbf00de7710f90706e2b679df58a8ce3b5a66c38f77d88a327a5fcbfa7c6063e47c663b8e4b3e90
7
+ data.tar.gz: b5f76143c7c34632dcd4c3193f822640916b62786b53c4a21fd8ef5dbea3fec83cbadedbf426b923e43df66e4a420a9f3adaa73f27ae91088812aa8801336efe
@@ -114,25 +114,44 @@ module Tml
114
114
  debug_format: '{{{{$0}}}}',
115
115
  split_sentences: false,
116
116
  nodes: {
117
- ignored: [],
118
- scripts: %w(style script),
119
- inline: %w(a span i b img strong s em u sub sup),
120
- short: %w(i b),
121
- splitters: %w(br hr)
117
+ ignored: %w(),
118
+ scripts: %w(style script code pre),
119
+ inline: %w(a span i b img strong s em u sub sup),
120
+ short: %w(i b),
121
+ splitters: %w(br hr)
122
122
  },
123
123
  attributes: {
124
- labels: %w(title alt)
124
+ labels: %w(title alt)
125
125
  },
126
126
  name_mapping: {
127
- b: 'bold',
128
- i: 'italic',
129
- a: 'link',
130
- img: 'picture'
127
+ b: 'bold',
128
+ i: 'italic',
129
+ a: 'link',
130
+ img: 'picture'
131
131
  },
132
132
  data_tokens: {
133
- special: false,
134
- numeric: false,
135
- numeric_name: 'num'
133
+ special: {
134
+ enable: true,
135
+ regex: /(&[^;]*;)/
136
+ },
137
+ date: {
138
+ enabled: true,
139
+ formats: [
140
+ [/((Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+\d+,\s+\d+)/, "{month} {day}, {year}"],
141
+ [/((January|February|March|April|May|June|July|August|September|October|November|December)\s+\d+,\s+\d+)/, "{month} {day}, {year}"],
142
+ [/(\d+\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec),\s+\d+)/, "{day} {month}, {year}"],
143
+ [/(\d+\s+(January|February|March|April|May|June|July|August|September|October|November|December),\s+\d+)/, "{day} {month}, {year}"]
144
+ ],
145
+ name: 'date'
146
+ },
147
+ rules: [
148
+ {enabled: true, name: 'time', regex: /(\d{1,2}:\d{1,2}\s+([A-Z]{2,3}|am|pm|AM|PM)?)/},
149
+ {enabled: true, name: 'phone', regex: /((\d{1}-)?\d{3}-\d{3}-\d{4}|\d?\(\d{3}\)\s*\d{3}-\d{4}|(\d.)?\d{3}.\d{3}.\d{4})/},
150
+ {enabled: true, name: 'email', regex: /([-a-z0-9~!$%^&*_=+}{\'?]+(\.[-a-z0-9~!$%^&*_=+}{\'?]+)*@([a-z0-9_][-a-z0-9_]*(\.[-a-z0-9_]+)*\.(aero|arpa|biz|com|coop|edu|gov|info|int|mil|museum|name|net|org|pro|travel|io|mobi|[a-z][a-z])|([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}))(:[0-9]{1,5})?)/},
151
+ {enabled: true, name: 'price', regex: /(\$\d*(,\d*)*(\.\d*)?)/},
152
+ {enabled: true, name: 'fraction', regex: /(\d+\/\d+)/},
153
+ {enabled: true, name: 'num', regex: /(\b\d*(,\d*)*(\.\d*)?%?\b)/}
154
+ ]
136
155
  }
137
156
  }
138
157
 
@@ -36,10 +36,6 @@ module Tml
36
36
  module Tokenizers
37
37
  class Dom
38
38
 
39
- HTML_SPECIAL_CHAR_REGEX = /(&[^;]*;)/
40
- INDEPENDENT_NUMBER_REGEX = /^(\d+)$|^(\d+[.,;\s])|(\s\d+)$|(\s\d+[,;\s])/
41
- VERBOSE_DATE_REGEX = /(((Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)|(January|February|March|April|May|June|July|August|September|October|November|December))\\s\\d+(,\\s\\d+)*(,*\\sat\\s\\d+:\\d+(\\sUTC))*)/
42
-
43
39
  attr_accessor :context, :tokens, :options
44
40
 
45
41
  def initialize(context = {}, options = {})
@@ -54,8 +50,7 @@ module Tml
54
50
 
55
51
  def translate_tree(node)
56
52
  if non_translatable_node?(node)
57
- return node.children.first.inner_text if node.children.count == 1
58
- return ''
53
+ return node.inner_html
59
54
  end
60
55
 
61
56
  return translate_tml(node.inner_text) if node.type == 3
@@ -90,10 +85,19 @@ module Tml
90
85
  html
91
86
  end
92
87
 
88
+ def no_translate_node?(node)
89
+ return unless node && node.type == 1 && node.attributes
90
+ node.attributes.each do |name, attribute|
91
+ return true if name == 'notranslate' or attribute.value.index('notranslate')
92
+ end
93
+ false
94
+ end
95
+
93
96
  def non_translatable_node?(node)
94
97
  return false unless node
95
98
  return true if node.type == 1 && (option('nodes.scripts') || []).index(node.name.downcase)
96
99
  return true if node.type == 1 && node.children.length === 0 && node.inner_text == ''
100
+ return true if no_translate_node?(node)
97
101
  false
98
102
  end
99
103
 
@@ -222,30 +226,55 @@ module Tml
222
226
  value.gsub(/^\s+/, '')
223
227
  end
224
228
 
225
- def replace_special_characters(text)
226
- return text if option('data_tokens.special')
227
-
228
- matches = text.match(HTML_SPECIAL_CHAR_REGEX)
229
- matches.each do |match|
230
- token = match[1, - 2]
231
- self.context[token] = match
232
- text = text.gsub(match, "{#{token}}")
233
- end
234
-
235
- text
236
- end
237
-
238
229
  def generate_data_tokens(text)
239
- return text unless option('data_tokens.numeric')
230
+ if option('data_tokens.special.enabled')
231
+ matches = text.scan(option('data_tokens.special.regex'))
232
+ matches.each do |match|
233
+ token = match[1, - 2]
234
+ self.context[token] = match
235
+ text = text.gsub(match, "{#{token}}")
236
+ end
237
+ end
240
238
 
241
- matches = text.match(INDEPENDENT_NUMBER_REGEX) || []
242
- token_name = option('data_tokens.numeric_name')
239
+ if option('data_tokens.date.enabled')
240
+ token_name = option('data_tokens.date.name')
241
+ formats = option('data_tokens.date.formats')
242
+ formats.each do |format|
243
+ regex = format[0]
244
+ # date_format = format[1]
245
+
246
+ matches = text.scan(regex)
247
+ if matches
248
+ matches.each do |match|
249
+ next if match.first.nil? or match.first == ''
250
+ date = match.first
251
+ token = self.contextualize(token_name, date)
252
+ replacement = "{#{token}}"
253
+ text = text.gsub(date, replacement)
254
+ end
255
+ end
256
+ end
257
+ end
243
258
 
244
- matches.each do |match|
245
- value = match.gsub(/[.,;\s]/, '')
246
- token = contextualize(token_name, value.to_i)
247
- replacement = match.replace(value, "{#{token}}")
248
- text = text.gsub(match, match.gsub(value, replacement))
259
+ rules = option('data_tokens.rules')
260
+ if rules
261
+ rules.each do |rule|
262
+ if rule[:enabled]
263
+ matches = text.scan(rule[:regex])
264
+
265
+ if matches
266
+ matches.each do |match|
267
+ next if match.first.nil? or match.first == ''
268
+ value = match.first.strip
269
+
270
+ unless value == ''
271
+ token = contextualize(rule[:name], value.gsub(/[.,;\s]/, '').to_i)
272
+ text = text.gsub(value, value.gsub(value, "{#{token}}"))
273
+ end
274
+ end
275
+ end
276
+ end
277
+ end
249
278
  end
250
279
 
251
280
  text
@@ -30,5 +30,5 @@
30
30
  #++
31
31
 
32
32
  module Tml
33
- VERSION = '5.0.1'
33
+ VERSION = '5.0.2'
34
34
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tml
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.0.1
4
+ version: 5.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Berkovich
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-08 00:00:00.000000000 Z
11
+ date: 2015-09-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday