tml 5.0.1 → 5.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5a514613d1a01ea544a896e526f38e5f9843df78
4
- data.tar.gz: b1dceccffd886b61d390dbf323da3578b181835f
3
+ metadata.gz: 3bfad1df09bdc30e9dff8ae24cfc73dd044cc1b7
4
+ data.tar.gz: db29aecf3e0e719844ed5366262e843fa36cf194
5
5
  SHA512:
6
- metadata.gz: f86dea985be31eaf6fcfc391615bc1287dd932704c736781834ba7d5acb029ff2f7e251893ff0593d5a1502442e62f392bc472d2f54279712661a4290e8267ed
7
- data.tar.gz: 47cb409b52c980688d033f7e5b1588d1a4424d1e501f12bf564e17813b4fca668b2065ba23ccda4c24286a3a703c758c0386b1fc1b1fd45ec264c9a727e26dd1
6
+ metadata.gz: ec1a8dd025cd3b506f519e72550d8c891342bee71127531e8cbf00de7710f90706e2b679df58a8ce3b5a66c38f77d88a327a5fcbfa7c6063e47c663b8e4b3e90
7
+ data.tar.gz: b5f76143c7c34632dcd4c3193f822640916b62786b53c4a21fd8ef5dbea3fec83cbadedbf426b923e43df66e4a420a9f3adaa73f27ae91088812aa8801336efe
@@ -114,25 +114,44 @@ module Tml
114
114
  debug_format: '{{{{$0}}}}',
115
115
  split_sentences: false,
116
116
  nodes: {
117
- ignored: [],
118
- scripts: %w(style script),
119
- inline: %w(a span i b img strong s em u sub sup),
120
- short: %w(i b),
121
- splitters: %w(br hr)
117
+ ignored: %w(),
118
+ scripts: %w(style script code pre),
119
+ inline: %w(a span i b img strong s em u sub sup),
120
+ short: %w(i b),
121
+ splitters: %w(br hr)
122
122
  },
123
123
  attributes: {
124
- labels: %w(title alt)
124
+ labels: %w(title alt)
125
125
  },
126
126
  name_mapping: {
127
- b: 'bold',
128
- i: 'italic',
129
- a: 'link',
130
- img: 'picture'
127
+ b: 'bold',
128
+ i: 'italic',
129
+ a: 'link',
130
+ img: 'picture'
131
131
  },
132
132
  data_tokens: {
133
- special: false,
134
- numeric: false,
135
- numeric_name: 'num'
133
+ special: {
134
+ enable: true,
135
+ regex: /(&[^;]*;)/
136
+ },
137
+ date: {
138
+ enabled: true,
139
+ formats: [
140
+ [/((Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+\d+,\s+\d+)/, "{month} {day}, {year}"],
141
+ [/((January|February|March|April|May|June|July|August|September|October|November|December)\s+\d+,\s+\d+)/, "{month} {day}, {year}"],
142
+ [/(\d+\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec),\s+\d+)/, "{day} {month}, {year}"],
143
+ [/(\d+\s+(January|February|March|April|May|June|July|August|September|October|November|December),\s+\d+)/, "{day} {month}, {year}"]
144
+ ],
145
+ name: 'date'
146
+ },
147
+ rules: [
148
+ {enabled: true, name: 'time', regex: /(\d{1,2}:\d{1,2}\s+([A-Z]{2,3}|am|pm|AM|PM)?)/},
149
+ {enabled: true, name: 'phone', regex: /((\d{1}-)?\d{3}-\d{3}-\d{4}|\d?\(\d{3}\)\s*\d{3}-\d{4}|(\d.)?\d{3}.\d{3}.\d{4})/},
150
+ {enabled: true, name: 'email', regex: /([-a-z0-9~!$%^&*_=+}{\'?]+(\.[-a-z0-9~!$%^&*_=+}{\'?]+)*@([a-z0-9_][-a-z0-9_]*(\.[-a-z0-9_]+)*\.(aero|arpa|biz|com|coop|edu|gov|info|int|mil|museum|name|net|org|pro|travel|io|mobi|[a-z][a-z])|([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}))(:[0-9]{1,5})?)/},
151
+ {enabled: true, name: 'price', regex: /(\$\d*(,\d*)*(\.\d*)?)/},
152
+ {enabled: true, name: 'fraction', regex: /(\d+\/\d+)/},
153
+ {enabled: true, name: 'num', regex: /(\b\d*(,\d*)*(\.\d*)?%?\b)/}
154
+ ]
136
155
  }
137
156
  }
138
157
 
@@ -36,10 +36,6 @@ module Tml
36
36
  module Tokenizers
37
37
  class Dom
38
38
 
39
- HTML_SPECIAL_CHAR_REGEX = /(&[^;]*;)/
40
- INDEPENDENT_NUMBER_REGEX = /^(\d+)$|^(\d+[.,;\s])|(\s\d+)$|(\s\d+[,;\s])/
41
- VERBOSE_DATE_REGEX = /(((Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)|(January|February|March|April|May|June|July|August|September|October|November|December))\\s\\d+(,\\s\\d+)*(,*\\sat\\s\\d+:\\d+(\\sUTC))*)/
42
-
43
39
  attr_accessor :context, :tokens, :options
44
40
 
45
41
  def initialize(context = {}, options = {})
@@ -54,8 +50,7 @@ module Tml
54
50
 
55
51
  def translate_tree(node)
56
52
  if non_translatable_node?(node)
57
- return node.children.first.inner_text if node.children.count == 1
58
- return ''
53
+ return node.inner_html
59
54
  end
60
55
 
61
56
  return translate_tml(node.inner_text) if node.type == 3
@@ -90,10 +85,19 @@ module Tml
90
85
  html
91
86
  end
92
87
 
88
+ def no_translate_node?(node)
89
+ return unless node && node.type == 1 && node.attributes
90
+ node.attributes.each do |name, attribute|
91
+ return true if name == 'notranslate' or attribute.value.index('notranslate')
92
+ end
93
+ false
94
+ end
95
+
93
96
  def non_translatable_node?(node)
94
97
  return false unless node
95
98
  return true if node.type == 1 && (option('nodes.scripts') || []).index(node.name.downcase)
96
99
  return true if node.type == 1 && node.children.length === 0 && node.inner_text == ''
100
+ return true if no_translate_node?(node)
97
101
  false
98
102
  end
99
103
 
@@ -222,30 +226,55 @@ module Tml
222
226
  value.gsub(/^\s+/, '')
223
227
  end
224
228
 
225
- def replace_special_characters(text)
226
- return text if option('data_tokens.special')
227
-
228
- matches = text.match(HTML_SPECIAL_CHAR_REGEX)
229
- matches.each do |match|
230
- token = match[1, - 2]
231
- self.context[token] = match
232
- text = text.gsub(match, "{#{token}}")
233
- end
234
-
235
- text
236
- end
237
-
238
229
  def generate_data_tokens(text)
239
- return text unless option('data_tokens.numeric')
230
+ if option('data_tokens.special.enabled')
231
+ matches = text.scan(option('data_tokens.special.regex'))
232
+ matches.each do |match|
233
+ token = match[1, - 2]
234
+ self.context[token] = match
235
+ text = text.gsub(match, "{#{token}}")
236
+ end
237
+ end
240
238
 
241
- matches = text.match(INDEPENDENT_NUMBER_REGEX) || []
242
- token_name = option('data_tokens.numeric_name')
239
+ if option('data_tokens.date.enabled')
240
+ token_name = option('data_tokens.date.name')
241
+ formats = option('data_tokens.date.formats')
242
+ formats.each do |format|
243
+ regex = format[0]
244
+ # date_format = format[1]
245
+
246
+ matches = text.scan(regex)
247
+ if matches
248
+ matches.each do |match|
249
+ next if match.first.nil? or match.first == ''
250
+ date = match.first
251
+ token = self.contextualize(token_name, date)
252
+ replacement = "{#{token}}"
253
+ text = text.gsub(date, replacement)
254
+ end
255
+ end
256
+ end
257
+ end
243
258
 
244
- matches.each do |match|
245
- value = match.gsub(/[.,;\s]/, '')
246
- token = contextualize(token_name, value.to_i)
247
- replacement = match.replace(value, "{#{token}}")
248
- text = text.gsub(match, match.gsub(value, replacement))
259
+ rules = option('data_tokens.rules')
260
+ if rules
261
+ rules.each do |rule|
262
+ if rule[:enabled]
263
+ matches = text.scan(rule[:regex])
264
+
265
+ if matches
266
+ matches.each do |match|
267
+ next if match.first.nil? or match.first == ''
268
+ value = match.first.strip
269
+
270
+ unless value == ''
271
+ token = contextualize(rule[:name], value.gsub(/[.,;\s]/, '').to_i)
272
+ text = text.gsub(value, value.gsub(value, "{#{token}}"))
273
+ end
274
+ end
275
+ end
276
+ end
277
+ end
249
278
  end
250
279
 
251
280
  text
@@ -30,5 +30,5 @@
30
30
  #++
31
31
 
32
32
  module Tml
33
- VERSION = '5.0.1'
33
+ VERSION = '5.0.2'
34
34
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tml
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.0.1
4
+ version: 5.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Berkovich
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-08 00:00:00.000000000 Z
11
+ date: 2015-09-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday