list_matcher 1.0.5 → 1.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +37 -7
- data/lib/list_matcher.rb +30 -46
- data/lib/list_matcher/version.rb +1 -1
- data/test/basic_test.rb +20 -0
- data/test/doc_test.rb +11 -4
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 393f3b81be1503c0af7b583119e063ff2c91276a
|
4
|
+
data.tar.gz: 66a79b3b020f17b9da3fcd6a8465c8b5966088ac
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 23e71cd30a236c5a247d2f6e12c6e9f7bd5f3adb3c1dcf2c4eff781561ed7ec1bb123a720e9161dc6dba7d597c315c0bdac403663a0bb68b2fb957863cce30ef
|
7
|
+
data.tar.gz: db2306c5f2cf514961971566446de1de8e746fe157b472f8314c6a82f24063b4d7c1bb0c7ed4e632108ce4ef253c00e1c69c3b74c4e6d5cc64530c2035c4287a
|
data/README.md
CHANGED
@@ -192,9 +192,6 @@ List::Matcher.pattern (1...1000).to_a, bound: { test: /\d/, left: '(?<!\d)', rig
|
|
192
192
|
As with the predefined boundaries -- `:word_left`, `:line_right`, `:string_left`, etc. -- you can bound items only at one
|
193
193
|
margin, in this case by providing only the `left:` or `right:` key-value pair.
|
194
194
|
|
195
|
-
**NOTE** Because boundary tests cannot be applied to symbols, the bound option will give you strange results if you use it
|
196
|
-
with a list any of whose items have a symbol at their leading or trailing margin.
|
197
|
-
|
198
195
|
### strip
|
199
196
|
|
200
197
|
```ruby
|
@@ -271,6 +268,38 @@ List::Matcher.pattern [ 'Catch foo', 'foo', 'Fahrenheit foo' ], symbols: { foo:
|
|
271
268
|
Because it is possible for symbol sequences to overlap, sequences with string or symbol keys are evaluated before `Regexps`, and longer keys are
|
272
269
|
evaluated before shorter ones.
|
273
270
|
|
271
|
+
`List::Matcher` doesn't parse regex strings to determine whether they need to be grouped before any iteration suffix
|
272
|
+
can be added or to determine whether it is sensible to add boundary sequences before or after them. By default it assumes
|
273
|
+
that they need grouping if they repeat and that boundary markers don't make sense. You can override this behavior, however.
|
274
|
+
You specify the characteristics of the pattern as a hash with the following keys:
|
275
|
+
|
276
|
+
**`:pattern`**
|
277
|
+
|
278
|
+
The value is the pattern to substitute for the symbol.
|
279
|
+
|
280
|
+
**`:atomic`**
|
281
|
+
|
282
|
+
The pattern needs no grouping if the value is true.
|
283
|
+
|
284
|
+
**`:left`**
|
285
|
+
|
286
|
+
A character to test for the left boundary condition.
|
287
|
+
|
288
|
+
**`:right`**
|
289
|
+
|
290
|
+
A character to test for the right boundary condition.
|
291
|
+
|
292
|
+
For example:
|
293
|
+
|
294
|
+
```ruby
|
295
|
+
List::Matcher.pattern %w(dddd ddddddd),
|
296
|
+
bound: :word,
|
297
|
+
symbols: { d: { pattern: '\d', atomic: true, left: '0', right: '0' } }, # <-- this
|
298
|
+
atomic: false
|
299
|
+
|
300
|
+
# \b\d{4}(?:\d{3})?\b
|
301
|
+
```
|
302
|
+
|
274
303
|
### name
|
275
304
|
|
276
305
|
If you assign your pattern a name, it will be constructed with a named group such that you can extract
|
@@ -311,11 +340,12 @@ date_20th_century = m.rx(
|
|
311
340
|
],
|
312
341
|
normalize_whitespace: true,
|
313
342
|
atomic: true,
|
343
|
+
bound: true,
|
314
344
|
symbols: {
|
315
|
-
year: year,
|
316
|
-
mday: mday,
|
317
|
-
wday: wday,
|
318
|
-
mo: mo
|
345
|
+
year: { pattern: year, atomic: true, left: '1', right: '1' },
|
346
|
+
mday: { pattern: mday, atomic: true, left: '1', right: '1' },
|
347
|
+
wday: { pattern: wday, atomic: true, left: 'a', right: 'a' },
|
348
|
+
mo: { pattern: mo, atomic: true, left: 'a', right: 'a' }
|
319
349
|
}
|
320
350
|
)
|
321
351
|
|
data/lib/list_matcher.rb
CHANGED
@@ -197,7 +197,7 @@ module List
|
|
197
197
|
def tree(list, symbols)
|
198
198
|
if list.size == 1
|
199
199
|
leaves = list[0].chars.map do |c|
|
200
|
-
symbols
|
200
|
+
symbols[c] || Leaf.new( self, c )
|
201
201
|
end
|
202
202
|
if leaves.length == 1
|
203
203
|
leaves.first
|
@@ -205,7 +205,7 @@ module List
|
|
205
205
|
Sequence.new self, *leaves
|
206
206
|
end
|
207
207
|
elsif list.all?{ |w| w.length == 1 }
|
208
|
-
chars = list.select{ |w| !symbols
|
208
|
+
chars = list.select{ |w| !symbols[w] }
|
209
209
|
if chars.size > 1
|
210
210
|
list -= chars
|
211
211
|
c = CharClass.new self, chars
|
@@ -231,7 +231,7 @@ module List
|
|
231
231
|
Sequence.new self, c1, c2
|
232
232
|
else
|
233
233
|
grouped = list.group_by{ |w| w[0] }
|
234
|
-
chars = grouped.select{ |_, w| w.size == 1 && w[0].size == 1 && !symbols
|
234
|
+
chars = grouped.select{ |_, w| w.size == 1 && w[0].size == 1 && !symbols[w[0]] }.map{ |v, _| v }
|
235
235
|
if chars.size > 1
|
236
236
|
list -= chars
|
237
237
|
c = CharClass.new self, chars
|
@@ -257,7 +257,7 @@ module List
|
|
257
257
|
Hash[o.map{ |k, v| [ deep_dup(k), deep_dup(v) ] }]
|
258
258
|
elsif o.is_a?(Array)
|
259
259
|
o.map{ |v| deep_dup v }
|
260
|
-
elsif o.nil? || o.is_a?(Symbol)
|
260
|
+
elsif o.nil? || o.is_a?(Symbol) || o.is_a?(TrueClass) || o.is_a?(FalseClass)
|
261
261
|
o
|
262
262
|
else
|
263
263
|
o.dup
|
@@ -326,11 +326,9 @@ module List
|
|
326
326
|
end
|
327
327
|
|
328
328
|
class Special
|
329
|
-
attr_reader :engine
|
329
|
+
attr_reader :engine, :special_map
|
330
330
|
attr_accessor :specials, :list, :left, :right
|
331
331
|
|
332
|
-
NULL = Regexp.new '(?!)'
|
333
|
-
|
334
332
|
def initialize( engine, specials, list )
|
335
333
|
@engine = engine
|
336
334
|
@list = list
|
@@ -360,11 +358,11 @@ module List
|
|
360
358
|
pat = opts.delete :pattern
|
361
359
|
raise Error, "symbol #{var} requires a pattern" unless pat || var.is_a?(Regexp)
|
362
360
|
pat ||= var.to_s
|
363
|
-
|
361
|
+
SymbolPattern.new engine, c, var, pat, **opts
|
364
362
|
elsif opts.is_a? String
|
365
|
-
|
363
|
+
SymbolPattern.new engine, c, var, opts
|
366
364
|
elsif var.is_a?(Regexp) && opts.nil?
|
367
|
-
|
365
|
+
SymbolPattern.new engine, c, var, nil
|
368
366
|
else
|
369
367
|
raise Error, "symbol #{var} requires a pattern"
|
370
368
|
end
|
@@ -374,15 +372,16 @@ module List
|
|
374
372
|
if engine.bound
|
375
373
|
if engine.left_bound
|
376
374
|
c = ( max += 1 ).chr
|
377
|
-
@left =
|
375
|
+
@left = SymbolPattern.new engine, c, c, engine.left_bound
|
378
376
|
@specials << @left
|
379
377
|
end
|
380
378
|
if engine.right_bound
|
381
379
|
c = ( max += 1 ).chr
|
382
|
-
@right =
|
380
|
+
@right = SymbolPattern.new engine, c, c, engine.right_bound
|
383
381
|
@specials << @right
|
384
382
|
end
|
385
383
|
end
|
384
|
+
@special_map = Hash[@specials.map{ |s| [ s.char, s ] }]
|
386
385
|
end
|
387
386
|
|
388
387
|
# confirm that all special patterns are legitimate regexen
|
@@ -396,58 +395,51 @@ module List
|
|
396
395
|
end
|
397
396
|
end
|
398
397
|
|
399
|
-
|
400
|
-
|
401
|
-
end
|
402
|
-
|
403
|
-
def symbols(s)
|
398
|
+
# maps a symbol character back to the symbol object
|
399
|
+
def [](s)
|
404
400
|
special_map[s]
|
405
401
|
end
|
406
402
|
|
407
403
|
# reduce the list to a version ready for pattern generation
|
408
404
|
def normalize
|
409
405
|
rx = if specials.empty?
|
410
|
-
|
406
|
+
/(?!)/
|
411
407
|
else
|
412
408
|
Regexp.new '(' + specials.map(&:var).map(&:to_s).join('|') + ')'
|
413
409
|
end
|
414
|
-
l = r = false
|
415
410
|
list = self.list.uniq.map do |w|
|
416
|
-
parts = w.split
|
411
|
+
parts = w.split(rx).select{ |p| p.length > 0 }
|
417
412
|
e = parts.size - 1
|
418
413
|
(0..e).map do |i|
|
419
414
|
p = parts[i]
|
420
415
|
if rx === p
|
421
416
|
p = specials.detect{ |sp| sp.var === p }
|
422
|
-
special_map[p.char] = p
|
423
417
|
if engine.bound
|
424
|
-
|
425
|
-
|
426
|
-
|
418
|
+
s = p
|
419
|
+
if i == 0 && engine.left_bound && engine.word_test === p.left
|
420
|
+
s = "#{left}#{s}"
|
427
421
|
end
|
428
|
-
if i == e && engine.right_bound && p.right
|
429
|
-
|
430
|
-
r = true
|
422
|
+
if i == e && engine.right_bound && engine.word_test === p.right
|
423
|
+
s = "#{s}#{right}"
|
431
424
|
end
|
425
|
+
p = s
|
432
426
|
end
|
433
427
|
else
|
434
428
|
p = p.downcase if engine.case_insensitive
|
435
429
|
if engine.bound
|
430
|
+
s = p
|
436
431
|
if i == 0 && engine.left_bound && engine.word_test === p[0]
|
437
|
-
|
438
|
-
l = true
|
432
|
+
s = "#{left}#{s}"
|
439
433
|
end
|
440
434
|
if i == e && engine.right_bound && engine.word_test === p[-1]
|
441
|
-
|
442
|
-
r = true
|
435
|
+
s = "#{s}#{right}"
|
443
436
|
end
|
437
|
+
p = s
|
444
438
|
end
|
445
439
|
end
|
446
440
|
p
|
447
441
|
end.join
|
448
442
|
end.uniq.sort
|
449
|
-
special_map[left.char] = left if l
|
450
|
-
special_map[right.char] = right if r
|
451
443
|
list
|
452
444
|
end
|
453
445
|
end
|
@@ -522,29 +514,21 @@ module List
|
|
522
514
|
|
523
515
|
end
|
524
516
|
|
525
|
-
class
|
517
|
+
class SymbolPattern < Node
|
526
518
|
attr_accessor :char, :var, :left, :right, :pat, :symbol
|
527
|
-
def initialize(engine, char, var, pat, atomic: (var.is_a?(Regexp) && pat.nil?),
|
519
|
+
def initialize(engine, char, var, pat, atomic: (var.is_a?(Regexp) && pat.nil?), left: nil, right: nil)
|
528
520
|
super(engine, nil)
|
529
521
|
@char = char
|
530
522
|
@symbol = var.to_s
|
531
523
|
@var = var.is_a?(String) || var.is_a?(Symbol) ? Regexp.new(Regexp.quote(var.to_s)) : var
|
532
524
|
@pat = pat || var.to_s
|
533
525
|
@atomic = !!atomic
|
534
|
-
@left =
|
535
|
-
@right =
|
526
|
+
@left = left
|
527
|
+
@right = right
|
536
528
|
end
|
537
529
|
|
538
530
|
def dup
|
539
|
-
self.class.new engine, char, var, pat, atomic: atomic?,
|
540
|
-
end
|
541
|
-
|
542
|
-
def left?
|
543
|
-
@left
|
544
|
-
end
|
545
|
-
|
546
|
-
def right?
|
547
|
-
@right
|
531
|
+
self.class.new engine, char, var, pat, atomic: atomic?, left: left, right: right
|
548
532
|
end
|
549
533
|
|
550
534
|
def atomic?
|
data/lib/list_matcher/version.rb
CHANGED
data/test/basic_test.rb
CHANGED
@@ -289,4 +289,24 @@ class BasicTest < Minitest::Test
|
|
289
289
|
assert rx === ' '
|
290
290
|
assert rx === '#'
|
291
291
|
end
|
292
|
+
|
293
|
+
def test_symbol_bound
|
294
|
+
rx = List::Matcher.pattern %w(1 2 3 d), bound: :word, symbols: { d: { pattern: '\d{4}', left: '0', right: '0' } }, atomic: false
|
295
|
+
assert_equal '\b(?:[1-3]|\d{4})\b', rx
|
296
|
+
end
|
297
|
+
|
298
|
+
def test_symbol_bound_left
|
299
|
+
rx = List::Matcher.pattern %w(1 2 3 d), bound: :word_left, symbols: { d: { pattern: '\d{4}', left: '0', right: '0' } }, atomic: false
|
300
|
+
assert_equal '\b(?:[1-3]|\d{4})', rx
|
301
|
+
end
|
302
|
+
|
303
|
+
def test_symbol_bound_right
|
304
|
+
rx = List::Matcher.pattern %w(1 2 3 d), bound: :word_right, symbols: { d: { pattern: '\d{4}', left: '0', right: '0' } }, atomic: false
|
305
|
+
assert_equal '(?:[1-3]|\d{4})\b', rx
|
306
|
+
end
|
307
|
+
|
308
|
+
def test_sort_bound_word
|
309
|
+
rx = List::Matcher.pattern %w(a), bound: :word, atomic: false
|
310
|
+
assert_equal '\ba\b', rx
|
311
|
+
end
|
292
312
|
end
|
data/test/doc_test.rb
CHANGED
@@ -68,11 +68,12 @@ class DocTest < Minitest::Test
|
|
68
68
|
],
|
69
69
|
normalize_whitespace: true,
|
70
70
|
atomic: true,
|
71
|
+
bound: true,
|
71
72
|
symbols: {
|
72
|
-
year: year,
|
73
|
-
mday: mday,
|
74
|
-
wday: wday,
|
75
|
-
mo: mo
|
73
|
+
year: { pattern: year, atomic: true, left: '1', right: '1' },
|
74
|
+
mday: { pattern: mday, atomic: true, left: '1', right: '1' },
|
75
|
+
wday: { pattern: wday, atomic: true, left: 'a', right: 'a' },
|
76
|
+
mo: { pattern: mo, atomic: true, left: 'a', right: 'a' }
|
76
77
|
}
|
77
78
|
)
|
78
79
|
|
@@ -103,6 +104,12 @@ class DocTest < Minitest::Test
|
|
103
104
|
assert_equal "(?:\\Acat)", List::Matcher.pattern( %w(cat), bound: :string_left )
|
104
105
|
assert_equal "(?:cat$)", List::Matcher.pattern( %w(cat), bound: :line_right )
|
105
106
|
assert_equal "(?:\\#@%|\\bcat\\b)", List::Matcher.pattern( %w( cat #@% ), bound: :word )
|
107
|
+
rx = List::Matcher.pattern %w(dddd ddddddd),
|
108
|
+
bound: :word,
|
109
|
+
symbols: { d: { pattern: '\d', atomic: true, left: '0', right: '0' } }, # <-- this
|
110
|
+
atomic: false
|
111
|
+
assert_equal '\b\d{4}(?:\d{3})?\b', rx
|
112
|
+
|
106
113
|
end
|
107
114
|
|
108
115
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: list_matcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.5
|
4
|
+
version: 1.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- dfhoughton
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-09-
|
11
|
+
date: 2015-09-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|