list_matcher 1.0.5 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +37 -7
- data/lib/list_matcher.rb +30 -46
- data/lib/list_matcher/version.rb +1 -1
- data/test/basic_test.rb +20 -0
- data/test/doc_test.rb +11 -4
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 393f3b81be1503c0af7b583119e063ff2c91276a
|
4
|
+
data.tar.gz: 66a79b3b020f17b9da3fcd6a8465c8b5966088ac
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 23e71cd30a236c5a247d2f6e12c6e9f7bd5f3adb3c1dcf2c4eff781561ed7ec1bb123a720e9161dc6dba7d597c315c0bdac403663a0bb68b2fb957863cce30ef
|
7
|
+
data.tar.gz: db2306c5f2cf514961971566446de1de8e746fe157b472f8314c6a82f24063b4d7c1bb0c7ed4e632108ce4ef253c00e1c69c3b74c4e6d5cc64530c2035c4287a
|
data/README.md
CHANGED
@@ -192,9 +192,6 @@ List::Matcher.pattern (1...1000).to_a, bound: { test: /\d/, left: '(?<!\d)', rig
|
|
192
192
|
As with the predefined boundaries -- `:word_left`, `:line_right`, `:string_left`, etc. -- you can bound items only at one
|
193
193
|
margin, in this case by providing only the `left:` or `right:` key-value pair.
|
194
194
|
|
195
|
-
**NOTE** Because boundary tests cannot be applied to symbols, the bound option will give you strange results if you use it
|
196
|
-
with a list any of whose items have a symbol at their leading or trailing margin.
|
197
|
-
|
198
195
|
### strip
|
199
196
|
|
200
197
|
```ruby
|
@@ -271,6 +268,38 @@ List::Matcher.pattern [ 'Catch foo', 'foo', 'Fahrenheit foo' ], symbols: { foo:
|
|
271
268
|
Because it is possible for symbol sequences to overlap, sequences with string or symbol keys are evaluated before `Regexps`, and longer keys are
|
272
269
|
evaluated before shorter ones.
|
273
270
|
|
271
|
+
`List::Matcher` doesn't parse regex strings to determine whether they need to be grouped before any iteration suffix
|
272
|
+
can be added or to determine whether it is sensible to add boundary sequences before or after them. By default it assumes
|
273
|
+
that they need grouping if they repeat and that boundary markers don't make sense. You can override this behavior, however.
|
274
|
+
You specify the characteristics of the pattern as a hash with the following keys:
|
275
|
+
|
276
|
+
**`:pattern`**
|
277
|
+
|
278
|
+
The value is the pattern to substitute for the symbol.
|
279
|
+
|
280
|
+
**`:atomic`**
|
281
|
+
|
282
|
+
If the value is true, the pattern is treated as atomic: it needs no grouping before a repetition suffix is added.
|
283
|
+
|
284
|
+
**`:left`**
|
285
|
+
|
286
|
+
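A representative character, standing in for the left edge of whatever the pattern matches, that is tested against the boundary condition to decide whether a left boundary marker should be added.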
|
287
|
+
|
288
|
+
**`:right`**
|
289
|
+
|
290
|
+
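A representative character, standing in for the right edge of whatever the pattern matches, that is tested against the boundary condition to decide whether a right boundary marker should be added.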
|
291
|
+
|
292
|
+
For example:
|
293
|
+
|
294
|
+
```ruby
|
295
|
+
List::Matcher.pattern %w(dddd ddddddd),
|
296
|
+
bound: :word,
|
297
|
+
symbols: { d: { pattern: '\d', atomic: true, left: '0', right: '0' } }, # <-- this
|
298
|
+
atomic: false
|
299
|
+
|
300
|
+
# \b\d{4}(?:\d{3})?\b
|
301
|
+
```
|
302
|
+
|
274
303
|
### name
|
275
304
|
|
276
305
|
If you assign your pattern a name, it will be constructed with a named group such that you can extract
|
@@ -311,11 +340,12 @@ date_20th_century = m.rx(
|
|
311
340
|
],
|
312
341
|
normalize_whitespace: true,
|
313
342
|
atomic: true,
|
343
|
+
bound: true,
|
314
344
|
symbols: {
|
315
|
-
year: year,
|
316
|
-
mday: mday,
|
317
|
-
wday: wday,
|
318
|
-
mo: mo
|
345
|
+
year: { pattern: year, atomic: true, left: '1', right: '1' },
|
346
|
+
mday: { pattern: mday, atomic: true, left: '1', right: '1' },
|
347
|
+
wday: { pattern: wday, atomic: true, left: 'a', right: 'a' },
|
348
|
+
mo: { pattern: mo, atomic: true, left: 'a', right: 'a' }
|
319
349
|
}
|
320
350
|
)
|
321
351
|
|
data/lib/list_matcher.rb
CHANGED
@@ -197,7 +197,7 @@ module List
|
|
197
197
|
def tree(list, symbols)
|
198
198
|
if list.size == 1
|
199
199
|
leaves = list[0].chars.map do |c|
|
200
|
-
symbols
|
200
|
+
symbols[c] || Leaf.new( self, c )
|
201
201
|
end
|
202
202
|
if leaves.length == 1
|
203
203
|
leaves.first
|
@@ -205,7 +205,7 @@ module List
|
|
205
205
|
Sequence.new self, *leaves
|
206
206
|
end
|
207
207
|
elsif list.all?{ |w| w.length == 1 }
|
208
|
-
chars = list.select{ |w| !symbols
|
208
|
+
chars = list.select{ |w| !symbols[w] }
|
209
209
|
if chars.size > 1
|
210
210
|
list -= chars
|
211
211
|
c = CharClass.new self, chars
|
@@ -231,7 +231,7 @@ module List
|
|
231
231
|
Sequence.new self, c1, c2
|
232
232
|
else
|
233
233
|
grouped = list.group_by{ |w| w[0] }
|
234
|
-
chars = grouped.select{ |_, w| w.size == 1 && w[0].size == 1 && !symbols
|
234
|
+
chars = grouped.select{ |_, w| w.size == 1 && w[0].size == 1 && !symbols[w[0]] }.map{ |v, _| v }
|
235
235
|
if chars.size > 1
|
236
236
|
list -= chars
|
237
237
|
c = CharClass.new self, chars
|
@@ -257,7 +257,7 @@ module List
|
|
257
257
|
Hash[o.map{ |k, v| [ deep_dup(k), deep_dup(v) ] }]
|
258
258
|
elsif o.is_a?(Array)
|
259
259
|
o.map{ |v| deep_dup v }
|
260
|
-
elsif o.nil? || o.is_a?(Symbol)
|
260
|
+
elsif o.nil? || o.is_a?(Symbol) || o.is_a?(TrueClass) || o.is_a?(FalseClass)
|
261
261
|
o
|
262
262
|
else
|
263
263
|
o.dup
|
@@ -326,11 +326,9 @@ module List
|
|
326
326
|
end
|
327
327
|
|
328
328
|
class Special
|
329
|
-
attr_reader :engine
|
329
|
+
attr_reader :engine, :special_map
|
330
330
|
attr_accessor :specials, :list, :left, :right
|
331
331
|
|
332
|
-
NULL = Regexp.new '(?!)'
|
333
|
-
|
334
332
|
def initialize( engine, specials, list )
|
335
333
|
@engine = engine
|
336
334
|
@list = list
|
@@ -360,11 +358,11 @@ module List
|
|
360
358
|
pat = opts.delete :pattern
|
361
359
|
raise Error, "symbol #{var} requires a pattern" unless pat || var.is_a?(Regexp)
|
362
360
|
pat ||= var.to_s
|
363
|
-
|
361
|
+
SymbolPattern.new engine, c, var, pat, **opts
|
364
362
|
elsif opts.is_a? String
|
365
|
-
|
363
|
+
SymbolPattern.new engine, c, var, opts
|
366
364
|
elsif var.is_a?(Regexp) && opts.nil?
|
367
|
-
|
365
|
+
SymbolPattern.new engine, c, var, nil
|
368
366
|
else
|
369
367
|
raise Error, "symbol #{var} requires a pattern"
|
370
368
|
end
|
@@ -374,15 +372,16 @@ module List
|
|
374
372
|
if engine.bound
|
375
373
|
if engine.left_bound
|
376
374
|
c = ( max += 1 ).chr
|
377
|
-
@left =
|
375
|
+
@left = SymbolPattern.new engine, c, c, engine.left_bound
|
378
376
|
@specials << @left
|
379
377
|
end
|
380
378
|
if engine.right_bound
|
381
379
|
c = ( max += 1 ).chr
|
382
|
-
@right =
|
380
|
+
@right = SymbolPattern.new engine, c, c, engine.right_bound
|
383
381
|
@specials << @right
|
384
382
|
end
|
385
383
|
end
|
384
|
+
@special_map = Hash[@specials.map{ |s| [ s.char, s ] }]
|
386
385
|
end
|
387
386
|
|
388
387
|
# confirm that all special patterns are legitimate regexen
|
@@ -396,58 +395,51 @@ module List
|
|
396
395
|
end
|
397
396
|
end
|
398
397
|
|
399
|
-
|
400
|
-
|
401
|
-
end
|
402
|
-
|
403
|
-
def symbols(s)
|
398
|
+
# maps a symbol character back to the symbol object
|
399
|
+
def [](s)
|
404
400
|
special_map[s]
|
405
401
|
end
|
406
402
|
|
407
403
|
# reduce the list to a version ready for pattern generation
|
408
404
|
def normalize
|
409
405
|
rx = if specials.empty?
|
410
|
-
|
406
|
+
/(?!)/
|
411
407
|
else
|
412
408
|
Regexp.new '(' + specials.map(&:var).map(&:to_s).join('|') + ')'
|
413
409
|
end
|
414
|
-
l = r = false
|
415
410
|
list = self.list.uniq.map do |w|
|
416
|
-
parts = w.split
|
411
|
+
parts = w.split(rx).select{ |p| p.length > 0 }
|
417
412
|
e = parts.size - 1
|
418
413
|
(0..e).map do |i|
|
419
414
|
p = parts[i]
|
420
415
|
if rx === p
|
421
416
|
p = specials.detect{ |sp| sp.var === p }
|
422
|
-
special_map[p.char] = p
|
423
417
|
if engine.bound
|
424
|
-
|
425
|
-
|
426
|
-
|
418
|
+
s = p
|
419
|
+
if i == 0 && engine.left_bound && engine.word_test === p.left
|
420
|
+
s = "#{left}#{s}"
|
427
421
|
end
|
428
|
-
if i == e && engine.right_bound && p.right
|
429
|
-
|
430
|
-
r = true
|
422
|
+
if i == e && engine.right_bound && engine.word_test === p.right
|
423
|
+
s = "#{s}#{right}"
|
431
424
|
end
|
425
|
+
p = s
|
432
426
|
end
|
433
427
|
else
|
434
428
|
p = p.downcase if engine.case_insensitive
|
435
429
|
if engine.bound
|
430
|
+
s = p
|
436
431
|
if i == 0 && engine.left_bound && engine.word_test === p[0]
|
437
|
-
|
438
|
-
l = true
|
432
|
+
s = "#{left}#{s}"
|
439
433
|
end
|
440
434
|
if i == e && engine.right_bound && engine.word_test === p[-1]
|
441
|
-
|
442
|
-
r = true
|
435
|
+
s = "#{s}#{right}"
|
443
436
|
end
|
437
|
+
p = s
|
444
438
|
end
|
445
439
|
end
|
446
440
|
p
|
447
441
|
end.join
|
448
442
|
end.uniq.sort
|
449
|
-
special_map[left.char] = left if l
|
450
|
-
special_map[right.char] = right if r
|
451
443
|
list
|
452
444
|
end
|
453
445
|
end
|
@@ -522,29 +514,21 @@ module List
|
|
522
514
|
|
523
515
|
end
|
524
516
|
|
525
|
-
class
|
517
|
+
class SymbolPattern < Node
|
526
518
|
attr_accessor :char, :var, :left, :right, :pat, :symbol
|
527
|
-
def initialize(engine, char, var, pat, atomic: (var.is_a?(Regexp) && pat.nil?),
|
519
|
+
def initialize(engine, char, var, pat, atomic: (var.is_a?(Regexp) && pat.nil?), left: nil, right: nil)
|
528
520
|
super(engine, nil)
|
529
521
|
@char = char
|
530
522
|
@symbol = var.to_s
|
531
523
|
@var = var.is_a?(String) || var.is_a?(Symbol) ? Regexp.new(Regexp.quote(var.to_s)) : var
|
532
524
|
@pat = pat || var.to_s
|
533
525
|
@atomic = !!atomic
|
534
|
-
@left =
|
535
|
-
@right =
|
526
|
+
@left = left
|
527
|
+
@right = right
|
536
528
|
end
|
537
529
|
|
538
530
|
def dup
|
539
|
-
self.class.new engine, char, var, pat, atomic: atomic?,
|
540
|
-
end
|
541
|
-
|
542
|
-
def left?
|
543
|
-
@left
|
544
|
-
end
|
545
|
-
|
546
|
-
def right?
|
547
|
-
@right
|
531
|
+
self.class.new engine, char, var, pat, atomic: atomic?, left: left, right: right
|
548
532
|
end
|
549
533
|
|
550
534
|
def atomic?
|
data/lib/list_matcher/version.rb
CHANGED
data/test/basic_test.rb
CHANGED
@@ -289,4 +289,24 @@ class BasicTest < Minitest::Test
|
|
289
289
|
assert rx === ' '
|
290
290
|
assert rx === '#'
|
291
291
|
end
|
292
|
+
|
293
|
+
def test_symbol_bound
|
294
|
+
rx = List::Matcher.pattern %w(1 2 3 d), bound: :word, symbols: { d: { pattern: '\d{4}', left: '0', right: '0' } }, atomic: false
|
295
|
+
assert_equal '\b(?:[1-3]|\d{4})\b', rx
|
296
|
+
end
|
297
|
+
|
298
|
+
def test_symbol_bound_left
|
299
|
+
rx = List::Matcher.pattern %w(1 2 3 d), bound: :word_left, symbols: { d: { pattern: '\d{4}', left: '0', right: '0' } }, atomic: false
|
300
|
+
assert_equal '\b(?:[1-3]|\d{4})', rx
|
301
|
+
end
|
302
|
+
|
303
|
+
def test_symbol_bound_right
|
304
|
+
rx = List::Matcher.pattern %w(1 2 3 d), bound: :word_right, symbols: { d: { pattern: '\d{4}', left: '0', right: '0' } }, atomic: false
|
305
|
+
assert_equal '(?:[1-3]|\d{4})\b', rx
|
306
|
+
end
|
307
|
+
|
308
|
+
def test_sort_bound_word
|
309
|
+
rx = List::Matcher.pattern %w(a), bound: :word, atomic: false
|
310
|
+
assert_equal '\ba\b', rx
|
311
|
+
end
|
292
312
|
end
|
data/test/doc_test.rb
CHANGED
@@ -68,11 +68,12 @@ class DocTest < Minitest::Test
|
|
68
68
|
],
|
69
69
|
normalize_whitespace: true,
|
70
70
|
atomic: true,
|
71
|
+
bound: true,
|
71
72
|
symbols: {
|
72
|
-
year: year,
|
73
|
-
mday: mday,
|
74
|
-
wday: wday,
|
75
|
-
mo: mo
|
73
|
+
year: { pattern: year, atomic: true, left: '1', right: '1' },
|
74
|
+
mday: { pattern: mday, atomic: true, left: '1', right: '1' },
|
75
|
+
wday: { pattern: wday, atomic: true, left: 'a', right: 'a' },
|
76
|
+
mo: { pattern: mo, atomic: true, left: 'a', right: 'a' }
|
76
77
|
}
|
77
78
|
)
|
78
79
|
|
@@ -103,6 +104,12 @@ class DocTest < Minitest::Test
|
|
103
104
|
assert_equal "(?:\\Acat)", List::Matcher.pattern( %w(cat), bound: :string_left )
|
104
105
|
assert_equal "(?:cat$)", List::Matcher.pattern( %w(cat), bound: :line_right )
|
105
106
|
assert_equal "(?:\\#@%|\\bcat\\b)", List::Matcher.pattern( %w( cat #@% ), bound: :word )
|
107
|
+
rx = List::Matcher.pattern %w(dddd ddddddd),
|
108
|
+
bound: :word,
|
109
|
+
symbols: { d: { pattern: '\d', atomic: true, left: '0', right: '0' } }, # <-- this
|
110
|
+
atomic: false
|
111
|
+
assert_equal '\b\d{4}(?:\d{3})?\b', rx
|
112
|
+
|
106
113
|
end
|
107
114
|
|
108
115
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: list_matcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.5
|
4
|
+
version: 1.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- dfhoughton
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-09-
|
11
|
+
date: 2015-09-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|