list_matcher 1.0.5 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 34630b608a56f9a22605ff55956ed8da7793ed73
4
- data.tar.gz: 160a5e81d4c9e3bbd0ba2a516e97a0ec9048d27f
3
+ metadata.gz: 393f3b81be1503c0af7b583119e063ff2c91276a
4
+ data.tar.gz: 66a79b3b020f17b9da3fcd6a8465c8b5966088ac
5
5
  SHA512:
6
- metadata.gz: 5a1523c2c3a36424bd76ba465345466993404c4e3b3e4e531b7ca2b9ea3765e392567df809f188af3bcb2ab8f58d1fe8c21b1dae3eed1087133f9d68767b922e
7
- data.tar.gz: 917e5620134c0726880ba789db20bfe39f850ce11493d2cea17f690bbc5501ec18f5c66f72e41c8b745051e1776c341f86a8805c4e74b48db6486061c8706dc9
6
+ metadata.gz: 23e71cd30a236c5a247d2f6e12c6e9f7bd5f3adb3c1dcf2c4eff781561ed7ec1bb123a720e9161dc6dba7d597c315c0bdac403663a0bb68b2fb957863cce30ef
7
+ data.tar.gz: db2306c5f2cf514961971566446de1de8e746fe157b472f8314c6a82f24063b4d7c1bb0c7ed4e632108ce4ef253c00e1c69c3b74c4e6d5cc64530c2035c4287a
data/README.md CHANGED
@@ -192,9 +192,6 @@ List::Matcher.pattern (1...1000).to_a, bound: { test: /\d/, left: '(?<!\d)', rig
192
192
  As with the predefined boundaries -- `:word_left`, `:line_right`, `:string_left`, etc. -- you can bound items only at one
193
193
  margin, in this case by providing only the `left:` or `right:` key-value pair.
194
194
 
195
- **NOTE** Because boundary tests cannot be applied to symbols, the bound option will give you strange results if you use it
196
- with a list any of whose items have a symbol at their leading or trailing margin.
197
-
198
195
  ### strip
199
196
 
200
197
  ```ruby
@@ -271,6 +268,38 @@ List::Matcher.pattern [ 'Catch foo', 'foo', 'Fahrenheit foo' ], symbols: { foo:
271
268
  Because it is possible for symbol sequences to overlap, sequences with string or symbol keys are evaluated before `Regexps`, and longer keys are
272
269
  evaluated before shorter ones.
273
270
 
271
+ `List::Matcher` doesn't parse the regex strings you supply for symbols to determine whether they need to be grouped before any iteration suffix
272
+ can be added or to determine whether it is sensible to add boundary sequences before or after them. By default it assumes
273
+ that they need grouping if they repeat and that boundary markers don't make sense. You can override this behavior, however.
274
+ You specify the characteristics of the pattern as a hash with the following keys:
275
+
276
+ **`:pattern`**
277
+
278
+ The value is the pattern to substitute for the symbol.
279
+
280
+ **`:atomic`**
281
+
282
+ If the value is true, the pattern is treated as atomic: it needs no grouping before a repetition suffix is added.
283
+
284
+ **`:left`**
285
+
286
+ A sample character standing in for the pattern's left edge. The left boundary marker is added only if this character passes the bound's test.
287
+
288
+ **`:right`**
289
+
290
+ A sample character standing in for the pattern's right edge. The right boundary marker is added only if this character passes the bound's test.
291
+
292
+ For example:
293
+
294
+ ```ruby
295
+ List::Matcher.pattern %w(dddd ddddddd),
296
+ bound: :word,
297
+ symbols: { d: { pattern: '\d', atomic: true, left: '0', right: '0' } }, # <-- this
298
+ atomic: false
299
+
300
+ # \b\d{4}(?:\d{3})?\b
301
+ ```
302
+
274
303
  ### name
275
304
 
276
305
  If you assign your pattern a name, it will be constructed with a named group such that you can extract
@@ -311,11 +340,12 @@ date_20th_century = m.rx(
311
340
  ],
312
341
  normalize_whitespace: true,
313
342
  atomic: true,
343
+ bound: true,
314
344
  symbols: {
315
- year: year,
316
- mday: mday,
317
- wday: wday,
318
- mo: mo
345
+ year: { pattern: year, atomic: true, left: '1', right: '1' },
346
+ mday: { pattern: mday, atomic: true, left: '1', right: '1' },
347
+ wday: { pattern: wday, atomic: true, left: 'a', right: 'a' },
348
+ mo: { pattern: mo, atomic: true, left: 'a', right: 'a' }
319
349
  }
320
350
  )
321
351
 
data/lib/list_matcher.rb CHANGED
@@ -197,7 +197,7 @@ module List
197
197
  def tree(list, symbols)
198
198
  if list.size == 1
199
199
  leaves = list[0].chars.map do |c|
200
- symbols.symbols(c) || Leaf.new( self, c )
200
+ symbols[c] || Leaf.new( self, c )
201
201
  end
202
202
  if leaves.length == 1
203
203
  leaves.first
@@ -205,7 +205,7 @@ module List
205
205
  Sequence.new self, *leaves
206
206
  end
207
207
  elsif list.all?{ |w| w.length == 1 }
208
- chars = list.select{ |w| !symbols.symbols(w) }
208
+ chars = list.select{ |w| !symbols[w] }
209
209
  if chars.size > 1
210
210
  list -= chars
211
211
  c = CharClass.new self, chars
@@ -231,7 +231,7 @@ module List
231
231
  Sequence.new self, c1, c2
232
232
  else
233
233
  grouped = list.group_by{ |w| w[0] }
234
- chars = grouped.select{ |_, w| w.size == 1 && w[0].size == 1 && !symbols.symbols(w[0]) }.map{ |v, _| v }
234
+ chars = grouped.select{ |_, w| w.size == 1 && w[0].size == 1 && !symbols[w[0]] }.map{ |v, _| v }
235
235
  if chars.size > 1
236
236
  list -= chars
237
237
  c = CharClass.new self, chars
@@ -257,7 +257,7 @@ module List
257
257
  Hash[o.map{ |k, v| [ deep_dup(k), deep_dup(v) ] }]
258
258
  elsif o.is_a?(Array)
259
259
  o.map{ |v| deep_dup v }
260
- elsif o.nil? || o.is_a?(Symbol)
260
+ elsif o.nil? || o.is_a?(Symbol) || o.is_a?(TrueClass) || o.is_a?(FalseClass)
261
261
  o
262
262
  else
263
263
  o.dup
@@ -326,11 +326,9 @@ module List
326
326
  end
327
327
 
328
328
  class Special
329
- attr_reader :engine
329
+ attr_reader :engine, :special_map
330
330
  attr_accessor :specials, :list, :left, :right
331
331
 
332
- NULL = Regexp.new '(?!)'
333
-
334
332
  def initialize( engine, specials, list )
335
333
  @engine = engine
336
334
  @list = list
@@ -360,11 +358,11 @@ module List
360
358
  pat = opts.delete :pattern
361
359
  raise Error, "symbol #{var} requires a pattern" unless pat || var.is_a?(Regexp)
362
360
  pat ||= var.to_s
363
- SpecialPattern.new engine, c, var, pat, **opts
361
+ SymbolPattern.new engine, c, var, pat, **opts
364
362
  elsif opts.is_a? String
365
- SpecialPattern.new engine, c, var, opts
363
+ SymbolPattern.new engine, c, var, opts
366
364
  elsif var.is_a?(Regexp) && opts.nil?
367
- SpecialPattern.new engine, c, var, nil
365
+ SymbolPattern.new engine, c, var, nil
368
366
  else
369
367
  raise Error, "symbol #{var} requires a pattern"
370
368
  end
@@ -374,15 +372,16 @@ module List
374
372
  if engine.bound
375
373
  if engine.left_bound
376
374
  c = ( max += 1 ).chr
377
- @left = SpecialPattern.new engine, c, c, engine.left_bound
375
+ @left = SymbolPattern.new engine, c, c, engine.left_bound
378
376
  @specials << @left
379
377
  end
380
378
  if engine.right_bound
381
379
  c = ( max += 1 ).chr
382
- @right = SpecialPattern.new engine, c, c, engine.right_bound
380
+ @right = SymbolPattern.new engine, c, c, engine.right_bound
383
381
  @specials << @right
384
382
  end
385
383
  end
384
+ @special_map = Hash[@specials.map{ |s| [ s.char, s ] }]
386
385
  end
387
386
 
388
387
  # confirm that all special patterns are legitimate regexen
@@ -396,58 +395,51 @@ module List
396
395
  end
397
396
  end
398
397
 
399
- def special_map
400
- @special_map ||= {}
401
- end
402
-
403
- def symbols(s)
398
+ # maps a symbol character back to the symbol object
399
+ def [](s)
404
400
  special_map[s]
405
401
  end
406
402
 
407
403
  # reduce the list to a version ready for pattern generation
408
404
  def normalize
409
405
  rx = if specials.empty?
410
- NULL
406
+ /(?!)/
411
407
  else
412
408
  Regexp.new '(' + specials.map(&:var).map(&:to_s).join('|') + ')'
413
409
  end
414
- l = r = false
415
410
  list = self.list.uniq.map do |w|
416
- parts = w.split rx
411
+ parts = w.split(rx).select{ |p| p.length > 0 }
417
412
  e = parts.size - 1
418
413
  (0..e).map do |i|
419
414
  p = parts[i]
420
415
  if rx === p
421
416
  p = specials.detect{ |sp| sp.var === p }
422
- special_map[p.char] = p
423
417
  if engine.bound
424
- if i == 0 && engine.left_bound && p.left
425
- p = "#{left}#{p}" if t
426
- l = true
418
+ s = p
419
+ if i == 0 && engine.left_bound && engine.word_test === p.left
420
+ s = "#{left}#{s}"
427
421
  end
428
- if i == e && engine.right_bound && p.right
429
- p = "#{p}#{right}"
430
- r = true
422
+ if i == e && engine.right_bound && engine.word_test === p.right
423
+ s = "#{s}#{right}"
431
424
  end
425
+ p = s
432
426
  end
433
427
  else
434
428
  p = p.downcase if engine.case_insensitive
435
429
  if engine.bound
430
+ s = p
436
431
  if i == 0 && engine.left_bound && engine.word_test === p[0]
437
- p = "#{left}#{p}"
438
- l = true
432
+ s = "#{left}#{s}"
439
433
  end
440
434
  if i == e && engine.right_bound && engine.word_test === p[-1]
441
- p = "#{p}#{right}"
442
- r = true
435
+ s = "#{s}#{right}"
443
436
  end
437
+ p = s
444
438
  end
445
439
  end
446
440
  p
447
441
  end.join
448
442
  end.uniq.sort
449
- special_map[left.char] = left if l
450
- special_map[right.char] = right if r
451
443
  list
452
444
  end
453
445
  end
@@ -522,29 +514,21 @@ module List
522
514
 
523
515
  end
524
516
 
525
- class SpecialPattern < Node
517
+ class SymbolPattern < Node
526
518
  attr_accessor :char, :var, :left, :right, :pat, :symbol
527
- def initialize(engine, char, var, pat, atomic: (var.is_a?(Regexp) && pat.nil?), word_left: false, word_right: false)
519
+ def initialize(engine, char, var, pat, atomic: (var.is_a?(Regexp) && pat.nil?), left: nil, right: nil)
528
520
  super(engine, nil)
529
521
  @char = char
530
522
  @symbol = var.to_s
531
523
  @var = var.is_a?(String) || var.is_a?(Symbol) ? Regexp.new(Regexp.quote(var.to_s)) : var
532
524
  @pat = pat || var.to_s
533
525
  @atomic = !!atomic
534
- @left = !!word_left
535
- @right = !!word_right
526
+ @left = left
527
+ @right = right
536
528
  end
537
529
 
538
530
  def dup
539
- self.class.new engine, char, var, pat, atomic: atomic?, word_left: left?, word_right: right?
540
- end
541
-
542
- def left?
543
- @left
544
- end
545
-
546
- def right?
547
- @right
531
+ self.class.new engine, char, var, pat, atomic: atomic?, left: left, right: right
548
532
  end
549
533
 
550
534
  def atomic?
@@ -1,3 +1,3 @@
1
1
  module ListMatcher
2
- VERSION = "1.0.5"
2
+ VERSION = "1.0.6"
3
3
  end
data/test/basic_test.rb CHANGED
@@ -289,4 +289,24 @@ class BasicTest < Minitest::Test
289
289
  assert rx === ' '
290
290
  assert rx === '#'
291
291
  end
292
+
293
+ def test_symbol_bound
294
+ rx = List::Matcher.pattern %w(1 2 3 d), bound: :word, symbols: { d: { pattern: '\d{4}', left: '0', right: '0' } }, atomic: false
295
+ assert_equal '\b(?:[1-3]|\d{4})\b', rx
296
+ end
297
+
298
+ def test_symbol_bound_left
299
+ rx = List::Matcher.pattern %w(1 2 3 d), bound: :word_left, symbols: { d: { pattern: '\d{4}', left: '0', right: '0' } }, atomic: false
300
+ assert_equal '\b(?:[1-3]|\d{4})', rx
301
+ end
302
+
303
+ def test_symbol_bound_right
304
+ rx = List::Matcher.pattern %w(1 2 3 d), bound: :word_right, symbols: { d: { pattern: '\d{4}', left: '0', right: '0' } }, atomic: false
305
+ assert_equal '(?:[1-3]|\d{4})\b', rx
306
+ end
307
+
308
+ def test_sort_bound_word
309
+ rx = List::Matcher.pattern %w(a), bound: :word, atomic: false
310
+ assert_equal '\ba\b', rx
311
+ end
292
312
  end
data/test/doc_test.rb CHANGED
@@ -68,11 +68,12 @@ class DocTest < Minitest::Test
68
68
  ],
69
69
  normalize_whitespace: true,
70
70
  atomic: true,
71
+ bound: true,
71
72
  symbols: {
72
- year: year,
73
- mday: mday,
74
- wday: wday,
75
- mo: mo
73
+ year: { pattern: year, atomic: true, left: '1', right: '1' },
74
+ mday: { pattern: mday, atomic: true, left: '1', right: '1' },
75
+ wday: { pattern: wday, atomic: true, left: 'a', right: 'a' },
76
+ mo: { pattern: mo, atomic: true, left: 'a', right: 'a' }
76
77
  }
77
78
  )
78
79
 
@@ -103,6 +104,12 @@ class DocTest < Minitest::Test
103
104
  assert_equal "(?:\\Acat)", List::Matcher.pattern( %w(cat), bound: :string_left )
104
105
  assert_equal "(?:cat$)", List::Matcher.pattern( %w(cat), bound: :line_right )
105
106
  assert_equal "(?:\\#@%|\\bcat\\b)", List::Matcher.pattern( %w( cat #@% ), bound: :word )
107
+ rx = List::Matcher.pattern %w(dddd ddddddd),
108
+ bound: :word,
109
+ symbols: { d: { pattern: '\d', atomic: true, left: '0', right: '0' } }, # <-- this
110
+ atomic: false
111
+ assert_equal '\b\d{4}(?:\d{3})?\b', rx
112
+
106
113
  end
107
114
 
108
115
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: list_matcher
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.5
4
+ version: 1.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - dfhoughton
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-02 00:00:00.000000000 Z
11
+ date: 2015-09-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler