list_matcher 1.0.5 → 1.0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 34630b608a56f9a22605ff55956ed8da7793ed73
4
- data.tar.gz: 160a5e81d4c9e3bbd0ba2a516e97a0ec9048d27f
3
+ metadata.gz: 393f3b81be1503c0af7b583119e063ff2c91276a
4
+ data.tar.gz: 66a79b3b020f17b9da3fcd6a8465c8b5966088ac
5
5
  SHA512:
6
- metadata.gz: 5a1523c2c3a36424bd76ba465345466993404c4e3b3e4e531b7ca2b9ea3765e392567df809f188af3bcb2ab8f58d1fe8c21b1dae3eed1087133f9d68767b922e
7
- data.tar.gz: 917e5620134c0726880ba789db20bfe39f850ce11493d2cea17f690bbc5501ec18f5c66f72e41c8b745051e1776c341f86a8805c4e74b48db6486061c8706dc9
6
+ metadata.gz: 23e71cd30a236c5a247d2f6e12c6e9f7bd5f3adb3c1dcf2c4eff781561ed7ec1bb123a720e9161dc6dba7d597c315c0bdac403663a0bb68b2fb957863cce30ef
7
+ data.tar.gz: db2306c5f2cf514961971566446de1de8e746fe157b472f8314c6a82f24063b4d7c1bb0c7ed4e632108ce4ef253c00e1c69c3b74c4e6d5cc64530c2035c4287a
data/README.md CHANGED
@@ -192,9 +192,6 @@ List::Matcher.pattern (1...1000).to_a, bound: { test: /\d/, left: '(?<!\d)', rig
192
192
  As with the predefined boundaries -- `:word_left`, `:line_right`, `:string_left`, etc. -- you can bound items only at one
193
193
  margin, in this case by providing only the `left:` or `right:` key-value pair.
194
194
 
195
- **NOTE** Because boundary tests cannot be applied to symbols, the bound option will give you strange results if you use it
196
- with a list any of whose items have a symbol at their leading or trailing margin.
197
-
198
195
  ### strip
199
196
 
200
197
  ```ruby
@@ -271,6 +268,38 @@ List::Matcher.pattern [ 'Catch foo', 'foo', 'Fahrenheit foo' ], symbols: { foo:
271
268
  Because it is possible for symbol sequences to overlap, sequences with string or symbol keys are evaluated before `Regexps`, and longer keys are
272
269
  evaluated before shorter ones.
273
270
 
271
+ `List::Matcher` doesn't parse regex strings to determine whether they need to be grouped before any iteration suffix
272
+ can be added or to determine whether it is sensible to add boundary sequences before or after them. By default it assumes
273
+ that a pattern must be grouped before an iteration suffix is appended and that no boundary markers should be placed around it. You can override these defaults, however.
274
+ You specify the characteristics of the pattern as a hash with the following keys:
275
+
276
+ **`:pattern`**
277
+
278
+ The value is the pattern to substitute for the symbol.
279
+
280
+ **`:atomic`**
281
+
282
+ If the value is true, the pattern is treated as a single unit that needs no grouping before an iteration suffix is added.
283
+
284
+ **`:left`**
285
+
286
+ A sample character standing in for the pattern's left edge; the left boundary is added only if this character passes the boundary test.
287
+
288
+ **`:right`**
289
+
290
+ A sample character standing in for the pattern's right edge; the right boundary is added only if this character passes the boundary test.
291
+
292
+ For example:
293
+
294
+ ```ruby
295
+ List::Matcher.pattern %w(dddd ddddddd),
296
+ bound: :word,
297
+ symbols: { d: { pattern: '\d', atomic: true, left: '0', right: '0' } }, # <-- this
298
+ atomic: false
299
+
300
+ # \b\d{4}(?:\d{3})?\b
301
+ ```
302
+
274
303
  ### name
275
304
 
276
305
  If you assign your pattern a name, it will be constructed with a named group such that you can extract
@@ -311,11 +340,12 @@ date_20th_century = m.rx(
311
340
  ],
312
341
  normalize_whitespace: true,
313
342
  atomic: true,
343
+ bound: true,
314
344
  symbols: {
315
- year: year,
316
- mday: mday,
317
- wday: wday,
318
- mo: mo
345
+ year: { pattern: year, atomic: true, left: '1', right: '1' },
346
+ mday: { pattern: mday, atomic: true, left: '1', right: '1' },
347
+ wday: { pattern: wday, atomic: true, left: 'a', right: 'a' },
348
+ mo: { pattern: mo, atomic: true, left: 'a', right: 'a' }
319
349
  }
320
350
  )
321
351
 
data/lib/list_matcher.rb CHANGED
@@ -197,7 +197,7 @@ module List
197
197
  def tree(list, symbols)
198
198
  if list.size == 1
199
199
  leaves = list[0].chars.map do |c|
200
- symbols.symbols(c) || Leaf.new( self, c )
200
+ symbols[c] || Leaf.new( self, c )
201
201
  end
202
202
  if leaves.length == 1
203
203
  leaves.first
@@ -205,7 +205,7 @@ module List
205
205
  Sequence.new self, *leaves
206
206
  end
207
207
  elsif list.all?{ |w| w.length == 1 }
208
- chars = list.select{ |w| !symbols.symbols(w) }
208
+ chars = list.select{ |w| !symbols[w] }
209
209
  if chars.size > 1
210
210
  list -= chars
211
211
  c = CharClass.new self, chars
@@ -231,7 +231,7 @@ module List
231
231
  Sequence.new self, c1, c2
232
232
  else
233
233
  grouped = list.group_by{ |w| w[0] }
234
- chars = grouped.select{ |_, w| w.size == 1 && w[0].size == 1 && !symbols.symbols(w[0]) }.map{ |v, _| v }
234
+ chars = grouped.select{ |_, w| w.size == 1 && w[0].size == 1 && !symbols[w[0]] }.map{ |v, _| v }
235
235
  if chars.size > 1
236
236
  list -= chars
237
237
  c = CharClass.new self, chars
@@ -257,7 +257,7 @@ module List
257
257
  Hash[o.map{ |k, v| [ deep_dup(k), deep_dup(v) ] }]
258
258
  elsif o.is_a?(Array)
259
259
  o.map{ |v| deep_dup v }
260
- elsif o.nil? || o.is_a?(Symbol)
260
+ elsif o.nil? || o.is_a?(Symbol) || o.is_a?(TrueClass) || o.is_a?(FalseClass)
261
261
  o
262
262
  else
263
263
  o.dup
@@ -326,11 +326,9 @@ module List
326
326
  end
327
327
 
328
328
  class Special
329
- attr_reader :engine
329
+ attr_reader :engine, :special_map
330
330
  attr_accessor :specials, :list, :left, :right
331
331
 
332
- NULL = Regexp.new '(?!)'
333
-
334
332
  def initialize( engine, specials, list )
335
333
  @engine = engine
336
334
  @list = list
@@ -360,11 +358,11 @@ module List
360
358
  pat = opts.delete :pattern
361
359
  raise Error, "symbol #{var} requires a pattern" unless pat || var.is_a?(Regexp)
362
360
  pat ||= var.to_s
363
- SpecialPattern.new engine, c, var, pat, **opts
361
+ SymbolPattern.new engine, c, var, pat, **opts
364
362
  elsif opts.is_a? String
365
- SpecialPattern.new engine, c, var, opts
363
+ SymbolPattern.new engine, c, var, opts
366
364
  elsif var.is_a?(Regexp) && opts.nil?
367
- SpecialPattern.new engine, c, var, nil
365
+ SymbolPattern.new engine, c, var, nil
368
366
  else
369
367
  raise Error, "symbol #{var} requires a pattern"
370
368
  end
@@ -374,15 +372,16 @@ module List
374
372
  if engine.bound
375
373
  if engine.left_bound
376
374
  c = ( max += 1 ).chr
377
- @left = SpecialPattern.new engine, c, c, engine.left_bound
375
+ @left = SymbolPattern.new engine, c, c, engine.left_bound
378
376
  @specials << @left
379
377
  end
380
378
  if engine.right_bound
381
379
  c = ( max += 1 ).chr
382
- @right = SpecialPattern.new engine, c, c, engine.right_bound
380
+ @right = SymbolPattern.new engine, c, c, engine.right_bound
383
381
  @specials << @right
384
382
  end
385
383
  end
384
+ @special_map = Hash[@specials.map{ |s| [ s.char, s ] }]
386
385
  end
387
386
 
388
387
  # confirm that all special patterns are legitimate regexen
@@ -396,58 +395,51 @@ module List
396
395
  end
397
396
  end
398
397
 
399
- def special_map
400
- @special_map ||= {}
401
- end
402
-
403
- def symbols(s)
398
+ # maps a symbol character back to the symbol object
399
+ def [](s)
404
400
  special_map[s]
405
401
  end
406
402
 
407
403
  # reduce the list to a version ready for pattern generation
408
404
  def normalize
409
405
  rx = if specials.empty?
410
- NULL
406
+ /(?!)/
411
407
  else
412
408
  Regexp.new '(' + specials.map(&:var).map(&:to_s).join('|') + ')'
413
409
  end
414
- l = r = false
415
410
  list = self.list.uniq.map do |w|
416
- parts = w.split rx
411
+ parts = w.split(rx).select{ |p| p.length > 0 }
417
412
  e = parts.size - 1
418
413
  (0..e).map do |i|
419
414
  p = parts[i]
420
415
  if rx === p
421
416
  p = specials.detect{ |sp| sp.var === p }
422
- special_map[p.char] = p
423
417
  if engine.bound
424
- if i == 0 && engine.left_bound && p.left
425
- p = "#{left}#{p}" if t
426
- l = true
418
+ s = p
419
+ if i == 0 && engine.left_bound && engine.word_test === p.left
420
+ s = "#{left}#{s}"
427
421
  end
428
- if i == e && engine.right_bound && p.right
429
- p = "#{p}#{right}"
430
- r = true
422
+ if i == e && engine.right_bound && engine.word_test === p.right
423
+ s = "#{s}#{right}"
431
424
  end
425
+ p = s
432
426
  end
433
427
  else
434
428
  p = p.downcase if engine.case_insensitive
435
429
  if engine.bound
430
+ s = p
436
431
  if i == 0 && engine.left_bound && engine.word_test === p[0]
437
- p = "#{left}#{p}"
438
- l = true
432
+ s = "#{left}#{s}"
439
433
  end
440
434
  if i == e && engine.right_bound && engine.word_test === p[-1]
441
- p = "#{p}#{right}"
442
- r = true
435
+ s = "#{s}#{right}"
443
436
  end
437
+ p = s
444
438
  end
445
439
  end
446
440
  p
447
441
  end.join
448
442
  end.uniq.sort
449
- special_map[left.char] = left if l
450
- special_map[right.char] = right if r
451
443
  list
452
444
  end
453
445
  end
@@ -522,29 +514,21 @@ module List
522
514
 
523
515
  end
524
516
 
525
- class SpecialPattern < Node
517
+ class SymbolPattern < Node
526
518
  attr_accessor :char, :var, :left, :right, :pat, :symbol
527
- def initialize(engine, char, var, pat, atomic: (var.is_a?(Regexp) && pat.nil?), word_left: false, word_right: false)
519
+ def initialize(engine, char, var, pat, atomic: (var.is_a?(Regexp) && pat.nil?), left: nil, right: nil)
528
520
  super(engine, nil)
529
521
  @char = char
530
522
  @symbol = var.to_s
531
523
  @var = var.is_a?(String) || var.is_a?(Symbol) ? Regexp.new(Regexp.quote(var.to_s)) : var
532
524
  @pat = pat || var.to_s
533
525
  @atomic = !!atomic
534
- @left = !!word_left
535
- @right = !!word_right
526
+ @left = left
527
+ @right = right
536
528
  end
537
529
 
538
530
  def dup
539
- self.class.new engine, char, var, pat, atomic: atomic?, word_left: left?, word_right: right?
540
- end
541
-
542
- def left?
543
- @left
544
- end
545
-
546
- def right?
547
- @right
531
+ self.class.new engine, char, var, pat, atomic: atomic?, left: left, right: right
548
532
  end
549
533
 
550
534
  def atomic?
@@ -1,3 +1,3 @@
1
1
  module ListMatcher
2
- VERSION = "1.0.5"
2
+ VERSION = "1.0.6"
3
3
  end
data/test/basic_test.rb CHANGED
@@ -289,4 +289,24 @@ class BasicTest < Minitest::Test
289
289
  assert rx === ' '
290
290
  assert rx === '#'
291
291
  end
292
+
293
+ def test_symbol_bound
294
+ rx = List::Matcher.pattern %w(1 2 3 d), bound: :word, symbols: { d: { pattern: '\d{4}', left: '0', right: '0' } }, atomic: false
295
+ assert_equal '\b(?:[1-3]|\d{4})\b', rx
296
+ end
297
+
298
+ def test_symbol_bound_left
299
+ rx = List::Matcher.pattern %w(1 2 3 d), bound: :word_left, symbols: { d: { pattern: '\d{4}', left: '0', right: '0' } }, atomic: false
300
+ assert_equal '\b(?:[1-3]|\d{4})', rx
301
+ end
302
+
303
+ def test_symbol_bound_right
304
+ rx = List::Matcher.pattern %w(1 2 3 d), bound: :word_right, symbols: { d: { pattern: '\d{4}', left: '0', right: '0' } }, atomic: false
305
+ assert_equal '(?:[1-3]|\d{4})\b', rx
306
+ end
307
+
308
+ def test_sort_bound_word
309
+ rx = List::Matcher.pattern %w(a), bound: :word, atomic: false
310
+ assert_equal '\ba\b', rx
311
+ end
292
312
  end
data/test/doc_test.rb CHANGED
@@ -68,11 +68,12 @@ class DocTest < Minitest::Test
68
68
  ],
69
69
  normalize_whitespace: true,
70
70
  atomic: true,
71
+ bound: true,
71
72
  symbols: {
72
- year: year,
73
- mday: mday,
74
- wday: wday,
75
- mo: mo
73
+ year: { pattern: year, atomic: true, left: '1', right: '1' },
74
+ mday: { pattern: mday, atomic: true, left: '1', right: '1' },
75
+ wday: { pattern: wday, atomic: true, left: 'a', right: 'a' },
76
+ mo: { pattern: mo, atomic: true, left: 'a', right: 'a' }
76
77
  }
77
78
  )
78
79
 
@@ -103,6 +104,12 @@ class DocTest < Minitest::Test
103
104
  assert_equal "(?:\\Acat)", List::Matcher.pattern( %w(cat), bound: :string_left )
104
105
  assert_equal "(?:cat$)", List::Matcher.pattern( %w(cat), bound: :line_right )
105
106
  assert_equal "(?:\\#@%|\\bcat\\b)", List::Matcher.pattern( %w( cat #@% ), bound: :word )
107
+ rx = List::Matcher.pattern %w(dddd ddddddd),
108
+ bound: :word,
109
+ symbols: { d: { pattern: '\d', atomic: true, left: '0', right: '0' } }, # <-- this
110
+ atomic: false
111
+ assert_equal '\b\d{4}(?:\d{3})?\b', rx
112
+
106
113
  end
107
114
 
108
115
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: list_matcher
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.5
4
+ version: 1.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - dfhoughton
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-02 00:00:00.000000000 Z
11
+ date: 2015-09-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler