list_matcher 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 233f0ed367dd0fe761cff13a2998ce8310401d0c
4
- data.tar.gz: a1cbb19b367ce7aa5279d68a519c16f613462ad2
3
+ metadata.gz: 7af9157cc2a2c829220e00e56fac87000c3bf5d4
4
+ data.tar.gz: d7dee5b5521351a63b7f61a5e30dd61cb69d6071
5
5
  SHA512:
6
- metadata.gz: 8665a1df6177622c962ab021aa3a60177e8c2cd33dd6ca68a12f7cf96c3305a156ba8a91a7c6d763644b5d0b809fe4add5c373afb32c3496302dac09dac8eccd
7
- data.tar.gz: fc2f7625c101f014ea1c8151387539b2a89636fcb4d5cedfee742ba99a5da450133d617a8ca71be9ce5ff6ce02b0772e11843095d8355efd7d9b67488cc612f9
6
+ metadata.gz: 085ce4508c859a091ea44ca3c5e5a68db8b184ad37abe599723f02492870f533db63a179e3049d326690a274706fd3dc1448189cea1bcd5455894723c0448b19
7
+ data.tar.gz: 99c1e1da688674e227234d03098573a48a273f2f46022eae0f254233030b0e8de1f16d9073f47c4c9b0576031c92cf3e5989e3e3181513801b422d890555e54a
data/README.md CHANGED
@@ -32,8 +32,8 @@ puts m.pattern %w( catttttttttt ) # (?:cat{10})
32
32
  puts m.pattern %w( cat-t-t-t-t-t-t-t-t-t ) # (?:ca(?:t-){9}t)
33
33
  puts m.pattern %w( catttttttttt batttttttttt ) # (?:[bc]at{10})
34
34
  puts m.pattern %w( cad bad dad ) # (?:[b-d]ad)
35
- puts m.pattern %w( cat catalog ) # (?:cat(?:alog)?+)
36
- puts m.pattern (1..31).to_a # (?:[4-9]|1\d?+|2\d?+|3[01]?+)
35
+ puts m.pattern %w( cat catalog ) # (?:cat(?:alog)?)
36
+ puts m.pattern (1..31).to_a # (?:[4-9]|1\d?|2\d?|3[01]?)
37
37
  ```
38
38
 
39
39
  ## Description
@@ -48,8 +48,8 @@ are provided to minimize initializations and the number of times you specify opt
48
48
  class methods, either `pattern` which generates a string, or `rx`, which returns a `Regexp` object:
49
49
 
50
50
  ```ruby
51
- List::Matcher.pattern %( cat dog ) # "(?:cat|dog)"
52
- List::Matcher.rx %( cat dog ) # /(?:cat|dog)/
51
+ List::Matcher.pattern %w( cat dog ) # "(?:cat|dog)"
52
+ List::Matcher.rx %w( cat dog ) # /(?:cat|dog)/
53
53
  ```
54
54
 
55
55
  If you plan to generate multiple regexen, or have complicated options which you always use, you should generate a configured
@@ -318,6 +318,27 @@ List::Matcher.new symbols: { aw_nuts: '+++' }
318
318
  then you may want to vet your symbols. Vetting is not done by default because one assumes you've worked out
319
319
  your substitutions on your own time and we need not waste runtime checking them.
320
320
 
321
+ ### not_extended
322
+
323
+ ```ruby
324
+ default: false
325
+ ```
326
+
327
+ Under normal circumstances `List::Matcher` will escape simple space characters and `#` lest the pattern
328
+ generated be included in an *extended* regular expression where these are meta-characters. If you find this
329
+ makes the expressions unreadable or otherwise annoying, you can tell `List::Matcher` to explicitly generate
330
+ a non-extended regular expression. This may safely be included in any sort of regular expression, but it will
331
+ be wrapped with the modifier expression `(?-x:...)`.
332
+
333
+ ```ruby
334
+ List::Matcher.pattern [ 'cat and dog', '# is sometimes called the pound symbol' ]
335
+ # "(?:\\#\\ is\\ sometimes\\ called\\ the\\ pound\\ symbol|cat\\ and\\ dog)"
336
+ List::Matcher.pattern [ 'cat and dog', '# is sometimes called the pound symbol' ], not_extended: true
337
+ # "(?-x:cat and dog|# is sometimes called the pound symbol)"
338
+ ```
339
+
340
+ Note that `List::Matcher` will continue to quote other white space characters.
341
+
321
342
  ## Benchmarks
322
343
 
323
344
  Efficiency isn't the principal purpose of List::Matcher, but in almost all cases List::Matcher
data/lib/list_matcher.rb CHANGED
@@ -2,7 +2,8 @@ require "list_matcher/version"
2
2
 
3
3
  module List
4
4
  class Matcher
5
- attr_reader :atomic, :backtracking, :bound, :case_insensitive, :strip, :left_bound, :right_bound, :word_test, :normalize_whitespace, :multiline, :name, :vet
5
+ attr_reader :atomic, :backtracking, :bound, :case_insensitive, :strip, :left_bound,
6
+ :right_bound, :word_test, :normalize_whitespace, :multiline, :name, :vet, :not_extended
6
7
 
7
8
  # convenience method for one-off regexen where there's no point in keeping
8
9
  # around a pattern generator
@@ -25,6 +26,7 @@ module List
25
26
  strip: false,
26
27
  case_insensitive: false,
27
28
  multiline: false,
29
+ not_extended: false,
28
30
  normalize_whitespace: false,
29
31
  symbols: {},
30
32
  name: false,
@@ -35,6 +37,7 @@ module List
35
37
  @strip = strip || normalize_whitespace
36
38
  @case_insensitive = case_insensitive
37
39
  @multiline = multiline
40
+ @not_extended = not_extended
38
41
  @symbols = deep_dup symbols
39
42
  @_bound = bound
40
43
  @bound = !!bound
@@ -65,11 +68,16 @@ module List
65
68
  elsif !( bound === false )
66
69
  raise "unfamiliar value for :bound option: #{bound.inspect}"
67
70
  end
71
+ symbols.keys.each do |k|
72
+ raise "symbols variable #{k} is neither a string, a symbol, nor a regex" unless k.is_a?(String) || k.is_a?(Symbol) || k.is_a?(Regexp)
73
+ end
68
74
  if normalize_whitespace
69
75
  @symbols[' '] = { pattern: '\s++' }
76
+ elsif not_extended
77
+ @symbols[' '] = { pattern: ' ' }
70
78
  end
71
- symbols.keys.each do |k|
72
- raise "symbols variable #{k} is neither a string, a symbol, nor a regex" unless k.is_a?(String) || k.is_a?(Symbol) || k.is_a?(Regexp)
79
+ if not_extended
80
+ @symbols['#'] = { pattern: '#' }
73
81
  end
74
82
  if vet
75
83
  Special.new( self, @symbols, [] ).verify
@@ -85,6 +93,7 @@ module List
85
93
  strip: @strip,
86
94
  case_insensitive: @case_insensitive,
87
95
  multiline: @multiline,
96
+ not_extended: @not_extended,
88
97
  normalize_whitespace: @normalize_whitespace,
89
98
  symbols: @symbols,
90
99
  name: @name,
@@ -124,8 +133,8 @@ module List
124
133
  end
125
134
 
126
135
  def modifiers
127
- ( @modifiers ||= if case_insensitive || multiline
128
- [ ( 'i' if case_insensitive ), ( 'm' if multiline ) ].compact.join
136
+ ( @modifiers ||= if case_insensitive || multiline || not_extended
137
+ [ [ ( 'i' if case_insensitive ), ( 'm' if multiline ), ( '-x' if not_extended ) ].compact.join ]
129
138
  else
130
139
  [nil]
131
140
  end )[0]
@@ -1,3 +1,3 @@
1
1
  module ListMatcher
2
- VERSION = "1.0.1"
2
+ VERSION = "1.0.2"
3
3
  end
data/test/basic_test.rb CHANGED
@@ -245,4 +245,13 @@ class BasicTest < Minitest::Test
245
245
  List::Matcher.pattern %w(cat), symbols: { foo: '+' }, vet: true
246
246
  end
247
247
  end
248
+
249
+ def test_not_extended
250
+ m = List::Matcher.new not_extended: true
251
+ rx = m.pattern [ ' ', '#' ]
252
+ assert_equal '(?-x:#| )', rx
253
+ rx = Regexp.new rx
254
+ assert rx === ' '
255
+ assert rx === '#'
256
+ end
248
257
  end
data/test/doc_test.rb ADDED
@@ -0,0 +1,104 @@
1
+ require "minitest/autorun"
2
+
3
+ require "list_matcher"
4
+
5
+ # test to make sure all the examples in the documentation work
6
+ class DocTest < Minitest::Test
7
+ def test_all
8
+ m = List::Matcher.new
9
+ assert_equal '(?:cat|dog)', (m.pattern %w( cat dog ))
10
+ assert_equal '(?:[cr]at)', (m.pattern %w( cat rat ))
11
+ assert_equal '(?:ca(?:mel|t))', (m.pattern %w( cat camel ))
12
+ assert_equal '(?:(?:c|fl|spr)at)', (m.pattern %w( cat flat sprat ))
13
+ assert_equal '(?:cat{10})', (m.pattern %w( catttttttttt ))
14
+ assert_equal '(?:ca(?:t-){9}t)', (m.pattern %w( cat-t-t-t-t-t-t-t-t-t ))
15
+ assert_equal '(?:[bc]at{10})', (m.pattern %w( catttttttttt batttttttttt ))
16
+ assert_equal '(?:[b-d]ad)', (m.pattern %w( cad bad dad ))
17
+ assert_equal '(?:cat(?:alog)?)', (m.pattern %w( cat catalog ))
18
+ assert_equal '(?:[4-9]|1\d?|2\d?|3[01]?)', (m.pattern (1..31).to_a)
19
+ assert_equal "(?:cat|dog)", (List::Matcher.pattern %w( cat dog ))
20
+ assert_equal /(?:cat|dog)/, (List::Matcher.rx %w( cat dog ))
21
+ m = List::Matcher.new normalize_whitespace: true, bound: true, case_insensitive: true, multiline: true, atomic: false, symbols: { num: '\d++' }
22
+ m2 = m.bud case_insensitive: false
23
+ assert !m2.case_insensitive
24
+ assert_equal "cat|dog", (List::Matcher.pattern %w(cat dog), atomic: false)
25
+ assert_equal "(?:cat|dog)", (List::Matcher.pattern %w(cat dog), atomic: true)
26
+ assert_equal "(?:cat|dog)", (List::Matcher.pattern %w( cat dog ))
27
+ assert_equal "(?>cat|dog)", (List::Matcher.pattern %w( cat dog ), backtracking: false)
28
+ assert_equal "(?:\\bcat\\b)", (List::Matcher.pattern %w(cat), bound: :word)
29
+ assert_equal "(?:\\bcat\\b)", (List::Matcher.pattern %w(cat), bound: true)
30
+ assert_equal "(?:^cat$)", (List::Matcher.pattern %w(cat), bound: :line)
31
+ assert_equal "(?:\\Acat\\z)", (List::Matcher.pattern %w(cat), bound: :string)
32
+ assert_equal "(?:(?<!\\d)[1-9](?:\\d\\d?)?(?!\\d))", (List::Matcher.pattern (1...1000).to_a, bound: { test: /\d/, left: '(?<!\d)', right: '(?!\d)'})
33
+ assert_equal "(?:(?:\\ ){5}cat(?:\\ ){5})", (List::Matcher.pattern [' cat '])
34
+ assert_equal "(?:cat)", (List::Matcher.pattern [' cat '], strip: true)
35
+ assert_equal "(?:C(?:AT|at)|cat)", (List::Matcher.pattern %w( Cat cat CAT ))
36
+ assert_equal "(?i:cat)", (List::Matcher.pattern %w( Cat cat CAT ), case_insensitive: true)
37
+ assert_equal "(?m:cat)", (List::Matcher.pattern %w(cat), multiline: true)
38
+ assert_equal "(?:\\ (?:\\ dog\\ walker|cat\\ \\ walker\\ )|camel\\ \\ walker)", (List::Matcher.pattern [ ' cat walker ', ' dog walker', 'camel walker' ])
39
+ assert_equal "(?:(?:ca(?:mel|t)|dog)\\s++walker)", (List::Matcher.pattern [ ' cat walker ', ' dog walker', 'camel walker' ], normalize_whitespace: true)
40
+ assert_equal "(?:(?:(?:Catch|Fahrenheit)\\ )?\\d++)", (List::Matcher.pattern [ 'Catch 22', '1984', 'Fahrenheit 451' ], symbols: { /\d+/ => '\d++' })
41
+ assert_equal "(?:(?:(?:Catch|Fahrenheit)\\ )?\\d++)", (List::Matcher.pattern [ 'Catch foo', 'foo', 'Fahrenheit foo' ], symbols: { 'foo' => '\d++' })
42
+ assert_equal "(?:(?:(?:Catch|Fahrenheit)\\ )?\\d++)", (List::Matcher.pattern [ 'Catch foo', 'foo', 'Fahrenheit foo' ], symbols: { foo: '\d++' })
43
+ assert_equal "(?<cat>cat)", (List::Matcher.pattern %w(cat), name: :cat)
44
+
45
+ m = List::Matcher.new atomic: false, bound: true
46
+
47
+ year = m.pattern( (1901..2000).to_a, name: :year )
48
+ mday = m.pattern( (1..31).to_a, name: :mday )
49
+ weekdays = %w( Monday Tuesday Wednesday Thursday Friday Saturday Sunday )
50
+ weekdays += weekdays.map{ |w| w[0...3] }
51
+ wday = m.pattern weekdays, case_insensitive: true, name: :wday
52
+ months = %w( January February March April May June July August September October November December )
53
+ months += months.map{ |w| w[0...3] }
54
+ mo = m.pattern months, case_insensitive: true, name: :mo
55
+
56
+ date_20th_century = m.rx(
57
+ [
58
+ 'wday, mo mday',
59
+ 'wday, mo mday year',
60
+ 'mo mday, year',
61
+ 'mo year',
62
+ 'mday mo year',
63
+ 'wday',
64
+ 'year',
65
+ 'mday mo',
66
+ 'mo mday',
67
+ 'mo mday year'
68
+ ],
69
+ normalize_whitespace: true,
70
+ atomic: true,
71
+ symbols: {
72
+ year: year,
73
+ mday: mday,
74
+ wday: wday,
75
+ mo: mo
76
+ }
77
+ )
78
+
79
+ assert m = date_20th_century.match('Friday')
80
+ assert_equal 'Friday', m[:wday]
81
+ assert_nil m[:year]
82
+ assert_nil m[:mo]
83
+ assert_nil m[:mday]
84
+ assert m = date_20th_century.match('August 27')
85
+ assert_equal 'August', m[:mo]
86
+ assert_equal '27', m[:mday]
87
+ assert_nil m[:year]
88
+ assert_nil m[:wday]
89
+ assert m = date_20th_century.match('May 6, 1969')
90
+ assert_equal 'May', m[:mo]
91
+ assert_equal '6', m[:mday]
92
+ assert_equal '1969', m[:year]
93
+ assert_nil m[:wday]
94
+ assert m = date_20th_century.match('1 Jan 2000')
95
+ assert_equal '1', m[:mday]
96
+ assert_equal 'Jan', m[:mo]
97
+ assert_equal '2000', m[:year]
98
+ assert_nil m[:wday]
99
+ assert_nil date_20th_century.match('this is not actually a date')
100
+ assert_equal "(?:\\#\\ is\\ sometimes\\ called\\ the\\ pound\\ symbol|cat\\ and\\ dog)", (List::Matcher.pattern [ 'cat and dog', '# is sometimes called the pound symbol' ])
101
+ assert_equal "(?-x:cat and dog|# is sometimes called the pound symbol)", (List::Matcher.pattern [ 'cat and dog', '# is sometimes called the pound symbol' ], not_extended: true)
102
+ end
103
+
104
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: list_matcher
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - dfhoughton
@@ -56,6 +56,7 @@ files:
56
56
  - list_matcher.gemspec
57
57
  - test/basic_test.rb
58
58
  - test/benchmarks.rb
59
+ - test/doc_test.rb
59
60
  - test/stress.rb
60
61
  homepage: https://github.com/dfhoughton/list_matcher
61
62
  licenses:
@@ -84,4 +85,5 @@ summary: List::Matcher automates the generation of efficient regular expressions
84
85
  test_files:
85
86
  - test/basic_test.rb
86
87
  - test/benchmarks.rb
88
+ - test/doc_test.rb
87
89
  - test/stress.rb