list_matcher 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +25 -4
- data/lib/list_matcher.rb +14 -5
- data/lib/list_matcher/version.rb +1 -1
- data/test/basic_test.rb +9 -0
- data/test/doc_test.rb +104 -0
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7af9157cc2a2c829220e00e56fac87000c3bf5d4
|
4
|
+
data.tar.gz: d7dee5b5521351a63b7f61a5e30dd61cb69d6071
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 085ce4508c859a091ea44ca3c5e5a68db8b184ad37abe599723f02492870f533db63a179e3049d326690a274706fd3dc1448189cea1bcd5455894723c0448b19
|
7
|
+
data.tar.gz: 99c1e1da688674e227234d03098573a48a273f2f46022eae0f254233030b0e8de1f16d9073f47c4c9b0576031c92cf3e5989e3e3181513801b422d890555e54a
|
data/README.md
CHANGED
@@ -32,8 +32,8 @@ puts m.pattern %w( catttttttttt ) # (?:cat{10})
|
|
32
32
|
puts m.pattern %w( cat-t-t-t-t-t-t-t-t-t ) # (?:ca(?:t-){9}t)
|
33
33
|
puts m.pattern %w( catttttttttt batttttttttt ) # (?:[bc]at{10})
|
34
34
|
puts m.pattern %w( cad bad dad ) # (?:[b-d]ad)
|
35
|
-
puts m.pattern %w( cat catalog ) # (?:cat(?:alog)
|
36
|
-
puts m.pattern (1..31).to_a # (?:[4-9]|1\d
|
35
|
+
puts m.pattern %w( cat catalog ) # (?:cat(?:alog)?)
|
36
|
+
puts m.pattern (1..31).to_a # (?:[4-9]|1\d?|2\d?|3[01]?)
|
37
37
|
```
|
38
38
|
|
39
39
|
## Description
|
@@ -48,8 +48,8 @@ are provided to minimize initializations and the number of times you specify opt
|
|
48
48
|
class methods, either `pattern` which generates a string, or `rx`, which returns a `Regexp` object:
|
49
49
|
|
50
50
|
```ruby
|
51
|
-
List::Matcher.pattern %( cat dog ) # "(?:cat|dog)"
|
52
|
-
List::Matcher.rx %( cat dog ) # /(?:cat|dog)/
|
51
|
+
List::Matcher.pattern %w( cat dog ) # "(?:cat|dog)"
|
52
|
+
List::Matcher.rx %w( cat dog ) # /(?:cat|dog)/
|
53
53
|
```
|
54
54
|
|
55
55
|
If you plan to generate multiple regexen, or have complicated options which you always use, you should generate a configured
|
@@ -318,6 +318,27 @@ List::Matcher.new symbols: { aw_nuts: '+++' }
|
|
318
318
|
then you may want to vet your symbols. Vetting is not done by default because one assumes you've worked out
|
319
319
|
your substitutions on your own time and we need not waste runtime checking them.
|
320
320
|
|
321
|
+
### not_extended
|
322
|
+
|
323
|
+
```ruby
|
324
|
+
default: false
|
325
|
+
```
|
326
|
+
|
327
|
+
Under normal circumstances `List::Matcher` will escape simple space characters and `#` lest the pattern
|
328
|
+
generated be included in an *extended* regular expression where these are meta-characters. If you find this
|
329
|
+
makes the expressions unreadable or otherwise annoying, you can tell `List::Matcher` to explicitly generate
|
330
|
+
a non-extended regular expression. This may safely be included in any sort of regular expression, but it will
|
331
|
+
be wrapped with the modifier expression `(?-x:...)`.
|
332
|
+
|
333
|
+
```ruby
|
334
|
+
List::Matcher.pattern [ 'cat and dog', '# is sometimes called the pound symbol' ]
|
335
|
+
# "(?:\\#\\ is\\ sometimes\\ called\\ the\\ pound\\ symbol|cat\\ and\\ dog)"
|
336
|
+
List::Matcher.pattern [ 'cat and dog', '# is sometimes called the pound symbol' ], not_extended: true
|
337
|
+
# "(?-x:cat and dog|# is sometimes called the pound symbol)"
|
338
|
+
```
|
339
|
+
|
340
|
+
Note that `List::Matcher` will continue to quote other white space characters.
|
341
|
+
|
321
342
|
## Benchmarks
|
322
343
|
|
323
344
|
Efficiency isn't the principal purpose of List::Matcher, but in almost all cases List::Matcher
|
data/lib/list_matcher.rb
CHANGED
@@ -2,7 +2,8 @@ require "list_matcher/version"
|
|
2
2
|
|
3
3
|
module List
|
4
4
|
class Matcher
|
5
|
-
attr_reader :atomic, :backtracking, :bound, :case_insensitive, :strip, :left_bound,
|
5
|
+
attr_reader :atomic, :backtracking, :bound, :case_insensitive, :strip, :left_bound,
|
6
|
+
:right_bound, :word_test, :normalize_whitespace, :multiline, :name, :vet, :not_extended
|
6
7
|
|
7
8
|
# convenience method for one-off regexen where there's no point in keeping
|
8
9
|
# around a pattern generator
|
@@ -25,6 +26,7 @@ module List
|
|
25
26
|
strip: false,
|
26
27
|
case_insensitive: false,
|
27
28
|
multiline: false,
|
29
|
+
not_extended: false,
|
28
30
|
normalize_whitespace: false,
|
29
31
|
symbols: {},
|
30
32
|
name: false,
|
@@ -35,6 +37,7 @@ module List
|
|
35
37
|
@strip = strip || normalize_whitespace
|
36
38
|
@case_insensitive = case_insensitive
|
37
39
|
@multiline = multiline
|
40
|
+
@not_extended = not_extended
|
38
41
|
@symbols = deep_dup symbols
|
39
42
|
@_bound = bound
|
40
43
|
@bound = !!bound
|
@@ -65,11 +68,16 @@ module List
|
|
65
68
|
elsif !( bound === false )
|
66
69
|
raise "unfamiliar value for :bound option: #{bound.inspect}"
|
67
70
|
end
|
71
|
+
symbols.keys.each do |k|
|
72
|
+
raise "symbols variable #{k} is neither a string, a symbol, nor a regex" unless k.is_a?(String) || k.is_a?(Symbol) || k.is_a?(Regexp)
|
73
|
+
end
|
68
74
|
if normalize_whitespace
|
69
75
|
@symbols[' '] = { pattern: '\s++' }
|
76
|
+
elsif not_extended
|
77
|
+
@symbols[' '] = { pattern: ' ' }
|
70
78
|
end
|
71
|
-
|
72
|
-
|
79
|
+
if not_extended
|
80
|
+
@symbols['#'] = { pattern: '#' }
|
73
81
|
end
|
74
82
|
if vet
|
75
83
|
Special.new( self, @symbols, [] ).verify
|
@@ -85,6 +93,7 @@ module List
|
|
85
93
|
strip: @strip,
|
86
94
|
case_insensitive: @case_insensitive,
|
87
95
|
multiline: @multiline,
|
96
|
+
not_extended: @not_extended,
|
88
97
|
normalize_whitespace: @normalize_whitespace,
|
89
98
|
symbols: @symbols,
|
90
99
|
name: @name,
|
@@ -124,8 +133,8 @@ module List
|
|
124
133
|
end
|
125
134
|
|
126
135
|
def modifiers
|
127
|
-
( @modifiers ||= if case_insensitive || multiline
|
128
|
-
[ ( 'i' if case_insensitive ), ( 'm' if multiline ) ].compact.join
|
136
|
+
( @modifiers ||= if case_insensitive || multiline || not_extended
|
137
|
+
[ [ ( 'i' if case_insensitive ), ( 'm' if multiline ), ( '-x' if not_extended ) ].compact.join ]
|
129
138
|
else
|
130
139
|
[nil]
|
131
140
|
end )[0]
|
data/lib/list_matcher/version.rb
CHANGED
data/test/basic_test.rb
CHANGED
@@ -245,4 +245,13 @@ class BasicTest < Minitest::Test
|
|
245
245
|
List::Matcher.pattern %w(cat), symbols: { foo: '+' }, vet: true
|
246
246
|
end
|
247
247
|
end
|
248
|
+
|
249
|
+
def test_not_extended
|
250
|
+
m = List::Matcher.new not_extended: true
|
251
|
+
rx = m.pattern [ ' ', '#' ]
|
252
|
+
assert_equal '(?-x:#| )', rx
|
253
|
+
rx = Regexp.new rx
|
254
|
+
assert rx === ' '
|
255
|
+
assert rx === '#'
|
256
|
+
end
|
248
257
|
end
|
data/test/doc_test.rb
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
require "minitest/autorun"
|
2
|
+
|
3
|
+
require "list_matcher"
|
4
|
+
|
5
|
+
# test to make sure all the examples in the documentation work
|
6
|
+
class DocTest < Minitest::Test
|
7
|
+
def test_all
|
8
|
+
m = List::Matcher.new
|
9
|
+
assert_equal '(?:cat|dog)', (m.pattern %w( cat dog ))
|
10
|
+
assert_equal '(?:[cr]at)', (m.pattern %w( cat rat ))
|
11
|
+
assert_equal '(?:ca(?:mel|t))', (m.pattern %w( cat camel ))
|
12
|
+
assert_equal '(?:(?:c|fl|spr)at)', (m.pattern %w( cat flat sprat ))
|
13
|
+
assert_equal '(?:cat{10})', (m.pattern %w( catttttttttt ))
|
14
|
+
assert_equal '(?:ca(?:t-){9}t)', (m.pattern %w( cat-t-t-t-t-t-t-t-t-t ))
|
15
|
+
assert_equal '(?:[bc]at{10})', (m.pattern %w( catttttttttt batttttttttt ))
|
16
|
+
assert_equal '(?:[b-d]ad)', (m.pattern %w( cad bad dad ))
|
17
|
+
assert_equal '(?:cat(?:alog)?)', (m.pattern %w( cat catalog ))
|
18
|
+
assert_equal '(?:[4-9]|1\d?|2\d?|3[01]?)', (m.pattern (1..31).to_a)
|
19
|
+
assert_equal "(?:cat|dog)", (List::Matcher.pattern %w( cat dog ))
|
20
|
+
assert_equal /(?:cat|dog)/, (List::Matcher.rx %w( cat dog ))
|
21
|
+
m = List::Matcher.new normalize_whitespace: true, bound: true, case_insensitive: true, multiline: true, atomic: false, symbols: { num: '\d++' }
|
22
|
+
m2 = m.bud case_insensitive: false
|
23
|
+
assert !m2.case_insensitive
|
24
|
+
assert_equal "cat|dog", (List::Matcher.pattern %w(cat dog), atomic: false)
|
25
|
+
assert_equal "(?:cat|dog)", (List::Matcher.pattern %w(cat dog), atomic: true)
|
26
|
+
assert_equal "(?:cat|dog)", (List::Matcher.pattern %w( cat dog ))
|
27
|
+
assert_equal "(?>cat|dog)", (List::Matcher.pattern %w( cat dog ), backtracking: false)
|
28
|
+
assert_equal "(?:\\bcat\\b)", (List::Matcher.pattern %w(cat), bound: :word)
|
29
|
+
assert_equal "(?:\\bcat\\b)", (List::Matcher.pattern %w(cat), bound: true)
|
30
|
+
assert_equal "(?:^cat$)", (List::Matcher.pattern %w(cat), bound: :line)
|
31
|
+
assert_equal "(?:\\Acat\\z)", (List::Matcher.pattern %w(cat), bound: :string)
|
32
|
+
assert_equal "(?:(?<!\\d)[1-9](?:\\d\\d?)?(?!\\d))", (List::Matcher.pattern (1...1000).to_a, bound: { test: /\d/, left: '(?<!\d)', right: '(?!\d)'})
|
33
|
+
assert_equal "(?:(?:\\ ){5}cat(?:\\ ){5})", (List::Matcher.pattern [' cat '])
|
34
|
+
assert_equal "(?:cat)", (List::Matcher.pattern [' cat '], strip: true)
|
35
|
+
assert_equal "(?:C(?:AT|at)|cat)", (List::Matcher.pattern %w( Cat cat CAT ))
|
36
|
+
assert_equal "(?i:cat)", (List::Matcher.pattern %w( Cat cat CAT ), case_insensitive: true)
|
37
|
+
assert_equal "(?m:cat)", (List::Matcher.pattern %w(cat), multiline: true)
|
38
|
+
assert_equal "(?:\\ (?:\\ dog\\ walker|cat\\ \\ walker\\ )|camel\\ \\ walker)", (List::Matcher.pattern [ ' cat walker ', ' dog walker', 'camel walker' ])
|
39
|
+
assert_equal "(?:(?:ca(?:mel|t)|dog)\\s++walker)", (List::Matcher.pattern [ ' cat walker ', ' dog walker', 'camel walker' ], normalize_whitespace: true)
|
40
|
+
assert_equal "(?:(?:(?:Catch|Fahrenheit)\\ )?\\d++)", (List::Matcher.pattern [ 'Catch 22', '1984', 'Fahrenheit 451' ], symbols: { /\d+/ => '\d++' })
|
41
|
+
assert_equal "(?:(?:(?:Catch|Fahrenheit)\\ )?\\d++)", (List::Matcher.pattern [ 'Catch foo', 'foo', 'Fahrenheit foo' ], symbols: { 'foo' => '\d++' })
|
42
|
+
assert_equal "(?:(?:(?:Catch|Fahrenheit)\\ )?\\d++)", (List::Matcher.pattern [ 'Catch foo', 'foo', 'Fahrenheit foo' ], symbols: { foo: '\d++' })
|
43
|
+
assert_equal "(?<cat>cat)", (List::Matcher.pattern %w(cat), name: :cat)
|
44
|
+
|
45
|
+
m = List::Matcher.new atomic: false, bound: true
|
46
|
+
|
47
|
+
year = m.pattern( (1901..2000).to_a, name: :year )
|
48
|
+
mday = m.pattern( (1..31).to_a, name: :mday )
|
49
|
+
weekdays = %w( Monday Tuesday Wednesday Thursday Friday Saturday Sunday )
|
50
|
+
weekdays += weekdays.map{ |w| w[0...3] }
|
51
|
+
wday = m.pattern weekdays, case_insensitive: true, name: :wday
|
52
|
+
months = %w( January February March April May June July August September October November December )
|
53
|
+
months += months.map{ |w| w[0...3] }
|
54
|
+
mo = m.pattern months, case_insensitive: true, name: :mo
|
55
|
+
|
56
|
+
date_20th_century = m.rx(
|
57
|
+
[
|
58
|
+
'wday, mo mday',
|
59
|
+
'wday, mo mday year',
|
60
|
+
'mo mday, year',
|
61
|
+
'mo year',
|
62
|
+
'mday mo year',
|
63
|
+
'wday',
|
64
|
+
'year',
|
65
|
+
'mday mo',
|
66
|
+
'mo mday',
|
67
|
+
'mo mday year'
|
68
|
+
],
|
69
|
+
normalize_whitespace: true,
|
70
|
+
atomic: true,
|
71
|
+
symbols: {
|
72
|
+
year: year,
|
73
|
+
mday: mday,
|
74
|
+
wday: wday,
|
75
|
+
mo: mo
|
76
|
+
}
|
77
|
+
)
|
78
|
+
|
79
|
+
assert m = date_20th_century.match('Friday')
|
80
|
+
assert_equal 'Friday', m[:wday]
|
81
|
+
assert_nil m[:year]
|
82
|
+
assert_nil m[:mo]
|
83
|
+
assert_nil m[:mday]
|
84
|
+
assert m = date_20th_century.match('August 27')
|
85
|
+
assert_equal 'August', m[:mo]
|
86
|
+
assert_equal '27', m[:mday]
|
87
|
+
assert_nil m[:year]
|
88
|
+
assert_nil m[:wday]
|
89
|
+
assert m = date_20th_century.match('May 6, 1969')
|
90
|
+
assert_equal 'May', m[:mo]
|
91
|
+
assert_equal '6', m[:mday]
|
92
|
+
assert_equal '1969', m[:year]
|
93
|
+
assert_nil m[:wday]
|
94
|
+
assert m = date_20th_century.match('1 Jan 2000')
|
95
|
+
assert_equal '1', m[:mday]
|
96
|
+
assert_equal 'Jan', m[:mo]
|
97
|
+
assert_equal '2000', m[:year]
|
98
|
+
assert_nil m[:wday]
|
99
|
+
assert_nil date_20th_century.match('this is not actually a date')
|
100
|
+
assert_equal "(?:\\#\\ is\\ sometimes\\ called\\ the\\ pound\\ symbol|cat\\ and\\ dog)", (List::Matcher.pattern [ 'cat and dog', '# is sometimes called the pound symbol' ])
|
101
|
+
assert_equal "(?-x:cat and dog|# is sometimes called the pound symbol)", (List::Matcher.pattern [ 'cat and dog', '# is sometimes called the pound symbol' ], not_extended: true)
|
102
|
+
end
|
103
|
+
|
104
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: list_matcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- dfhoughton
|
@@ -56,6 +56,7 @@ files:
|
|
56
56
|
- list_matcher.gemspec
|
57
57
|
- test/basic_test.rb
|
58
58
|
- test/benchmarks.rb
|
59
|
+
- test/doc_test.rb
|
59
60
|
- test/stress.rb
|
60
61
|
homepage: https://github.com/dfhoughton/list_matcher
|
61
62
|
licenses:
|
@@ -84,4 +85,5 @@ summary: List::Matcher automates the generation of efficient regular expressions
|
|
84
85
|
test_files:
|
85
86
|
- test/basic_test.rb
|
86
87
|
- test/benchmarks.rb
|
88
|
+
- test/doc_test.rb
|
87
89
|
- test/stress.rb
|