list_matcher 1.0.7 → 1.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -0
- data/lib/list_matcher.rb +12 -9
- data/lib/list_matcher/version.rb +1 -1
- data/list_matcher.gemspec +5 -2
- data/test/basic_test.rb +8 -0
- metadata +9 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7cd4faa0dd3bafd7a1933d14d325056175c28886
|
4
|
+
data.tar.gz: 895da5871b03ba84b0d41bade4ed9aaa45231b0c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 944331095b454a924a047f7c1f96bae2c8254b4e092622e185de66e35063a6f698e1e8c3a45f99cebe7c4e2077203987cb94bb53d4ffb2720eb601f7a055a032
|
7
|
+
data.tar.gz: eb3ad634394d9fa378f206117e47b553e3f21c032a95cd284b5591c81893374cdc69330c2f48f9e0e1f2693e13e96b391c45eb71b470567527d636b7e25aeaa6
|
data/README.md
CHANGED
@@ -401,6 +401,18 @@ List::Matcher.pattern [ 'cat and dog', '# is sometimes called the pound symbol'
|
|
401
401
|
|
402
402
|
Note that `List::Matcher` will continue to quote other white space characters.
|
403
403
|
|
404
|
+
### encoding
|
405
|
+
|
406
|
+
```ruby
|
407
|
+
default: Encoding::UTF_8
|
408
|
+
```
|
409
|
+
|
410
|
+
`List::Matcher` converts characters into integers during the construction of character classes and then
|
411
|
+
must convert the integers back into characters. In order to do this with characters whose code point is
|
412
|
+
above 255 we must give `chr` a character encoding. That is the only purpose of this option. Most likely you will never
|
413
|
+
need to change this default, but if it causes you trouble this parameter is provided so you can fix
|
414
|
+
it without having to fork the code.
|
415
|
+
|
404
416
|
## Benchmarks
|
405
417
|
|
406
418
|
Efficiency isn't the principal purpose of List::Matcher, but in almost all cases List::Matcher
|
data/lib/list_matcher.rb
CHANGED
@@ -3,7 +3,8 @@ require "list_matcher/version"
|
|
3
3
|
module List
|
4
4
|
class Matcher
|
5
5
|
attr_reader :atomic, :backtracking, :bound, :case_insensitive, :strip, :left_bound,
|
6
|
-
:right_bound, :word_test, :normalize_whitespace, :multiline, :name, :vet, :not_extended
|
6
|
+
:right_bound, :word_test, :normalize_whitespace, :multiline, :name, :vet, :not_extended,
|
7
|
+
:encoding
|
7
8
|
|
8
9
|
# a special exception class for List::Matcher
|
9
10
|
class Error < ::StandardError; end
|
@@ -33,7 +34,8 @@ module List
|
|
33
34
|
normalize_whitespace: false,
|
34
35
|
symbols: {},
|
35
36
|
name: false,
|
36
|
-
vet: false
|
37
|
+
vet: false,
|
38
|
+
encoding: Encoding::UTF_8
|
37
39
|
)
|
38
40
|
@atomic = atomic
|
39
41
|
@backtracking = backtracking
|
@@ -46,6 +48,7 @@ module List
|
|
46
48
|
@bound = !!bound
|
47
49
|
@normalize_whitespace = normalize_whitespace
|
48
50
|
@vet = vet
|
51
|
+
@encoding = encoding
|
49
52
|
if name
|
50
53
|
raise Error, "name must be a string or symbol" unless name.is_a?(String) || name.is_a?(Symbol)
|
51
54
|
begin
|
@@ -354,7 +357,7 @@ module List
|
|
354
357
|
s == 0 ? a.to_s <=> b.to_s : s
|
355
358
|
end
|
356
359
|
end.each do |var, opts|
|
357
|
-
c = ( max += 1 ).chr
|
360
|
+
c = ( max += 1 ).chr(engine.encoding)
|
358
361
|
sp = if opts.is_a? Hash
|
359
362
|
pat = opts.delete :pattern
|
360
363
|
raise Error, "symbol #{var} requires a pattern" unless pat || var.is_a?(Regexp)
|
@@ -372,12 +375,12 @@ module List
|
|
372
375
|
end
|
373
376
|
if engine.bound
|
374
377
|
if engine.left_bound
|
375
|
-
c = ( max += 1 ).chr
|
378
|
+
c = ( max += 1 ).chr(engine.encoding)
|
376
379
|
@left = SymbolPattern.new engine, c, c, engine.left_bound
|
377
380
|
@specials << @left
|
378
381
|
end
|
379
382
|
if engine.right_bound
|
380
|
-
c = ( max += 1 ).chr
|
383
|
+
c = ( max += 1 ).chr(engine.encoding)
|
381
384
|
@right = SymbolPattern.new engine, c, c, engine.right_bound
|
382
385
|
@specials << @right
|
383
386
|
end
|
@@ -684,15 +687,15 @@ module List
|
|
684
687
|
else
|
685
688
|
rs = ranges(chars)
|
686
689
|
if rs.size == 1 && rs[0][0] == rs[0][1]
|
687
|
-
cc_quote rs[0][0].chr
|
690
|
+
cc_quote rs[0][0].chr(engine.encoding)
|
688
691
|
else
|
689
692
|
mid = rs.map do |s, e|
|
690
693
|
if s == e
|
691
|
-
cc_quote s.chr
|
694
|
+
cc_quote s.chr(engine.encoding)
|
692
695
|
elsif e == s + 1
|
693
|
-
"#{ cc_quote s.chr }#{ cc_quote e.chr }"
|
696
|
+
"#{ cc_quote s.chr(engine.encoding) }#{ cc_quote e.chr(engine.encoding) }"
|
694
697
|
else
|
695
|
-
"#{ cc_quote s.chr }-#{ cc_quote e.chr }"
|
698
|
+
"#{ cc_quote s.chr(engine.encoding) }-#{ cc_quote e.chr(engine.encoding) }"
|
696
699
|
end
|
697
700
|
end.join
|
698
701
|
end
|
data/lib/list_matcher/version.rb
CHANGED
data/list_matcher.gemspec
CHANGED
@@ -9,7 +9,10 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.authors = ["dfhoughton"]
|
10
10
|
spec.email = ["dfhoughton@gmail.com"]
|
11
11
|
spec.summary = %q{List::Matcher automates the generation of efficient regular expressions.}
|
12
|
-
spec.description =
|
12
|
+
spec.description = <<-END
|
13
|
+
List::Matcher automates the creation of regular expressions from lists, including lists of other regular
|
14
|
+
expressions. The expressions it generates from lists of strings are non-backtracking and compact.
|
15
|
+
END
|
13
16
|
spec.homepage = "https://github.com/dfhoughton/list_matcher"
|
14
17
|
spec.license = "MIT"
|
15
18
|
|
@@ -21,5 +24,5 @@ Gem::Specification.new do |spec|
|
|
21
24
|
|
22
25
|
spec.add_development_dependency "bundler", "~> 1.7"
|
23
26
|
spec.add_development_dependency "rake", "~> 10.0"
|
24
|
-
spec.add_development_dependency "minitest"
|
27
|
+
spec.add_development_dependency "minitest", "~> 5"
|
25
28
|
end
|
data/test/basic_test.rb
CHANGED
@@ -31,6 +31,14 @@ class BasicTest < Minitest::Test
|
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
|
+
def test_fat_chars
|
35
|
+
doublewides = (256...1000).map{ |i| i.chr(Encoding::UTF_8) }
|
36
|
+
rx = List::Matcher.rx doublewides
|
37
|
+
doublewides.each do |word|
|
38
|
+
assert rx === word, "can match #{word}"
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
34
42
|
def test_word_chars_case_insensitive
|
35
43
|
word = (1..255).map(&:chr).select{ |c| /\w/ === c }
|
36
44
|
chars = word + ['+']
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: list_matcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- dfhoughton
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-12-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -42,17 +42,19 @@ dependencies:
|
|
42
42
|
name: minitest
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- - "
|
45
|
+
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
47
|
+
version: '5'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- - "
|
52
|
+
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
55
|
-
description:
|
54
|
+
version: '5'
|
55
|
+
description: |2
|
56
|
+
List::Matcher automates the creation of regular expressions from lists, including lists of other regular
|
57
|
+
expressions. The expressions it generates from lists of strings are non-backtracking and compact.
|
56
58
|
email:
|
57
59
|
- dfhoughton@gmail.com
|
58
60
|
executables: []
|