list_matcher 1.0.7 → 1.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +12 -0
- data/lib/list_matcher.rb +12 -9
- data/lib/list_matcher/version.rb +1 -1
- data/list_matcher.gemspec +5 -2
- data/test/basic_test.rb +8 -0
- metadata +9 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7cd4faa0dd3bafd7a1933d14d325056175c28886
|
4
|
+
data.tar.gz: 895da5871b03ba84b0d41bade4ed9aaa45231b0c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 944331095b454a924a047f7c1f96bae2c8254b4e092622e185de66e35063a6f698e1e8c3a45f99cebe7c4e2077203987cb94bb53d4ffb2720eb601f7a055a032
|
7
|
+
data.tar.gz: eb3ad634394d9fa378f206117e47b553e3f21c032a95cd284b5591c81893374cdc69330c2f48f9e0e1f2693e13e96b391c45eb71b470567527d636b7e25aeaa6
|
data/README.md
CHANGED
@@ -401,6 +401,18 @@ List::Matcher.pattern [ 'cat and dog', '# is sometimes called the pound symbol'
|
|
401
401
|
|
402
402
|
Note that `List::Matcher` will continue to quote other white space characters.
|
403
403
|
|
404
|
+
### encoding
|
405
|
+
|
406
|
+
```ruby
|
407
|
+
default: Encoding::UTF_8
|
408
|
+
```
|
409
|
+
|
410
|
+
`List::Matcher` converts characters into integers during the construction of character classes and then
|
411
|
+
must convert the integers back into characters. In order to do this with characters whose code point is
|
412
|
+
above 255 we must give `chr` a character encoding. That is the sole purpose of this option. Most likely you will never
|
413
|
+
need to change this default, but if it causes you trouble this parameter is provided so you can fix
|
414
|
+
it without having to fork the code.
|
415
|
+
|
404
416
|
## Benchmarks
|
405
417
|
|
406
418
|
Efficiency isn't the principal purpose of List::Matcher, but in almost all cases List::Matcher
|
data/lib/list_matcher.rb
CHANGED
@@ -3,7 +3,8 @@ require "list_matcher/version"
|
|
3
3
|
module List
|
4
4
|
class Matcher
|
5
5
|
attr_reader :atomic, :backtracking, :bound, :case_insensitive, :strip, :left_bound,
|
6
|
-
:right_bound, :word_test, :normalize_whitespace, :multiline, :name, :vet, :not_extended
|
6
|
+
:right_bound, :word_test, :normalize_whitespace, :multiline, :name, :vet, :not_extended,
|
7
|
+
:encoding
|
7
8
|
|
8
9
|
# a special exception class for List::Matcher
|
9
10
|
class Error < ::StandardError; end
|
@@ -33,7 +34,8 @@ module List
|
|
33
34
|
normalize_whitespace: false,
|
34
35
|
symbols: {},
|
35
36
|
name: false,
|
36
|
-
vet: false
|
37
|
+
vet: false,
|
38
|
+
encoding: Encoding::UTF_8
|
37
39
|
)
|
38
40
|
@atomic = atomic
|
39
41
|
@backtracking = backtracking
|
@@ -46,6 +48,7 @@ module List
|
|
46
48
|
@bound = !!bound
|
47
49
|
@normalize_whitespace = normalize_whitespace
|
48
50
|
@vet = vet
|
51
|
+
@encoding = encoding
|
49
52
|
if name
|
50
53
|
raise Error, "name must be a string or symbol" unless name.is_a?(String) || name.is_a?(Symbol)
|
51
54
|
begin
|
@@ -354,7 +357,7 @@ module List
|
|
354
357
|
s == 0 ? a.to_s <=> b.to_s : s
|
355
358
|
end
|
356
359
|
end.each do |var, opts|
|
357
|
-
c = ( max += 1 ).chr
|
360
|
+
c = ( max += 1 ).chr(engine.encoding)
|
358
361
|
sp = if opts.is_a? Hash
|
359
362
|
pat = opts.delete :pattern
|
360
363
|
raise Error, "symbol #{var} requires a pattern" unless pat || var.is_a?(Regexp)
|
@@ -372,12 +375,12 @@ module List
|
|
372
375
|
end
|
373
376
|
if engine.bound
|
374
377
|
if engine.left_bound
|
375
|
-
c = ( max += 1 ).chr
|
378
|
+
c = ( max += 1 ).chr(engine.encoding)
|
376
379
|
@left = SymbolPattern.new engine, c, c, engine.left_bound
|
377
380
|
@specials << @left
|
378
381
|
end
|
379
382
|
if engine.right_bound
|
380
|
-
c = ( max += 1 ).chr
|
383
|
+
c = ( max += 1 ).chr(engine.encoding)
|
381
384
|
@right = SymbolPattern.new engine, c, c, engine.right_bound
|
382
385
|
@specials << @right
|
383
386
|
end
|
@@ -684,15 +687,15 @@ module List
|
|
684
687
|
else
|
685
688
|
rs = ranges(chars)
|
686
689
|
if rs.size == 1 && rs[0][0] == rs[0][1]
|
687
|
-
cc_quote rs[0][0].chr
|
690
|
+
cc_quote rs[0][0].chr(engine.encoding)
|
688
691
|
else
|
689
692
|
mid = rs.map do |s, e|
|
690
693
|
if s == e
|
691
|
-
cc_quote s.chr
|
694
|
+
cc_quote s.chr(engine.encoding)
|
692
695
|
elsif e == s + 1
|
693
|
-
"#{ cc_quote s.chr }#{ cc_quote e.chr }"
|
696
|
+
"#{ cc_quote s.chr(engine.encoding) }#{ cc_quote e.chr(engine.encoding) }"
|
694
697
|
else
|
695
|
-
"#{ cc_quote s.chr }-#{ cc_quote e.chr }"
|
698
|
+
"#{ cc_quote s.chr(engine.encoding) }-#{ cc_quote e.chr(engine.encoding) }"
|
696
699
|
end
|
697
700
|
end.join
|
698
701
|
end
|
data/lib/list_matcher/version.rb
CHANGED
data/list_matcher.gemspec
CHANGED
@@ -9,7 +9,10 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.authors = ["dfhoughton"]
|
10
10
|
spec.email = ["dfhoughton@gmail.com"]
|
11
11
|
spec.summary = %q{List::Matcher automates the generation of efficient regular expressions.}
|
12
|
-
spec.description =
|
12
|
+
spec.description = <<-END
|
13
|
+
List::Matcher automates the creation of regular expressions from lists, including lists of other regular
|
14
|
+
expressions. The expressions it generates from lists of strings are non-backtracking and compact.
|
15
|
+
END
|
13
16
|
spec.homepage = "https://github.com/dfhoughton/list_matcher"
|
14
17
|
spec.license = "MIT"
|
15
18
|
|
@@ -21,5 +24,5 @@ Gem::Specification.new do |spec|
|
|
21
24
|
|
22
25
|
spec.add_development_dependency "bundler", "~> 1.7"
|
23
26
|
spec.add_development_dependency "rake", "~> 10.0"
|
24
|
-
spec.add_development_dependency "minitest"
|
27
|
+
spec.add_development_dependency "minitest", "~> 5"
|
25
28
|
end
|
data/test/basic_test.rb
CHANGED
@@ -31,6 +31,14 @@ class BasicTest < Minitest::Test
|
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
|
+
def test_fat_chars
|
35
|
+
doublewides = (256...1000).map{ |i| i.chr(Encoding::UTF_8) }
|
36
|
+
rx = List::Matcher.rx doublewides
|
37
|
+
doublewides.each do |word|
|
38
|
+
assert rx === word, "can match #{word}"
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
34
42
|
def test_word_chars_case_insensitive
|
35
43
|
word = (1..255).map(&:chr).select{ |c| /\w/ === c }
|
36
44
|
chars = word + ['+']
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: list_matcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.7
|
4
|
+
version: 1.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- dfhoughton
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-12-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -42,17 +42,19 @@ dependencies:
|
|
42
42
|
name: minitest
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- - "
|
45
|
+
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
47
|
+
version: '5'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- - "
|
52
|
+
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
55
|
-
description:
|
54
|
+
version: '5'
|
55
|
+
description: |2
|
56
|
+
List::Matcher automates the creation of regular expressions from lists, including lists of other regular
|
57
|
+
expressions. The expressions it generates from lists of strings are non-backtracking and compact.
|
56
58
|
email:
|
57
59
|
- dfhoughton@gmail.com
|
58
60
|
executables: []
|