list_matcher 1.0.7 → 1.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0f79c113982bca292d38166efef13f34dad0e94c
4
- data.tar.gz: 57d959f68c59bcb5e46d845bbc91fc6c886938ea
3
+ metadata.gz: 7cd4faa0dd3bafd7a1933d14d325056175c28886
4
+ data.tar.gz: 895da5871b03ba84b0d41bade4ed9aaa45231b0c
5
5
  SHA512:
6
- metadata.gz: 60928c05f29c22d7c35dd185754ac15e00c67241371b63a8ec49623cbbd4f4d56039ccceba89ee7bfabf8fd42d48d61032b6fb04fade09c46e955fc671c5a59d
7
- data.tar.gz: 5833bb49787f30342f8217fdce788b8ba1e60cae48e4cef83c90346168112c2a3d1ac0581a3261328d9537e06bc7fd0664feff39eda097cd042183b4a74c9072
6
+ metadata.gz: 944331095b454a924a047f7c1f96bae2c8254b4e092622e185de66e35063a6f698e1e8c3a45f99cebe7c4e2077203987cb94bb53d4ffb2720eb601f7a055a032
7
+ data.tar.gz: eb3ad634394d9fa378f206117e47b553e3f21c032a95cd284b5591c81893374cdc69330c2f48f9e0e1f2693e13e96b391c45eb71b470567527d636b7e25aeaa6
data/README.md CHANGED
@@ -401,6 +401,18 @@ List::Matcher.pattern [ 'cat and dog', '# is sometimes called the pound symbol'
401
401
 
402
402
  Note that `List::Matcher` will continue to quote other white space characters.
403
403
 
404
+ ### encoding
405
+
406
+ ```ruby
407
+ default: Encoding::UTF_8
408
+ ```
409
+
410
+ `List::Matcher` converts characters into integers during the construction of character classes and then
411
+ must convert the integers back into characters. In order to do this with characters whose code point is
412
+ above 255 we must give `chr` a character encoding. This is all that this is. Most likely you will never
413
+ need to change this default, but if it causes you trouble this parameter is provided so you can fix
414
+ it without having to fork the code.
415
+
404
416
  ## Benchmarks
405
417
 
406
418
  Efficiency isn't the principal purpose of List::Matcher, but in almost all cases List::Matcher
@@ -3,7 +3,8 @@ require "list_matcher/version"
3
3
  module List
4
4
  class Matcher
5
5
  attr_reader :atomic, :backtracking, :bound, :case_insensitive, :strip, :left_bound,
6
- :right_bound, :word_test, :normalize_whitespace, :multiline, :name, :vet, :not_extended
6
+ :right_bound, :word_test, :normalize_whitespace, :multiline, :name, :vet, :not_extended,
7
+ :encoding
7
8
 
8
9
  # a special exception class for List::Matcher
9
10
  class Error < ::StandardError; end
@@ -33,7 +34,8 @@ module List
33
34
  normalize_whitespace: false,
34
35
  symbols: {},
35
36
  name: false,
36
- vet: false
37
+ vet: false,
38
+ encoding: Encoding::UTF_8
37
39
  )
38
40
  @atomic = atomic
39
41
  @backtracking = backtracking
@@ -46,6 +48,7 @@ module List
46
48
  @bound = !!bound
47
49
  @normalize_whitespace = normalize_whitespace
48
50
  @vet = vet
51
+ @encoding = encoding
49
52
  if name
50
53
  raise Error, "name must be a string or symbol" unless name.is_a?(String) || name.is_a?(Symbol)
51
54
  begin
@@ -354,7 +357,7 @@ module List
354
357
  s == 0 ? a.to_s <=> b.to_s : s
355
358
  end
356
359
  end.each do |var, opts|
357
- c = ( max += 1 ).chr
360
+ c = ( max += 1 ).chr(engine.encoding)
358
361
  sp = if opts.is_a? Hash
359
362
  pat = opts.delete :pattern
360
363
  raise Error, "symbol #{var} requires a pattern" unless pat || var.is_a?(Regexp)
@@ -372,12 +375,12 @@ module List
372
375
  end
373
376
  if engine.bound
374
377
  if engine.left_bound
375
- c = ( max += 1 ).chr
378
+ c = ( max += 1 ).chr(engine.encoding)
376
379
  @left = SymbolPattern.new engine, c, c, engine.left_bound
377
380
  @specials << @left
378
381
  end
379
382
  if engine.right_bound
380
- c = ( max += 1 ).chr
383
+ c = ( max += 1 ).chr(engine.encoding)
381
384
  @right = SymbolPattern.new engine, c, c, engine.right_bound
382
385
  @specials << @right
383
386
  end
@@ -684,15 +687,15 @@ module List
684
687
  else
685
688
  rs = ranges(chars)
686
689
  if rs.size == 1 && rs[0][0] == rs[0][1]
687
- cc_quote rs[0][0].chr
690
+ cc_quote rs[0][0].chr(engine.encoding)
688
691
  else
689
692
  mid = rs.map do |s, e|
690
693
  if s == e
691
- cc_quote s.chr
694
+ cc_quote s.chr(engine.encoding)
692
695
  elsif e == s + 1
693
- "#{ cc_quote s.chr }#{ cc_quote e.chr }"
696
+ "#{ cc_quote s.chr(engine.encoding) }#{ cc_quote e.chr(engine.encoding) }"
694
697
  else
695
- "#{ cc_quote s.chr }-#{ cc_quote e.chr }"
698
+ "#{ cc_quote s.chr(engine.encoding) }-#{ cc_quote e.chr(engine.encoding) }"
696
699
  end
697
700
  end.join
698
701
  end
@@ -1,3 +1,3 @@
1
1
  module ListMatcher
2
- VERSION = "1.0.7"
2
+ VERSION = "1.1.8"
3
3
  end
@@ -9,7 +9,10 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["dfhoughton"]
10
10
  spec.email = ["dfhoughton@gmail.com"]
11
11
  spec.summary = %q{List::Matcher automates the generation of efficient regular expressions.}
12
- spec.description = spec.summary
12
+ spec.description = <<-END
13
+ List::Matcher automates the creation of regular expressions from lists, including lists of other regular
14
+ expressions. The expressions it generates from lists of strings are non-backtracking and compact.
15
+ END
13
16
  spec.homepage = "https://github.com/dfhoughton/list_matcher"
14
17
  spec.license = "MIT"
15
18
 
@@ -21,5 +24,5 @@ Gem::Specification.new do |spec|
21
24
 
22
25
  spec.add_development_dependency "bundler", "~> 1.7"
23
26
  spec.add_development_dependency "rake", "~> 10.0"
24
- spec.add_development_dependency "minitest"
27
+ spec.add_development_dependency "minitest", "~> 5"
25
28
  end
@@ -31,6 +31,14 @@ class BasicTest < Minitest::Test
31
31
  end
32
32
  end
33
33
 
34
+ def test_fat_chars
35
+ doublewides = (256...1000).map{ |i| i.chr(Encoding::UTF_8) }
36
+ rx = List::Matcher.rx doublewides
37
+ doublewides.each do |word|
38
+ assert rx === word, "can match #{word}"
39
+ end
40
+ end
41
+
34
42
  def test_word_chars_case_insensitive
35
43
  word = (1..255).map(&:chr).select{ |c| /\w/ === c }
36
44
  chars = word + ['+']
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: list_matcher
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.7
4
+ version: 1.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - dfhoughton
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-01-19 00:00:00.000000000 Z
11
+ date: 2017-12-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -42,17 +42,19 @@ dependencies:
42
42
  name: minitest
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ">="
45
+ - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '0'
47
+ version: '5'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ">="
52
+ - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '0'
55
- description: List::Matcher automates the generation of efficient regular expressions.
54
+ version: '5'
55
+ description: |2
56
+ List::Matcher automates the creation of regular expressions from lists, including lists of other regular
57
+ expressions. The expressions it generates from lists of strings are non-backtracking and compact.
56
58
  email:
57
59
  - dfhoughton@gmail.com
58
60
  executables: []