list_matcher 1.0.7 → 1.1.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0f79c113982bca292d38166efef13f34dad0e94c
4
- data.tar.gz: 57d959f68c59bcb5e46d845bbc91fc6c886938ea
3
+ metadata.gz: 7cd4faa0dd3bafd7a1933d14d325056175c28886
4
+ data.tar.gz: 895da5871b03ba84b0d41bade4ed9aaa45231b0c
5
5
  SHA512:
6
- metadata.gz: 60928c05f29c22d7c35dd185754ac15e00c67241371b63a8ec49623cbbd4f4d56039ccceba89ee7bfabf8fd42d48d61032b6fb04fade09c46e955fc671c5a59d
7
- data.tar.gz: 5833bb49787f30342f8217fdce788b8ba1e60cae48e4cef83c90346168112c2a3d1ac0581a3261328d9537e06bc7fd0664feff39eda097cd042183b4a74c9072
6
+ metadata.gz: 944331095b454a924a047f7c1f96bae2c8254b4e092622e185de66e35063a6f698e1e8c3a45f99cebe7c4e2077203987cb94bb53d4ffb2720eb601f7a055a032
7
+ data.tar.gz: eb3ad634394d9fa378f206117e47b553e3f21c032a95cd284b5591c81893374cdc69330c2f48f9e0e1f2693e13e96b391c45eb71b470567527d636b7e25aeaa6
data/README.md CHANGED
@@ -401,6 +401,18 @@ List::Matcher.pattern [ 'cat and dog', '# is sometimes called the pound symbol'
401
401
 
402
402
  Note that `List::Matcher` will continue to quote other white space characters.
403
403
 
404
+ ### encoding
405
+
406
+ ```ruby
407
+ default: Encoding::UTF_8
408
+ ```
409
+
410
+ `List::Matcher` converts characters into integers during the construction of character classes and then
411
+ must convert the integers back into characters. In order to do this with characters whose code point is
412
+ above 255 we must give `chr` a character encoding. That is this option's only purpose. Most likely you will never
413
+ need to change this default, but if it causes you trouble this parameter is provided so you can fix
414
+ it without having to fork the code.
415
+
404
416
  ## Benchmarks
405
417
 
406
418
  Efficiency isn't the principal purpose of List::Matcher, but in almost all cases List::Matcher
@@ -3,7 +3,8 @@ require "list_matcher/version"
3
3
  module List
4
4
  class Matcher
5
5
  attr_reader :atomic, :backtracking, :bound, :case_insensitive, :strip, :left_bound,
6
- :right_bound, :word_test, :normalize_whitespace, :multiline, :name, :vet, :not_extended
6
+ :right_bound, :word_test, :normalize_whitespace, :multiline, :name, :vet, :not_extended,
7
+ :encoding
7
8
 
8
9
  # a special exception class for List::Matcher
9
10
  class Error < ::StandardError; end
@@ -33,7 +34,8 @@ module List
33
34
  normalize_whitespace: false,
34
35
  symbols: {},
35
36
  name: false,
36
- vet: false
37
+ vet: false,
38
+ encoding: Encoding::UTF_8
37
39
  )
38
40
  @atomic = atomic
39
41
  @backtracking = backtracking
@@ -46,6 +48,7 @@ module List
46
48
  @bound = !!bound
47
49
  @normalize_whitespace = normalize_whitespace
48
50
  @vet = vet
51
+ @encoding = encoding
49
52
  if name
50
53
  raise Error, "name must be a string or symbol" unless name.is_a?(String) || name.is_a?(Symbol)
51
54
  begin
@@ -354,7 +357,7 @@ module List
354
357
  s == 0 ? a.to_s <=> b.to_s : s
355
358
  end
356
359
  end.each do |var, opts|
357
- c = ( max += 1 ).chr
360
+ c = ( max += 1 ).chr(engine.encoding)
358
361
  sp = if opts.is_a? Hash
359
362
  pat = opts.delete :pattern
360
363
  raise Error, "symbol #{var} requires a pattern" unless pat || var.is_a?(Regexp)
@@ -372,12 +375,12 @@ module List
372
375
  end
373
376
  if engine.bound
374
377
  if engine.left_bound
375
- c = ( max += 1 ).chr
378
+ c = ( max += 1 ).chr(engine.encoding)
376
379
  @left = SymbolPattern.new engine, c, c, engine.left_bound
377
380
  @specials << @left
378
381
  end
379
382
  if engine.right_bound
380
- c = ( max += 1 ).chr
383
+ c = ( max += 1 ).chr(engine.encoding)
381
384
  @right = SymbolPattern.new engine, c, c, engine.right_bound
382
385
  @specials << @right
383
386
  end
@@ -684,15 +687,15 @@ module List
684
687
  else
685
688
  rs = ranges(chars)
686
689
  if rs.size == 1 && rs[0][0] == rs[0][1]
687
- cc_quote rs[0][0].chr
690
+ cc_quote rs[0][0].chr(engine.encoding)
688
691
  else
689
692
  mid = rs.map do |s, e|
690
693
  if s == e
691
- cc_quote s.chr
694
+ cc_quote s.chr(engine.encoding)
692
695
  elsif e == s + 1
693
- "#{ cc_quote s.chr }#{ cc_quote e.chr }"
696
+ "#{ cc_quote s.chr(engine.encoding) }#{ cc_quote e.chr(engine.encoding) }"
694
697
  else
695
- "#{ cc_quote s.chr }-#{ cc_quote e.chr }"
698
+ "#{ cc_quote s.chr(engine.encoding) }-#{ cc_quote e.chr(engine.encoding) }"
696
699
  end
697
700
  end.join
698
701
  end
@@ -1,3 +1,3 @@
1
1
  module ListMatcher
2
- VERSION = "1.0.7"
2
+ VERSION = "1.1.8"
3
3
  end
@@ -9,7 +9,10 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["dfhoughton"]
10
10
  spec.email = ["dfhoughton@gmail.com"]
11
11
  spec.summary = %q{List::Matcher automates the generation of efficient regular expressions.}
12
- spec.description = spec.summary
12
+ spec.description = <<-END
13
+ List::Matcher automates the creation of regular expressions from lists, including lists of other regular
14
+ expressions. The expressions it generates from lists of strings are non-backtracking and compact.
15
+ END
13
16
  spec.homepage = "https://github.com/dfhoughton/list_matcher"
14
17
  spec.license = "MIT"
15
18
 
@@ -21,5 +24,5 @@ Gem::Specification.new do |spec|
21
24
 
22
25
  spec.add_development_dependency "bundler", "~> 1.7"
23
26
  spec.add_development_dependency "rake", "~> 10.0"
24
- spec.add_development_dependency "minitest"
27
+ spec.add_development_dependency "minitest", "~> 5"
25
28
  end
@@ -31,6 +31,14 @@ class BasicTest < Minitest::Test
31
31
  end
32
32
  end
33
33
 
34
+ def test_fat_chars
35
+ doublewides = (256...1000).map{ |i| i.chr(Encoding::UTF_8) }
36
+ rx = List::Matcher.rx doublewides
37
+ doublewides.each do |word|
38
+ assert rx === word, "can match #{word}"
39
+ end
40
+ end
41
+
34
42
  def test_word_chars_case_insensitive
35
43
  word = (1..255).map(&:chr).select{ |c| /\w/ === c }
36
44
  chars = word + ['+']
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: list_matcher
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.7
4
+ version: 1.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - dfhoughton
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-01-19 00:00:00.000000000 Z
11
+ date: 2017-12-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -42,17 +42,19 @@ dependencies:
42
42
  name: minitest
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ">="
45
+ - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '0'
47
+ version: '5'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ">="
52
+ - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '0'
55
- description: List::Matcher automates the generation of efficient regular expressions.
54
+ version: '5'
55
+ description: |2
56
+ List::Matcher automates the creation of regular expressions from lists, including lists of other regular
57
+ expressions. The expressions it generates from lists of strings are non-backtracking and compact.
56
58
  email:
57
59
  - dfhoughton@gmail.com
58
60
  executables: []