regexp_optimized_union 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4)
  1. checksums.yaml +7 -0
  2. data/changes +3 -0
  3. data/lib/regexp_optimized_union.rb +12 -21
  4. metadata +9 -9
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 45abdaf8017a48216c2d3de78c008aef03e54a64
4
+ data.tar.gz: 90f26948c74bdcbc769f3f827fb0d601b533999b
5
+ SHA512:
6
+ metadata.gz: 600fdd924917e83e8ce77067d3e28f83a1f219820c4a91967beb46854ac26d1c92cde44aca97c3d1ae19119d496be5e6a0e47089fcc42deb596bc1f26e3ebc75
7
+ data.tar.gz: 000e8b87b88482f6d7b3309ca85322f13dd4bce5c39b948680a7e38fd962bbbf64512fc4dbae99d47b95d3006fbb22fcb1b05761a272b73cf07f3ae9e6908d41
data/changes ADDED
@@ -0,0 +1,3 @@
1
+ 0.2.0
2
+ - Fix error for UTF-8 string
3
+ - Drop support for Ruby <= 1.8
data/lib/regexp_optimized_union.rb CHANGED
@@ -1,13 +1,4 @@
1
- if RUBY_VERSION < '1.9'
2
- require 'enumerator'
3
- class String
4
- unless defined?(ord)
5
- def ord
6
- unpack('C').first
7
- end
8
- end
9
- end
10
- end
1
+ # coding: UTF-8
11
2
 
12
3
  class Regexp
13
4
  # trie for optimization
@@ -75,11 +66,7 @@ class Regexp
75
66
  def build_char_group chars
76
67
  return chars.first if chars.size == 1
77
68
 
78
- if RUBY_VERSION < '1.9'
79
- chars, mb_chars = chars.partition{|c| c.bytesize == 1}
80
- else
81
- mb_chars = []
82
- end
69
+ mb_chars = []
83
70
 
84
71
  chars = chars.map(&:ord)
85
72
  chars.sort!
@@ -93,18 +80,18 @@ class Regexp
93
80
  end
94
81
  end
95
82
 
96
- groups.map! do |range|
83
+ groups = groups.flat_map do |range|
97
84
  # only apply range to >= 4 contiguous chars
98
85
  if range.end >= range.begin + 3
99
- "#{range.begin.chr}-#{range.end.chr}"
86
+ [range.begin, '-'.ord, range.end]
100
87
  elsif range.end > range.begin
101
- range.map(&:chr).join
88
+ range.to_a
102
89
  else
103
- range.begin.chr
90
+ [range.begin]
104
91
  end
105
92
  end
106
93
 
107
- "[#{groups.join}#{mb_chars.join}]"
94
+ "[#{groups.pack 'U*'}#{mb_chars.join}]"
108
95
  end
109
96
 
110
97
  def to_re_src
@@ -148,6 +135,7 @@ class Regexp
148
135
  # build trie
149
136
  a.each do |s|
150
137
  next if s.empty?
138
+ s = s.encode 'utf-8'
151
139
  t = trie
152
140
  s.chars.each do |c|
153
141
  c = Regexp.escape c
@@ -193,7 +181,10 @@ if __FILE__ == $PROGRAM_NAME
193
181
  %w[foobar fooabar foogabar] => /foo(?:|a|ga)bar/,
194
182
  %w[vax vcx vbx vdx] => /v[a-d]x/,
195
183
  %w[vax vcx vbx] => /v[abc]x/,
196
- %w[xa xc xb x] => /x[abc]?/
184
+ %w[xa xc xb x] => /x[abc]?/,
185
+ %w[一郎 二郎 三郎 四郎] => /[一三二四]郎/,
186
+ # caveats: escape chars can not be turned into char group
187
+ %w[a^ a- a*] => /a(?:\^|\-|\*)/,
197
188
  }.each do |a, r|
198
189
  l = Regexp.optimized_union a
199
190
  a.each do |s|
metadata CHANGED
@@ -1,15 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: regexp_optimized_union
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
5
- prerelease:
4
+ version: 0.2.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - luikore
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2012-11-14 00:00:00.000000000 Z
11
+ date: 2016-07-11 00:00:00.000000000 Z
13
12
  dependencies: []
14
13
  description: Regexp.optimized_union(word_list, regexp_options) generates optimized
15
14
  regexp for matching union of word list
@@ -18,32 +17,33 @@ executables: []
18
17
  extensions: []
19
18
  extra_rdoc_files: []
20
19
  files:
20
+ - changes
21
21
  - lib/regexp_optimized_union.rb
22
22
  - readme.md
23
23
  homepage: https://github.com/luikore/regexp_optimized_union
24
24
  licenses:
25
25
  - WTFPL
26
+ metadata: {}
26
27
  post_install_message:
27
28
  rdoc_options: []
28
29
  require_paths:
29
30
  - lib
30
31
  required_ruby_version: !ruby/object:Gem::Requirement
31
- none: false
32
32
  requirements:
33
- - - ! '>='
33
+ - - ">="
34
34
  - !ruby/object:Gem::Version
35
35
  version: '0'
36
36
  required_rubygems_version: !ruby/object:Gem::Requirement
37
- none: false
38
37
  requirements:
39
- - - ! '>='
38
+ - - ">="
40
39
  - !ruby/object:Gem::Version
41
40
  version: 1.3.6
42
41
  requirements: []
43
42
  rubyforge_project:
44
- rubygems_version: 1.8.24
43
+ rubygems_version: 2.5.1
45
44
  signing_key:
46
- specification_version: 3
45
+ specification_version: 4
47
46
  summary: Regexp.optimized_union(word_list, regexp_options) generates optimized regexp
48
47
  for matching union of word list
49
48
  test_files: []
49
+ has_rdoc: