regexp_optimized_union 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. checksums.yaml +7 -0
  2. data/changes +3 -0
  3. data/lib/regexp_optimized_union.rb +12 -21
  4. metadata +9 -9
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 45abdaf8017a48216c2d3de78c008aef03e54a64
4
+ data.tar.gz: 90f26948c74bdcbc769f3f827fb0d601b533999b
5
+ SHA512:
6
+ metadata.gz: 600fdd924917e83e8ce77067d3e28f83a1f219820c4a91967beb46854ac26d1c92cde44aca97c3d1ae19119d496be5e6a0e47089fcc42deb596bc1f26e3ebc75
7
+ data.tar.gz: 000e8b87b88482f6d7b3309ca85322f13dd4bce5c39b948680a7e38fd962bbbf64512fc4dbae99d47b95d3006fbb22fcb1b05761a272b73cf07f3ae9e6908d41
data/changes ADDED
@@ -0,0 +1,3 @@
1
+ 0.2.0
2
+ - Fix error for UTF-8 string
3
+ - Drop support for Ruby <= 1.8
data/lib/regexp_optimized_union.rb CHANGED
@@ -1,13 +1,4 @@
1
- if RUBY_VERSION < '1.9'
2
- require 'enumerator'
3
- class String
4
- unless defined?(ord)
5
- def ord
6
- unpack('C').first
7
- end
8
- end
9
- end
10
- end
1
+ # coding: UTF-8
11
2
 
12
3
  class Regexp
13
4
  # trie for optimization
@@ -75,11 +66,7 @@ class Regexp
75
66
  def build_char_group chars
76
67
  return chars.first if chars.size == 1
77
68
 
78
- if RUBY_VERSION < '1.9'
79
- chars, mb_chars = chars.partition{|c| c.bytesize == 1}
80
- else
81
- mb_chars = []
82
- end
69
+ mb_chars = []
83
70
 
84
71
  chars = chars.map(&:ord)
85
72
  chars.sort!
@@ -93,18 +80,18 @@ class Regexp
93
80
  end
94
81
  end
95
82
 
96
- groups.map! do |range|
83
+ groups = groups.flat_map do |range|
97
84
  # only apply range to >= 4 contiguous chars
98
85
  if range.end >= range.begin + 3
99
- "#{range.begin.chr}-#{range.end.chr}"
86
+ [range.begin, '-'.ord, range.end]
100
87
  elsif range.end > range.begin
101
- range.map(&:chr).join
88
+ range.to_a
102
89
  else
103
- range.begin.chr
90
+ [range.begin]
104
91
  end
105
92
  end
106
93
 
107
- "[#{groups.join}#{mb_chars.join}]"
94
+ "[#{groups.pack 'U*'}#{mb_chars.join}]"
108
95
  end
109
96
 
110
97
  def to_re_src
@@ -148,6 +135,7 @@ class Regexp
148
135
  # build trie
149
136
  a.each do |s|
150
137
  next if s.empty?
138
+ s = s.encode 'utf-8'
151
139
  t = trie
152
140
  s.chars.each do |c|
153
141
  c = Regexp.escape c
@@ -193,7 +181,10 @@ if __FILE__ == $PROGRAM_NAME
193
181
  %w[foobar fooabar foogabar] => /foo(?:|a|ga)bar/,
194
182
  %w[vax vcx vbx vdx] => /v[a-d]x/,
195
183
  %w[vax vcx vbx] => /v[abc]x/,
196
- %w[xa xc xb x] => /x[abc]?/
184
+ %w[xa xc xb x] => /x[abc]?/,
185
+ %w[一郎 二郎 三郎 四郎] => /[一三二四]郎/,
186
+ # caveats: escape chars can not be turned into char group
187
+ %w[a^ a- a*] => /a(?:\^|\-|\*)/,
197
188
  }.each do |a, r|
198
189
  l = Regexp.optimized_union a
199
190
  a.each do |s|
metadata CHANGED
@@ -1,15 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: regexp_optimized_union
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
5
- prerelease:
4
+ version: 0.2.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - luikore
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2012-11-14 00:00:00.000000000 Z
11
+ date: 2016-07-11 00:00:00.000000000 Z
13
12
  dependencies: []
14
13
  description: Regexp.optimized_union(word_list, regexp_options) generates optimized
15
14
  regexp for matching union of word list
@@ -18,32 +17,33 @@ executables: []
18
17
  extensions: []
19
18
  extra_rdoc_files: []
20
19
  files:
20
+ - changes
21
21
  - lib/regexp_optimized_union.rb
22
22
  - readme.md
23
23
  homepage: https://github.com/luikore/regexp_optimized_union
24
24
  licenses:
25
25
  - WTFPL
26
+ metadata: {}
26
27
  post_install_message:
27
28
  rdoc_options: []
28
29
  require_paths:
29
30
  - lib
30
31
  required_ruby_version: !ruby/object:Gem::Requirement
31
- none: false
32
32
  requirements:
33
- - - ! '>='
33
+ - - ">="
34
34
  - !ruby/object:Gem::Version
35
35
  version: '0'
36
36
  required_rubygems_version: !ruby/object:Gem::Requirement
37
- none: false
38
37
  requirements:
39
- - - ! '>='
38
+ - - ">="
40
39
  - !ruby/object:Gem::Version
41
40
  version: 1.3.6
42
41
  requirements: []
43
42
  rubyforge_project:
44
- rubygems_version: 1.8.24
43
+ rubygems_version: 2.5.1
45
44
  signing_key:
46
- specification_version: 3
45
+ specification_version: 4
47
46
  summary: Regexp.optimized_union(word_list, regexp_options) generates optimized regexp
48
47
  for matching union of word list
49
48
  test_files: []
49
+ has_rdoc: