regexp_optimized_union 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/changes +3 -0
- data/lib/regexp_optimized_union.rb +12 -21
- metadata +9 -9
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 45abdaf8017a48216c2d3de78c008aef03e54a64
|
4
|
+
data.tar.gz: 90f26948c74bdcbc769f3f827fb0d601b533999b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 600fdd924917e83e8ce77067d3e28f83a1f219820c4a91967beb46854ac26d1c92cde44aca97c3d1ae19119d496be5e6a0e47089fcc42deb596bc1f26e3ebc75
|
7
|
+
data.tar.gz: 000e8b87b88482f6d7b3309ca85322f13dd4bce5c39b948680a7e38fd962bbbf64512fc4dbae99d47b95d3006fbb22fcb1b05761a272b73cf07f3ae9e6908d41
|
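data/changes
ADDED
[the 3 added lines of this file are not shown in this view]
data/lib/regexp_optimized_union.rb
CHANGED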
@@ -1,13 +1,4 @@
|
|
1
|
-
|
2
|
-
require 'enumerator'
|
3
|
-
class String
|
4
|
-
unless defined?(ord)
|
5
|
-
def ord
|
6
|
-
unpack('C').first
|
7
|
-
end
|
8
|
-
end
|
9
|
-
end
|
10
|
-
end
|
1
|
+
# coding: UTF-8
|
11
2
|
|
12
3
|
class Regexp
|
13
4
|
# trie for optimization
|
@@ -75,11 +66,7 @@ class Regexp
|
|
75
66
|
def build_char_group chars
|
76
67
|
return chars.first if chars.size == 1
|
77
68
|
|
78
|
-
|
79
|
-
chars, mb_chars = chars.partition{|c| c.bytesize == 1}
|
80
|
-
else
|
81
|
-
mb_chars = []
|
82
|
-
end
|
69
|
+
mb_chars = []
|
83
70
|
|
84
71
|
chars = chars.map(&:ord)
|
85
72
|
chars.sort!
|
@@ -93,18 +80,18 @@ class Regexp
|
|
93
80
|
end
|
94
81
|
end
|
95
82
|
|
96
|
-
groups.
|
83
|
+
groups = groups.flat_map do |range|
|
97
84
|
# only apply range to >= 4 contiguous chars
|
98
85
|
if range.end >= range.begin + 3
|
99
|
-
|
86
|
+
[range.begin, '-'.ord, range.end]
|
100
87
|
elsif range.end > range.begin
|
101
|
-
range.
|
88
|
+
range.to_a
|
102
89
|
else
|
103
|
-
range.begin
|
90
|
+
[range.begin]
|
104
91
|
end
|
105
92
|
end
|
106
93
|
|
107
|
-
"[#{groups.
|
94
|
+
"[#{groups.pack 'U*'}#{mb_chars.join}]"
|
108
95
|
end
|
109
96
|
|
110
97
|
def to_re_src
|
@@ -148,6 +135,7 @@ class Regexp
|
|
148
135
|
# build trie
|
149
136
|
a.each do |s|
|
150
137
|
next if s.empty?
|
138
|
+
s = s.encode 'utf-8'
|
151
139
|
t = trie
|
152
140
|
s.chars.each do |c|
|
153
141
|
c = Regexp.escape c
|
@@ -193,7 +181,10 @@ if __FILE__ == $PROGRAM_NAME
|
|
193
181
|
%w[foobar fooabar foogabar] => /foo(?:|a|ga)bar/,
|
194
182
|
%w[vax vcx vbx vdx] => /v[a-d]x/,
|
195
183
|
%w[vax vcx vbx] => /v[abc]x/,
|
196
|
-
%w[xa xc xb x] => /x[abc]
|
184
|
+
%w[xa xc xb x] => /x[abc]?/,
|
185
|
+
%w[一郎 二郎 三郎 四郎] => /[一三二四]郎/,
|
186
|
+
# caveats: escape chars can not be turned into char group
|
187
|
+
%w[a^ a- a*] => /a(?:\^|\-|\*)/,
|
197
188
|
}.each do |a, r|
|
198
189
|
l = Regexp.optimized_union a
|
199
190
|
a.each do |s|
|
metadata
CHANGED
@@ -1,15 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp_optimized_union
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
5
|
-
prerelease:
|
4
|
+
version: 0.2.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- luikore
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2016-07-11 00:00:00.000000000 Z
|
13
12
|
dependencies: []
|
14
13
|
description: Regexp.optimized_union(word_list, regexp_options) generates optimized
|
15
14
|
regexp for matching union of word list
|
@@ -18,32 +17,33 @@ executables: []
|
|
18
17
|
extensions: []
|
19
18
|
extra_rdoc_files: []
|
20
19
|
files:
|
20
|
+
- changes
|
21
21
|
- lib/regexp_optimized_union.rb
|
22
22
|
- readme.md
|
23
23
|
homepage: https://github.com/luikore/regexp_optimized_union
|
24
24
|
licenses:
|
25
25
|
- WTFPL
|
26
|
+
metadata: {}
|
26
27
|
post_install_message:
|
27
28
|
rdoc_options: []
|
28
29
|
require_paths:
|
29
30
|
- lib
|
30
31
|
required_ruby_version: !ruby/object:Gem::Requirement
|
31
|
-
none: false
|
32
32
|
requirements:
|
33
|
-
- -
|
33
|
+
- - ">="
|
34
34
|
- !ruby/object:Gem::Version
|
35
35
|
version: '0'
|
36
36
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
37
|
-
none: false
|
38
37
|
requirements:
|
39
|
-
- -
|
38
|
+
- - ">="
|
40
39
|
- !ruby/object:Gem::Version
|
41
40
|
version: 1.3.6
|
42
41
|
requirements: []
|
43
42
|
rubyforge_project:
|
44
|
-
rubygems_version:
|
43
|
+
rubygems_version: 2.5.1
|
45
44
|
signing_key:
|
46
|
-
specification_version:
|
45
|
+
specification_version: 4
|
47
46
|
summary: Regexp.optimized_union(word_list, regexp_options) generates optimized regexp
|
48
47
|
for matching union of word list
|
49
48
|
test_files: []
|
49
|
+
has_rdoc:
|