regexp_optimized_union 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/changes +3 -0
- data/lib/regexp_optimized_union.rb +12 -21
- metadata +9 -9
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 45abdaf8017a48216c2d3de78c008aef03e54a64
|
4
|
+
data.tar.gz: 90f26948c74bdcbc769f3f827fb0d601b533999b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 600fdd924917e83e8ce77067d3e28f83a1f219820c4a91967beb46854ac26d1c92cde44aca97c3d1ae19119d496be5e6a0e47089fcc42deb596bc1f26e3ebc75
|
7
|
+
data.tar.gz: 000e8b87b88482f6d7b3309ca85322f13dd4bce5c39b948680a7e38fd962bbbf64512fc4dbae99d47b95d3006fbb22fcb1b05761a272b73cf07f3ae9e6908d41
|
data/changes
ADDED
data/lib/regexp_optimized_union.rb
CHANGED
@@ -1,13 +1,4 @@
|
|
1
|
-
|
2
|
-
require 'enumerator'
|
3
|
-
class String
|
4
|
-
unless defined?(ord)
|
5
|
-
def ord
|
6
|
-
unpack('C').first
|
7
|
-
end
|
8
|
-
end
|
9
|
-
end
|
10
|
-
end
|
1
|
+
# coding: UTF-8
|
11
2
|
|
12
3
|
class Regexp
|
13
4
|
# trie for optimization
|
@@ -75,11 +66,7 @@ class Regexp
|
|
75
66
|
def build_char_group chars
|
76
67
|
return chars.first if chars.size == 1
|
77
68
|
|
78
|
-
|
79
|
-
chars, mb_chars = chars.partition{|c| c.bytesize == 1}
|
80
|
-
else
|
81
|
-
mb_chars = []
|
82
|
-
end
|
69
|
+
mb_chars = []
|
83
70
|
|
84
71
|
chars = chars.map(&:ord)
|
85
72
|
chars.sort!
|
@@ -93,18 +80,18 @@ class Regexp
|
|
93
80
|
end
|
94
81
|
end
|
95
82
|
|
96
|
-
groups.
|
83
|
+
groups = groups.flat_map do |range|
|
97
84
|
# only apply range to >= 4 contiguous chars
|
98
85
|
if range.end >= range.begin + 3
|
99
|
-
|
86
|
+
[range.begin, '-'.ord, range.end]
|
100
87
|
elsif range.end > range.begin
|
101
|
-
range.
|
88
|
+
range.to_a
|
102
89
|
else
|
103
|
-
range.begin
|
90
|
+
[range.begin]
|
104
91
|
end
|
105
92
|
end
|
106
93
|
|
107
|
-
"[#{groups.
|
94
|
+
"[#{groups.pack 'U*'}#{mb_chars.join}]"
|
108
95
|
end
|
109
96
|
|
110
97
|
def to_re_src
|
@@ -148,6 +135,7 @@ class Regexp
|
|
148
135
|
# build trie
|
149
136
|
a.each do |s|
|
150
137
|
next if s.empty?
|
138
|
+
s = s.encode 'utf-8'
|
151
139
|
t = trie
|
152
140
|
s.chars.each do |c|
|
153
141
|
c = Regexp.escape c
|
@@ -193,7 +181,10 @@ if __FILE__ == $PROGRAM_NAME
|
|
193
181
|
%w[foobar fooabar foogabar] => /foo(?:|a|ga)bar/,
|
194
182
|
%w[vax vcx vbx vdx] => /v[a-d]x/,
|
195
183
|
%w[vax vcx vbx] => /v[abc]x/,
|
196
|
-
%w[xa xc xb x] => /x[abc]?/
|
184
|
+
%w[xa xc xb x] => /x[abc]?/,
|
185
|
+
%w[一郎 二郎 三郎 四郎] => /[一三二四]郎/,
|
186
|
+
# caveats: escape chars can not be turned into char group
|
187
|
+
%w[a^ a- a*] => /a(?:\^|\-|\*)/,
|
197
188
|
}.each do |a, r|
|
198
189
|
l = Regexp.optimized_union a
|
199
190
|
a.each do |s|
|
metadata
CHANGED
@@ -1,15 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp_optimized_union
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
5
|
-
prerelease:
|
4
|
+
version: 0.2.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- luikore
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2016-07-11 00:00:00.000000000 Z
|
13
12
|
dependencies: []
|
14
13
|
description: Regexp.optimized_union(word_list, regexp_options) generates optimized
|
15
14
|
regexp for matching union of word list
|
@@ -18,32 +17,33 @@ executables: []
|
|
18
17
|
extensions: []
|
19
18
|
extra_rdoc_files: []
|
20
19
|
files:
|
20
|
+
- changes
|
21
21
|
- lib/regexp_optimized_union.rb
|
22
22
|
- readme.md
|
23
23
|
homepage: https://github.com/luikore/regexp_optimized_union
|
24
24
|
licenses:
|
25
25
|
- WTFPL
|
26
|
+
metadata: {}
|
26
27
|
post_install_message:
|
27
28
|
rdoc_options: []
|
28
29
|
require_paths:
|
29
30
|
- lib
|
30
31
|
required_ruby_version: !ruby/object:Gem::Requirement
|
31
|
-
none: false
|
32
32
|
requirements:
|
33
|
-
- -
|
33
|
+
- - ">="
|
34
34
|
- !ruby/object:Gem::Version
|
35
35
|
version: '0'
|
36
36
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
37
|
-
none: false
|
38
37
|
requirements:
|
39
|
-
- -
|
38
|
+
- - ">="
|
40
39
|
- !ruby/object:Gem::Version
|
41
40
|
version: 1.3.6
|
42
41
|
requirements: []
|
43
42
|
rubyforge_project:
|
44
|
-
rubygems_version:
|
43
|
+
rubygems_version: 2.5.1
|
45
44
|
signing_key:
|
46
|
-
specification_version:
|
45
|
+
specification_version: 4
|
47
46
|
summary: Regexp.optimized_union(word_list, regexp_options) generates optimized regexp
|
48
47
|
for matching union of word list
|
49
48
|
test_files: []
|
49
|
+
has_rdoc:
|