regexp_trie 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.travis.yml +4 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +21 -0
- data/README.md +40 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/example/benchmark.rb +90 -0
- data/example/hatena-keyword-list.csv +454722 -0
- data/example/synopsis.rb +4 -0
- data/lib/regexp_trie/version.rb +3 -0
- data/lib/regexp_trie.rb +81 -0
- data/regexp_trie.gemspec +28 -0
- metadata +115 -0
data/example/synopsis.rb
ADDED
data/lib/regexp_trie.rb
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
require_relative "regexp_trie/version"
|
2
|
+
|
3
|
+
class RegexpTrie
  # Builds a single Regexp that matches any of the given strings.
  #
  # @param [Array<String>] strings Set of literal patterns; nested arrays are
  #   flattened, and nil / empty entries are ignored
  # @param [Integer,Boolean,nil] option Passed through as the second argument
  #   of Regexp.new()
  # @return [Regexp]
  def self.union(*strings, option: nil)
    trie = strings.flatten.each_with_object(new) { |str, rt| rt.add(str) }
    trie.to_regexp(option)
  end

  def initialize
    # Root node of the trie. Each node is a Hash mapping a single character
    # to its child node; the key '' (mapped to true) marks the end of a word.
    @head = {}
  end

  # Registers one literal string in the trie.
  #
  # @param [String] str String to add; nil and empty strings are ignored
  # @return [RegexpTrie] self, so calls can be chained
  def add(str)
    return self if str.nil? || str.empty?

    node = @head
    str.each_char { |c| node = (node[c] ||= {}) }
    node[''] = true # terminator
    self
  end

  # Converts the strings added so far into a Regexp.
  #
  # @param [Integer,Boolean,nil] option The second argument of Regexp.new()
  # @return [Regexp] Regexp.union (with no arguments) when nothing was added
  def to_regexp(option = nil)
    return Regexp.union if @head.empty?

    Regexp.new(build(@head), option)
  end

  private

  # Recursively renders a trie node as regexp source.
  #
  # @param [Hash] entry Trie node
  # @return [String,nil] Regexp source for everything below this node, or nil
  #   when the node holds nothing but the terminator
  def build(entry)
    return nil if entry[''] && entry.size == 1

    alternatives = [] # branches that continue for more than one character
    char_class = []   # branches that end right after a single character
    optional = false  # true when a word may also end at this node

    entry.each do |char, child|
      unless child.kind_of?(Hash)
        optional = true # the terminator entry
        next
      end

      quoted = Regexp.quote(char)
      tail = build(child)
      if tail
        alternatives.push(quoted + tail)
      else
        char_class.push(quoted)
      end
    end

    # Must be captured before the character class is appended below: it tells
    # us whether the result is a single atom that can take a bare "?".
    single_atom = alternatives.empty?
    unless char_class.empty?
      alternatives.push(char_class.size == 1 ? char_class.first : "[#{char_class.join('')}]")
    end

    result = alternatives.size == 1 ? alternatives.first : "(?:#{alternatives.join('|')})"
    return result unless optional

    # A multi-character sequence has to be grouped before it is made optional.
    single_atom ? "#{result}?" : "(?:#{result})?"
  end
end
|
data/regexp_trie.gemspec
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'regexp_trie/version'
|
5
|
+
|
6
|
+
# Gem specification for regexp_trie.
Gem::Specification.new do |gem|
  # Identity
  gem.name    = "regexp_trie"
  gem.version = RegexpTrie::VERSION
  gem.authors = ["FUJI Goro (gfx)"]
  gem.email   = ["gfuji@cpan.org"]

  # Description shown on the gem host
  gem.summary     = "Optimized Regexp builder with Trie"
  gem.description = "Optimized Regexp builder with Trie as a port of Perl's Regexp::Trie"
  gem.homepage    = "https://github.com/gfx/ruby-regexp_trie"
  gem.license     = "MIT"

  # Restrict `gem push` to this host.
  gem.metadata['allowed_push_host'] = 'https://rubygems.org/'

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked = `git ls-files -z`.split("\x0")
  gem.files         = tracked.reject { |path| path.match(%r{^(test|spec|features)/}) }
  gem.bindir        = "exe"
  gem.executables   = gem.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  # Development-only dependencies
  gem.add_development_dependency "bundler", "~> 1.11"
  gem.add_development_dependency "rake", "~> 10.0"
  gem.add_development_dependency "minitest", "~> 5.0"
  gem.add_development_dependency "minitest-power_assert"
end
|
metadata
ADDED
@@ -0,0 +1,115 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: regexp_trie
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- FUJI Goro (gfx)
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-01-22 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.11'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.11'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: minitest
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '5.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '5.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: minitest-power_assert
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: Optimized Regexp builder with Trie as a port of Perl's Regexp::Trie
|
70
|
+
email:
|
71
|
+
- gfuji@cpan.org
|
72
|
+
executables: []
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- ".gitignore"
|
77
|
+
- ".travis.yml"
|
78
|
+
- Gemfile
|
79
|
+
- LICENSE.txt
|
80
|
+
- README.md
|
81
|
+
- Rakefile
|
82
|
+
- bin/console
|
83
|
+
- bin/setup
|
84
|
+
- example/benchmark.rb
|
85
|
+
- example/hatena-keyword-list.csv
|
86
|
+
- example/synopsis.rb
|
87
|
+
- lib/regexp_trie.rb
|
88
|
+
- lib/regexp_trie/version.rb
|
89
|
+
- regexp_trie.gemspec
|
90
|
+
homepage: https://github.com/gfx/ruby-regexp_trie
|
91
|
+
licenses:
|
92
|
+
- MIT
|
93
|
+
metadata:
|
94
|
+
allowed_push_host: https://rubygems.org/
|
95
|
+
post_install_message:
|
96
|
+
rdoc_options: []
|
97
|
+
require_paths:
|
98
|
+
- lib
|
99
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
105
|
+
requirements:
|
106
|
+
- - ">="
|
107
|
+
- !ruby/object:Gem::Version
|
108
|
+
version: '0'
|
109
|
+
requirements: []
|
110
|
+
rubyforge_project:
|
111
|
+
rubygems_version: 2.5.1
|
112
|
+
signing_key:
|
113
|
+
specification_version: 4
|
114
|
+
summary: Optimized Regexp builder with Trie
|
115
|
+
test_files: []
|