regexp_trie 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.travis.yml +4 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +21 -0
- data/README.md +40 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/example/benchmark.rb +90 -0
- data/example/hatena-keyword-list.csv +454722 -0
- data/example/synopsis.rb +4 -0
- data/lib/regexp_trie/version.rb +3 -0
- data/lib/regexp_trie.rb +81 -0
- data/regexp_trie.gemspec +28 -0
- metadata +115 -0
data/example/synopsis.rb
ADDED
data/lib/regexp_trie.rb
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
require_relative "regexp_trie/version"
|
2
|
+
|
3
|
+
# Builds one compact Regexp out of many literal strings.
#
# Strings are stored character by character in a trie made of nested Hashes.
# The empty-string key ('') marks a node where a complete string ends. The
# trie is then serialized into a regexp source in which common prefixes are
# shared, e.g. %w[foo bar baz] becomes /(?:foo|ba[rz])/.
class RegexpTrie
  # Builds a Regexp that matches any of the given literal strings.
  #
  # @param [Array<String>] strings Set of patterns (nested arrays are flattened)
  # @param [Integer,Boolean,nil] option The second argument of Regexp.new()
  # @return [Regexp]
  def self.union(*strings, option: nil)
    rt = new
    strings.flatten.each { |str| rt.add(str) }
    rt.to_regexp(option)
  end

  def initialize
    @head = {} # root node of the trie
  end

  # Adds one literal string to the trie. nil, false and empty strings are
  # ignored.
  #
  # @param [String] str
  # @return [RegexpTrie] self, so calls can be chained
  def add(str)
    return self unless str && !str.empty?

    entry = @head
    str.each_char do |c|
      entry[c] ||= {}
      entry = entry[c]
    end
    entry[''] = true # terminator
    self
  end

  # Serializes the trie into a Regexp. An empty trie yields Regexp.union
  # called with no arguments.
  #
  # @param [Integer,Boolean,nil] option The second argument of Regexp.new()
  # @return [Regexp]
  def to_regexp(option = nil)
    return Regexp.union if @head.empty?

    Regexp.new(build(@head), option)
  end

  private

  # Recursively turns a trie node into regexp source.
  #
  # @param [Hash] entry trie node
  # @return [String, nil] regexp source for everything below this node, or
  #   nil when the node is only a terminator (nothing more to match)
  def build(entry)
    return nil if entry[''] && entry.size == 1

    alt = []  # alternatives that continue for more than one character
    cc = []   # single characters that end a string here (character class)
    q = false # true when a string may also end at this node (optional tail)

    entry.each do |c, child|
      unless child.kind_of?(Hash)
        # The terminator key maps to true rather than to a child node.
        q = true
        next
      end

      recurse = build(child)
      qc = Regexp.quote(c)
      if recurse
        alt.push("#{qc}#{recurse}")
      else
        cc.push(qc)
      end
    end

    # Must be computed before the character class is appended to alt.
    cconly = alt.empty?
    unless cc.empty?
      alt.push(cc.size == 1 ? cc.first : "[#{cc.join('')}]")
    end

    result = alt.size == 1 ? alt.first : "(?:#{alt.join('|')})"
    return result unless q

    if cconly
      # A lone (escaped) character or a character class is a single atom,
      # so the quantifier can be applied directly.
      "#{result}?"
    else
      # Group first so that `?` covers the whole tail. This previously
      # interpolated the unset global $result, which dropped the tail.
      "(?:#{result})?"
    end
  end
end
|
data/regexp_trie.gemspec
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for regexp_trie.

# Put lib/ on the load path so the version constant can be required below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'regexp_trie/version'

Gem::Specification.new do |gem|
  gem.name    = "regexp_trie"
  gem.version = RegexpTrie::VERSION
  gem.authors = ["FUJI Goro (gfx)"]
  gem.email   = ["gfuji@cpan.org"]

  gem.summary     = "Optimized Regexp builder with Trie"
  gem.description = "Optimized Regexp builder with Trie as a port of Perl's Regexp::Trie"
  gem.homepage    = "https://github.com/gfx/ruby-regexp_trie"
  gem.license     = "MIT"

  # Restrict `gem push` to the public RubyGems host.
  gem.metadata['allowed_push_host'] = 'https://rubygems.org/'

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked = `git ls-files -z`.split("\x0")
  gem.files         = tracked.reject { |path| path =~ %r{^(test|spec|features)/} }
  gem.bindir        = "exe"
  gem.executables   = gem.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  gem.add_development_dependency "bundler", "~> 1.11"
  gem.add_development_dependency "rake", "~> 10.0"
  gem.add_development_dependency "minitest", "~> 5.0"
  gem.add_development_dependency "minitest-power_assert"
end
|
metadata
ADDED
@@ -0,0 +1,115 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: regexp_trie
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- FUJI Goro (gfx)
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-01-22 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.11'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.11'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: minitest
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '5.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '5.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: minitest-power_assert
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: Optimized Regexp builder with Trie as a port of Perl's Regexp::Trie
|
70
|
+
email:
|
71
|
+
- gfuji@cpan.org
|
72
|
+
executables: []
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- ".gitignore"
|
77
|
+
- ".travis.yml"
|
78
|
+
- Gemfile
|
79
|
+
- LICENSE.txt
|
80
|
+
- README.md
|
81
|
+
- Rakefile
|
82
|
+
- bin/console
|
83
|
+
- bin/setup
|
84
|
+
- example/benchmark.rb
|
85
|
+
- example/hatena-keyword-list.csv
|
86
|
+
- example/synopsis.rb
|
87
|
+
- lib/regexp_trie.rb
|
88
|
+
- lib/regexp_trie/version.rb
|
89
|
+
- regexp_trie.gemspec
|
90
|
+
homepage: https://github.com/gfx/ruby-regexp_trie
|
91
|
+
licenses:
|
92
|
+
- MIT
|
93
|
+
metadata:
|
94
|
+
allowed_push_host: https://rubygems.org/
|
95
|
+
post_install_message:
|
96
|
+
rdoc_options: []
|
97
|
+
require_paths:
|
98
|
+
- lib
|
99
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
105
|
+
requirements:
|
106
|
+
- - ">="
|
107
|
+
- !ruby/object:Gem::Version
|
108
|
+
version: '0'
|
109
|
+
requirements: []
|
110
|
+
rubyforge_project:
|
111
|
+
rubygems_version: 2.5.1
|
112
|
+
signing_key:
|
113
|
+
specification_version: 4
|
114
|
+
summary: Optimized Regexp builder with Trie
|
115
|
+
test_files: []
|