aho_corasick 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/aho_corasick.rb +13 -2
- data/lib/aho_corasick/version.rb +3 -0
- metadata +24 -33
- data/Gemfile +0 -8
- data/Gemfile.lock +0 -14
- data/Rakefile +0 -79
- data/spec/aho_corasick_spec.rb +0 -56
- data/spec/spec_helper.rb +0 -8
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 9f6e421348a6325c2d72bfa6ea3719a160c00c8f
|
4
|
+
data.tar.gz: 4beff5dc400588ce17788d05ac73ac6feec2d35d
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: bfd393d096029c9b352bca77c21dc7dec3423be0245c78724452dba61f626083bde2477a36a8ea06e02d317611d09440802adfb3fe1a37e3fd0bdd2c3a1e9704
|
7
|
+
data.tar.gz: de26ede37c3fcde55786743316251669c3f5a0a9894f4be324c9d8b221f9d80972b072bee474297df301ab246062955324875f85cc5d6f6ea3d788340855d97b
|
data/lib/aho_corasick.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
class AhoCorasick
|
2
|
-
def initialize(*terms)
|
2
|
+
def initialize(*args)
|
3
|
+
terms = terms_for(args)
|
3
4
|
@root = TreeNode.new
|
4
5
|
unsafe_insert(terms)
|
5
6
|
create_suffix_links
|
@@ -15,13 +16,22 @@ class AhoCorasick
|
|
15
16
|
return matches
|
16
17
|
end
|
17
18
|
|
18
|
-
def insert(*terms)
|
19
|
+
def insert(*args)
|
20
|
+
terms = terms_for(args)
|
19
21
|
unsafe_insert(terms)
|
20
22
|
create_suffix_links
|
21
23
|
end
|
22
24
|
|
23
25
|
private
|
24
26
|
|
27
|
+
def terms_for(args)
|
28
|
+
if args.length == 1 && args[0].is_a?(Array)
|
29
|
+
args[0]
|
30
|
+
else
|
31
|
+
args
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
25
35
|
def unsafe_insert(terms)
|
26
36
|
terms.each do |t|
|
27
37
|
t.each_char.inject(@root) {|node, char| node.child_for(char.to_sym) }.add_match(t)
|
@@ -65,3 +75,4 @@ class AhoCorasick
|
|
65
75
|
|
66
76
|
end
|
67
77
|
end
|
78
|
+
require 'aho_corasick/version'
|
metadata
CHANGED
@@ -1,71 +1,62 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aho_corasick
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
5
|
-
prerelease:
|
4
|
+
version: 0.1.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Tim Cowlishaw
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2014-10-02 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: rspec
|
16
|
-
requirement:
|
17
|
-
none: false
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - "~>"
|
20
18
|
- !ruby/object:Gem::Version
|
21
|
-
version: '0'
|
19
|
+
version: '3.1'
|
22
20
|
type: :development
|
23
21
|
prerelease: false
|
24
|
-
version_requirements:
|
25
|
-
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '3.1'
|
27
|
+
description: An algorithm that allows searching for members of a known set of strings
|
28
|
+
appearing as substrings of a larger string in time linear to both the size of the
|
29
|
+
string and the size of the set
|
26
30
|
email: tim@timcowlishaw.co.uk
|
27
31
|
executables: []
|
28
32
|
extensions: []
|
29
|
-
extra_rdoc_files:
|
30
|
-
- README.md
|
33
|
+
extra_rdoc_files: []
|
31
34
|
files:
|
32
|
-
- Gemfile
|
33
|
-
- Gemfile.lock
|
34
|
-
- Rakefile
|
35
35
|
- README.md
|
36
|
-
- spec/aho_corasick_spec.rb
|
37
|
-
- spec/spec_helper.rb
|
38
36
|
- lib/aho_corasick.rb
|
39
|
-
|
40
|
-
|
37
|
+
- lib/aho_corasick/version.rb
|
38
|
+
homepage: http://github.com/timcowlishaw/aho_corasick
|
39
|
+
licenses:
|
40
|
+
- MIT
|
41
|
+
metadata: {}
|
41
42
|
post_install_message:
|
42
|
-
rdoc_options:
|
43
|
-
- --main
|
44
|
-
- README.md
|
43
|
+
rdoc_options: []
|
45
44
|
require_paths:
|
46
45
|
- lib
|
47
46
|
required_ruby_version: !ruby/object:Gem::Requirement
|
48
|
-
none: false
|
49
47
|
requirements:
|
50
|
-
- -
|
48
|
+
- - ">="
|
51
49
|
- !ruby/object:Gem::Version
|
52
50
|
version: '0'
|
53
|
-
segments:
|
54
|
-
- 0
|
55
|
-
hash: 3276065860397495026
|
56
51
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
52
|
requirements:
|
59
|
-
- -
|
53
|
+
- - ">="
|
60
54
|
- !ruby/object:Gem::Version
|
61
55
|
version: '0'
|
62
|
-
segments:
|
63
|
-
- 0
|
64
|
-
hash: 3276065860397495026
|
65
56
|
requirements: []
|
66
57
|
rubyforge_project:
|
67
|
-
rubygems_version:
|
58
|
+
rubygems_version: 2.2.2
|
68
59
|
signing_key:
|
69
|
-
specification_version:
|
60
|
+
specification_version: 4
|
70
61
|
summary: The Aho-Corasick string-matching algorithm
|
71
62
|
test_files: []
|
data/Gemfile
DELETED
data/Gemfile.lock
DELETED
data/Rakefile
DELETED
@@ -1,79 +0,0 @@
|
|
1
|
-
require 'bundler/setup'
|
2
|
-
require "rubygems/package_task"
|
3
|
-
require "rdoc/task"
|
4
|
-
require 'rake/testtask'
|
5
|
-
|
6
|
-
task :default => :test
|
7
|
-
task :spec => :test
|
8
|
-
|
9
|
-
Rake::TestTask.new do |t|
|
10
|
-
t.pattern = "spec/*_spec.rb"
|
11
|
-
end
|
12
|
-
|
13
|
-
# This builds the actual gem. For details of what all these options
|
14
|
-
# mean, and other ones you can add, check the documentation here:
|
15
|
-
#
|
16
|
-
# http://rubygems.org/read/chapter/20
|
17
|
-
#
|
18
|
-
spec = Gem::Specification.new do |s|
|
19
|
-
|
20
|
-
# Change these as appropriate
|
21
|
-
s.name = "aho_corasick"
|
22
|
-
s.version = "0.0.2"
|
23
|
-
s.summary = "The Aho-Corasick string-matching algorithm"
|
24
|
-
s.author = "Tim Cowlishaw"
|
25
|
-
s.email = "tim@timcowlishaw.co.uk"
|
26
|
-
s.homepage = "http://github.com/likely/aho_corasick"
|
27
|
-
|
28
|
-
s.has_rdoc = true
|
29
|
-
s.extra_rdoc_files = %w(README.md)
|
30
|
-
s.rdoc_options = %w(--main README.md)
|
31
|
-
|
32
|
-
# Add any extra files to include in the gem
|
33
|
-
s.files = %w(Gemfile Gemfile.lock Rakefile README.md Gemfile) + Dir.glob("{spec,lib}/**/*")
|
34
|
-
s.require_paths = ["lib"]
|
35
|
-
|
36
|
-
# If you want to depend on other gems, add them here, along with any
|
37
|
-
# relevant versions
|
38
|
-
#s.add_dependency("eventmachine")
|
39
|
-
|
40
|
-
# If your tests use any gems, include them here
|
41
|
-
s.add_development_dependency("rspec")
|
42
|
-
end
|
43
|
-
|
44
|
-
# This task actually builds the gem. We also regenerate a static
|
45
|
-
# .gemspec file, which is useful if something (i.e. GitHub) will
|
46
|
-
# be automatically building a gem for this project. If you're not
|
47
|
-
# using GitHub, edit as appropriate.
|
48
|
-
#
|
49
|
-
# To publish your gem online, install the 'gemcutter' gem; Read more
|
50
|
-
# about that here: http://gemcutter.org/pages/gem_docs
|
51
|
-
Gem::PackageTask.new(spec) do |pkg|
|
52
|
-
pkg.gem_spec = spec
|
53
|
-
end
|
54
|
-
|
55
|
-
desc "Build the gemspec file #{spec.name}.gemspec"
|
56
|
-
task :gemspec do
|
57
|
-
file = File.dirname(__FILE__) + "/#{spec.name}.gemspec"
|
58
|
-
File.open(file, "w") {|f| f << spec.to_ruby }
|
59
|
-
end
|
60
|
-
|
61
|
-
# If you don't want to generate the .gemspec file, just remove this line. Reasons
|
62
|
-
# why you might want to generate a gemspec:
|
63
|
-
# - using bundler with a git source
|
64
|
-
# - building the gem without rake (i.e. gem build blah.gemspec)
|
65
|
-
# - maybe others?
|
66
|
-
task :package => :gemspec
|
67
|
-
|
68
|
-
# Generate documentation
|
69
|
-
RDoc::Task.new do |rd|
|
70
|
-
rd.main = "README.markdown"
|
71
|
-
rd.rdoc_files.include("README.markdown", "lib/**/*.rb")
|
72
|
-
rd.rdoc_dir = "rdoc"
|
73
|
-
end
|
74
|
-
|
75
|
-
desc 'Clear out RDoc and generated packages'
|
76
|
-
task :clean => [:clobber_rdoc, :clobber_package] do
|
77
|
-
rm "#{spec.name}.gemspec"
|
78
|
-
end
|
79
|
-
|
data/spec/aho_corasick_spec.rb
DELETED
@@ -1,56 +0,0 @@
|
|
1
|
-
$: << File.dirname(__FILE__)
|
2
|
-
require 'spec_helper'
|
3
|
-
|
4
|
-
describe "AhoCorasick" do
|
5
|
-
it "returns matched substrings" do
|
6
|
-
a = AhoCorasick.new("ab")
|
7
|
-
a.match("abcde").must_include("ab")
|
8
|
-
end
|
9
|
-
|
10
|
-
it "returns multiple matched substrings" do
|
11
|
-
a = AhoCorasick.new("ab", "cd")
|
12
|
-
a.match("cd123ab").to_set.must_equal ["ab", "cd"].to_set
|
13
|
-
end
|
14
|
-
|
15
|
-
it "returns overlapping matched substrings" do
|
16
|
-
a = AhoCorasick.new("ab", "bc")
|
17
|
-
a.match("abc").to_set.must_equal ["ab", "bc"].to_set
|
18
|
-
end
|
19
|
-
|
20
|
-
it "does not return unmatched substrings" do
|
21
|
-
a = AhoCorasick.new("ab")
|
22
|
-
a.match("abc").wont_include("bc")
|
23
|
-
end
|
24
|
-
|
25
|
-
it "matches adjacent terms" do
|
26
|
-
a = AhoCorasick.new("ab", "cd")
|
27
|
-
a.match("abcd").to_set.must_equal ["ab", "cd"].to_set
|
28
|
-
end
|
29
|
-
|
30
|
-
it "matches terms directly following a partial match" do
|
31
|
-
a = AhoCorasick.new("abc", "de")
|
32
|
-
a.match("ade").must_include("de")
|
33
|
-
end
|
34
|
-
|
35
|
-
it "returns terms added to the matcher after instantiation" do
|
36
|
-
a = AhoCorasick.new("ab")
|
37
|
-
a.insert("cd", "ef")
|
38
|
-
a.match("ab12cd12ef").to_set.must_equal ["ab", "cd", "ef"].to_set
|
39
|
-
end
|
40
|
-
|
41
|
-
describe "benchmarks" do
|
42
|
-
|
43
|
-
before do
|
44
|
-
words = 1000.times.map { rand(6).times.inject("") {|s,_| s + (65 + rand(26)).chr}}
|
45
|
-
@matcher = AhoCorasick.new(*words)
|
46
|
-
end
|
47
|
-
|
48
|
-
bench_performance_linear "string matching" do |n|
|
49
|
-
10.times do
|
50
|
-
string = n.times.inject("") {|s, _| s + (65 + rand(26)).chr }
|
51
|
-
@matcher.match(string)
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
end
|