aho_corasick 0.0.2 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/aho_corasick.rb +13 -2
- data/lib/aho_corasick/version.rb +3 -0
- metadata +24 -33
- data/Gemfile +0 -8
- data/Gemfile.lock +0 -14
- data/Rakefile +0 -79
- data/spec/aho_corasick_spec.rb +0 -56
- data/spec/spec_helper.rb +0 -8
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 9f6e421348a6325c2d72bfa6ea3719a160c00c8f
|
4
|
+
data.tar.gz: 4beff5dc400588ce17788d05ac73ac6feec2d35d
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: bfd393d096029c9b352bca77c21dc7dec3423be0245c78724452dba61f626083bde2477a36a8ea06e02d317611d09440802adfb3fe1a37e3fd0bdd2c3a1e9704
|
7
|
+
data.tar.gz: de26ede37c3fcde55786743316251669c3f5a0a9894f4be324c9d8b221f9d80972b072bee474297df301ab246062955324875f85cc5d6f6ea3d788340855d97b
|
data/lib/aho_corasick.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
class AhoCorasick
|
2
|
-
def initialize(*terms)
|
2
|
+
def initialize(*args)
|
3
|
+
terms = terms_for(args)
|
3
4
|
@root = TreeNode.new
|
4
5
|
unsafe_insert(terms)
|
5
6
|
create_suffix_links
|
@@ -15,13 +16,22 @@ class AhoCorasick
|
|
15
16
|
return matches
|
16
17
|
end
|
17
18
|
|
18
|
-
def insert(*terms)
|
19
|
+
def insert(*args)
|
20
|
+
terms = terms_for(args)
|
19
21
|
unsafe_insert(terms)
|
20
22
|
create_suffix_links
|
21
23
|
end
|
22
24
|
|
23
25
|
private
|
24
26
|
|
27
|
+
def terms_for(args)
|
28
|
+
if args.length == 1 && args[0].is_a?(Array)
|
29
|
+
args[0]
|
30
|
+
else
|
31
|
+
args
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
25
35
|
def unsafe_insert(terms)
|
26
36
|
terms.each do |t|
|
27
37
|
t.each_char.inject(@root) {|node, char| node.child_for(char.to_sym) }.add_match(t)
|
@@ -65,3 +75,4 @@ class AhoCorasick
|
|
65
75
|
|
66
76
|
end
|
67
77
|
end
|
78
|
+
require 'aho_corasick/version'
|
metadata
CHANGED
@@ -1,71 +1,62 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aho_corasick
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
5
|
-
prerelease:
|
4
|
+
version: 0.1.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Tim Cowlishaw
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2014-10-02 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: rspec
|
16
|
-
requirement:
|
17
|
-
none: false
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - "~>"
|
20
18
|
- !ruby/object:Gem::Version
|
21
|
-
version: '0'
|
19
|
+
version: '3.1'
|
22
20
|
type: :development
|
23
21
|
prerelease: false
|
24
|
-
version_requirements:
|
25
|
-
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '3.1'
|
27
|
+
description: An algorithm that allows searching for members of a known set of strings
|
28
|
+
appearing as substrings of a larger string in time linear to both the size of the
|
29
|
+
string and the size of the set
|
26
30
|
email: tim@timcowlishaw.co.uk
|
27
31
|
executables: []
|
28
32
|
extensions: []
|
29
|
-
extra_rdoc_files:
|
30
|
-
- README.md
|
33
|
+
extra_rdoc_files: []
|
31
34
|
files:
|
32
|
-
- Gemfile
|
33
|
-
- Gemfile.lock
|
34
|
-
- Rakefile
|
35
35
|
- README.md
|
36
|
-
- spec/aho_corasick_spec.rb
|
37
|
-
- spec/spec_helper.rb
|
38
36
|
- lib/aho_corasick.rb
|
39
|
-
|
40
|
-
|
37
|
+
- lib/aho_corasick/version.rb
|
38
|
+
homepage: http://github.com/timcowlishaw/aho_corasick
|
39
|
+
licenses:
|
40
|
+
- MIT
|
41
|
+
metadata: {}
|
41
42
|
post_install_message:
|
42
|
-
rdoc_options:
|
43
|
-
- --main
|
44
|
-
- README.md
|
43
|
+
rdoc_options: []
|
45
44
|
require_paths:
|
46
45
|
- lib
|
47
46
|
required_ruby_version: !ruby/object:Gem::Requirement
|
48
|
-
none: false
|
49
47
|
requirements:
|
50
|
-
- -
|
48
|
+
- - ">="
|
51
49
|
- !ruby/object:Gem::Version
|
52
50
|
version: '0'
|
53
|
-
segments:
|
54
|
-
- 0
|
55
|
-
hash: 3276065860397495026
|
56
51
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
52
|
requirements:
|
59
|
-
- -
|
53
|
+
- - ">="
|
60
54
|
- !ruby/object:Gem::Version
|
61
55
|
version: '0'
|
62
|
-
segments:
|
63
|
-
- 0
|
64
|
-
hash: 3276065860397495026
|
65
56
|
requirements: []
|
66
57
|
rubyforge_project:
|
67
|
-
rubygems_version:
|
58
|
+
rubygems_version: 2.2.2
|
68
59
|
signing_key:
|
69
|
-
specification_version:
|
60
|
+
specification_version: 4
|
70
61
|
summary: The Aho-Corasick string-matching algorithm
|
71
62
|
test_files: []
|
data/Gemfile
DELETED
data/Gemfile.lock
DELETED
data/Rakefile
DELETED
@@ -1,79 +0,0 @@
|
|
1
|
-
require 'bundler/setup'
|
2
|
-
require "rubygems/package_task"
|
3
|
-
require "rdoc/task"
|
4
|
-
require 'rake/testtask'
|
5
|
-
|
6
|
-
task :default => :test
|
7
|
-
task :spec => :test
|
8
|
-
|
9
|
-
Rake::TestTask.new do |t|
|
10
|
-
t.pattern = "spec/*_spec.rb"
|
11
|
-
end
|
12
|
-
|
13
|
-
# This builds the actual gem. For details of what all these options
|
14
|
-
# mean, and other ones you can add, check the documentation here:
|
15
|
-
#
|
16
|
-
# http://rubygems.org/read/chapter/20
|
17
|
-
#
|
18
|
-
spec = Gem::Specification.new do |s|
|
19
|
-
|
20
|
-
# Change these as appropriate
|
21
|
-
s.name = "aho_corasick"
|
22
|
-
s.version = "0.0.2"
|
23
|
-
s.summary = "The Aho-Corasick string-matching algorithm"
|
24
|
-
s.author = "Tim Cowlishaw"
|
25
|
-
s.email = "tim@timcowlishaw.co.uk"
|
26
|
-
s.homepage = "http://github.com/likely/aho_corasick"
|
27
|
-
|
28
|
-
s.has_rdoc = true
|
29
|
-
s.extra_rdoc_files = %w(README.md)
|
30
|
-
s.rdoc_options = %w(--main README.md)
|
31
|
-
|
32
|
-
# Add any extra files to include in the gem
|
33
|
-
s.files = %w(Gemfile Gemfile.lock Rakefile README.md Gemfile) + Dir.glob("{spec,lib}/**/*")
|
34
|
-
s.require_paths = ["lib"]
|
35
|
-
|
36
|
-
# If you want to depend on other gems, add them here, along with any
|
37
|
-
# relevant versions
|
38
|
-
#s.add_dependency("eventmachine")
|
39
|
-
|
40
|
-
# If your tests use any gems, include them here
|
41
|
-
s.add_development_dependency("rspec")
|
42
|
-
end
|
43
|
-
|
44
|
-
# This task actually builds the gem. We also regenerate a static
|
45
|
-
# .gemspec file, which is useful if something (i.e. GitHub) will
|
46
|
-
# be automatically building a gem for this project. If you're not
|
47
|
-
# using GitHub, edit as appropriate.
|
48
|
-
#
|
49
|
-
# To publish your gem online, install the 'gemcutter' gem; Read more
|
50
|
-
# about that here: http://gemcutter.org/pages/gem_docs
|
51
|
-
Gem::PackageTask.new(spec) do |pkg|
|
52
|
-
pkg.gem_spec = spec
|
53
|
-
end
|
54
|
-
|
55
|
-
desc "Build the gemspec file #{spec.name}.gemspec"
|
56
|
-
task :gemspec do
|
57
|
-
file = File.dirname(__FILE__) + "/#{spec.name}.gemspec"
|
58
|
-
File.open(file, "w") {|f| f << spec.to_ruby }
|
59
|
-
end
|
60
|
-
|
61
|
-
# If you don't want to generate the .gemspec file, just remove this line. Reasons
|
62
|
-
# why you might want to generate a gemspec:
|
63
|
-
# - using bundler with a git source
|
64
|
-
# - building the gem without rake (i.e. gem build blah.gemspec)
|
65
|
-
# - maybe others?
|
66
|
-
task :package => :gemspec
|
67
|
-
|
68
|
-
# Generate documentation
|
69
|
-
RDoc::Task.new do |rd|
|
70
|
-
rd.main = "README.markdown"
|
71
|
-
rd.rdoc_files.include("README.markdown", "lib/**/*.rb")
|
72
|
-
rd.rdoc_dir = "rdoc"
|
73
|
-
end
|
74
|
-
|
75
|
-
desc 'Clear out RDoc and generated packages'
|
76
|
-
task :clean => [:clobber_rdoc, :clobber_package] do
|
77
|
-
rm "#{spec.name}.gemspec"
|
78
|
-
end
|
79
|
-
|
data/spec/aho_corasick_spec.rb
DELETED
@@ -1,56 +0,0 @@
|
|
1
|
-
$: << File.dirname(__FILE__)
|
2
|
-
require 'spec_helper'
|
3
|
-
|
4
|
-
describe "AhoCorasick" do
|
5
|
-
it "returns matched substrings" do
|
6
|
-
a = AhoCorasick.new("ab")
|
7
|
-
a.match("abcde").must_include("ab")
|
8
|
-
end
|
9
|
-
|
10
|
-
it "returns multiple matched substrings" do
|
11
|
-
a = AhoCorasick.new("ab", "cd")
|
12
|
-
a.match("cd123ab").to_set.must_equal ["ab", "cd"].to_set
|
13
|
-
end
|
14
|
-
|
15
|
-
it "returns overlapping matched substrings" do
|
16
|
-
a = AhoCorasick.new("ab", "bc")
|
17
|
-
a.match("abc").to_set.must_equal ["ab", "bc"].to_set
|
18
|
-
end
|
19
|
-
|
20
|
-
it "does not return unmatched substrings" do
|
21
|
-
a = AhoCorasick.new("ab")
|
22
|
-
a.match("abc").wont_include("bc")
|
23
|
-
end
|
24
|
-
|
25
|
-
it "matches adjacent terms" do
|
26
|
-
a = AhoCorasick.new("ab", "cd")
|
27
|
-
a.match("abcd").to_set.must_equal ["ab", "cd"].to_set
|
28
|
-
end
|
29
|
-
|
30
|
-
it "matches terms directly following a partial match" do
|
31
|
-
a = AhoCorasick.new("abc", "de")
|
32
|
-
a.match("ade").must_include("de")
|
33
|
-
end
|
34
|
-
|
35
|
-
it "returns terms added to the matcher after instantiation" do
|
36
|
-
a = AhoCorasick.new("ab")
|
37
|
-
a.insert("cd", "ef")
|
38
|
-
a.match("ab12cd12ef").to_set.must_equal ["ab", "cd", "ef"].to_set
|
39
|
-
end
|
40
|
-
|
41
|
-
describe "benchmarks" do
|
42
|
-
|
43
|
-
before do
|
44
|
-
words = 1000.times.map { rand(6).times.inject("") {|s,_| s + (65 + rand(26)).chr}}
|
45
|
-
@matcher = AhoCorasick.new(*words)
|
46
|
-
end
|
47
|
-
|
48
|
-
bench_performance_linear "string matching" do |n|
|
49
|
-
10.times do
|
50
|
-
string = n.times.inject("") {|s, _| s + (65 + rand(26)).chr }
|
51
|
-
@matcher.match(string)
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
end
|