aho_corasick 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 9f6e421348a6325c2d72bfa6ea3719a160c00c8f
4
+ data.tar.gz: 4beff5dc400588ce17788d05ac73ac6feec2d35d
5
+ SHA512:
6
+ metadata.gz: bfd393d096029c9b352bca77c21dc7dec3423be0245c78724452dba61f626083bde2477a36a8ea06e02d317611d09440802adfb3fe1a37e3fd0bdd2c3a1e9704
7
+ data.tar.gz: de26ede37c3fcde55786743316251669c3f5a0a9894f4be324c9d8b221f9d80972b072bee474297df301ab246062955324875f85cc5d6f6ea3d788340855d97b
@@ -1,5 +1,6 @@
1
1
  class AhoCorasick
2
- def initialize(*terms)
2
+ def initialize(*args)
3
+ terms = terms_for(args)
3
4
  @root = TreeNode.new
4
5
  unsafe_insert(terms)
5
6
  create_suffix_links
@@ -15,13 +16,22 @@ class AhoCorasick
15
16
  return matches
16
17
  end
17
18
 
18
- def insert(*terms)
19
+ def insert(*args)
20
+ terms = terms_for(args)
19
21
  unsafe_insert(terms)
20
22
  create_suffix_links
21
23
  end
22
24
 
23
25
  private
24
26
 
27
+ def terms_for(args)
28
+ if args.length == 1 && args[0].is_a?(Array)
29
+ args[0]
30
+ else
31
+ args
32
+ end
33
+ end
34
+
25
35
  def unsafe_insert(terms)
26
36
  terms.each do |t|
27
37
  t.each_char.inject(@root) {|node, char| node.child_for(char.to_sym) }.add_match(t)
@@ -65,3 +75,4 @@ class AhoCorasick
65
75
 
66
76
  end
67
77
  end
78
+ require 'aho_corasick/version'
@@ -0,0 +1,3 @@
1
+ class AhoCorasick
2
+ VERSION="0.1.0"
3
+ end
metadata CHANGED
@@ -1,71 +1,62 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aho_corasick
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
5
- prerelease:
4
+ version: 0.1.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - Tim Cowlishaw
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2011-11-18 00:00:00.000000000Z
11
+ date: 2014-10-02 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: rspec
16
- requirement: &12269020 !ruby/object:Gem::Requirement
17
- none: false
15
+ requirement: !ruby/object:Gem::Requirement
18
16
  requirements:
19
- - - ! '>='
17
+ - - "~>"
20
18
  - !ruby/object:Gem::Version
21
- version: '0'
19
+ version: '3.1'
22
20
  type: :development
23
21
  prerelease: false
24
- version_requirements: *12269020
25
- description:
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '3.1'
27
+ description: An algorithm that allows searching for members of a known set of strings
28
+ appearing as substrings of a larger string in time linear to both the size of the
29
+ string and the size of the set
26
30
  email: tim@timcowlishaw.co.uk
27
31
  executables: []
28
32
  extensions: []
29
- extra_rdoc_files:
30
- - README.md
33
+ extra_rdoc_files: []
31
34
  files:
32
- - Gemfile
33
- - Gemfile.lock
34
- - Rakefile
35
35
  - README.md
36
- - spec/aho_corasick_spec.rb
37
- - spec/spec_helper.rb
38
36
  - lib/aho_corasick.rb
39
- homepage: http://github.com/likely/aho_corasick
40
- licenses: []
37
+ - lib/aho_corasick/version.rb
38
+ homepage: http://github.com/timcowlishaw/aho_corasick
39
+ licenses:
40
+ - MIT
41
+ metadata: {}
41
42
  post_install_message:
42
- rdoc_options:
43
- - --main
44
- - README.md
43
+ rdoc_options: []
45
44
  require_paths:
46
45
  - lib
47
46
  required_ruby_version: !ruby/object:Gem::Requirement
48
- none: false
49
47
  requirements:
50
- - - ! '>='
48
+ - - ">="
51
49
  - !ruby/object:Gem::Version
52
50
  version: '0'
53
- segments:
54
- - 0
55
- hash: 3276065860397495026
56
51
  required_rubygems_version: !ruby/object:Gem::Requirement
57
- none: false
58
52
  requirements:
59
- - - ! '>='
53
+ - - ">="
60
54
  - !ruby/object:Gem::Version
61
55
  version: '0'
62
- segments:
63
- - 0
64
- hash: 3276065860397495026
65
56
  requirements: []
66
57
  rubyforge_project:
67
- rubygems_version: 1.8.10
58
+ rubygems_version: 2.2.2
68
59
  signing_key:
69
- specification_version: 3
60
+ specification_version: 4
70
61
  summary: The Aho-Corasick string-matching algorithm
71
62
  test_files: []
data/Gemfile DELETED
@@ -1,8 +0,0 @@
1
- source "http://rubygems.org"
2
-
3
- group :development, :test do
4
- gem "gem-this"
5
- gem "rake"
6
- gem "minitest"
7
- #gem "rspec"
8
- end
@@ -1,14 +0,0 @@
1
- GEM
2
- remote: http://rubygems.org/
3
- specs:
4
- gem-this (0.3.7)
5
- minitest (2.6.2)
6
- rake (0.9.2)
7
-
8
- PLATFORMS
9
- ruby
10
-
11
- DEPENDENCIES
12
- gem-this
13
- minitest
14
- rake
data/Rakefile DELETED
@@ -1,79 +0,0 @@
1
- require 'bundler/setup'
2
- require "rubygems/package_task"
3
- require "rdoc/task"
4
- require 'rake/testtask'
5
-
6
- task :default => :test
7
- task :spec => :test
8
-
9
- Rake::TestTask.new do |t|
10
- t.pattern = "spec/*_spec.rb"
11
- end
12
-
13
- # This builds the actual gem. For details of what all these options
14
- # mean, and other ones you can add, check the documentation here:
15
- #
16
- # http://rubygems.org/read/chapter/20
17
- #
18
- spec = Gem::Specification.new do |s|
19
-
20
- # Change these as appropriate
21
- s.name = "aho_corasick"
22
- s.version = "0.0.2"
23
- s.summary = "The Aho-Corasick string-matching algorithm"
24
- s.author = "Tim Cowlishaw"
25
- s.email = "tim@timcowlishaw.co.uk"
26
- s.homepage = "http://github.com/likely/aho_corasick"
27
-
28
- s.has_rdoc = true
29
- s.extra_rdoc_files = %w(README.md)
30
- s.rdoc_options = %w(--main README.md)
31
-
32
- # Add any extra files to include in the gem
33
- s.files = %w(Gemfile Gemfile.lock Rakefile README.md Gemfile) + Dir.glob("{spec,lib}/**/*")
34
- s.require_paths = ["lib"]
35
-
36
- # If you want to depend on other gems, add them here, along with any
37
- # relevant versions
38
- #s.add_dependency("eventmachine")
39
-
40
- # If your tests use any gems, include them here
41
- s.add_development_dependency("rspec")
42
- end
43
-
44
- # This task actually builds the gem. We also regenerate a static
45
- # .gemspec file, which is useful if something (i.e. GitHub) will
46
- # be automatically building a gem for this project. If you're not
47
- # using GitHub, edit as appropriate.
48
- #
49
- # To publish your gem online, install the 'gemcutter' gem; Read more
50
- # about that here: http://gemcutter.org/pages/gem_docs
51
- Gem::PackageTask.new(spec) do |pkg|
52
- pkg.gem_spec = spec
53
- end
54
-
55
- desc "Build the gemspec file #{spec.name}.gemspec"
56
- task :gemspec do
57
- file = File.dirname(__FILE__) + "/#{spec.name}.gemspec"
58
- File.open(file, "w") {|f| f << spec.to_ruby }
59
- end
60
-
61
- # If you don't want to generate the .gemspec file, just remove this line. Reasons
62
- # why you might want to generate a gemspec:
63
- # - using bundler with a git source
64
- # - building the gem without rake (i.e. gem build blah.gemspec)
65
- # - maybe others?
66
- task :package => :gemspec
67
-
68
- # Generate documentation
69
- RDoc::Task.new do |rd|
70
- rd.main = "README.markdown"
71
- rd.rdoc_files.include("README.markdown", "lib/**/*.rb")
72
- rd.rdoc_dir = "rdoc"
73
- end
74
-
75
- desc 'Clear out RDoc and generated packages'
76
- task :clean => [:clobber_rdoc, :clobber_package] do
77
- rm "#{spec.name}.gemspec"
78
- end
79
-
@@ -1,56 +0,0 @@
1
- $: << File.dirname(__FILE__)
2
- require 'spec_helper'
3
-
4
- describe "AhoCorasick" do
5
- it "returns matched substrings" do
6
- a = AhoCorasick.new("ab")
7
- a.match("abcde").must_include("ab")
8
- end
9
-
10
- it "returns multiple matched substrings" do
11
- a = AhoCorasick.new("ab", "cd")
12
- a.match("cd123ab").to_set.must_equal ["ab", "cd"].to_set
13
- end
14
-
15
- it "returns overlapping matched substrings" do
16
- a = AhoCorasick.new("ab", "bc")
17
- a.match("abc").to_set.must_equal ["ab", "bc"].to_set
18
- end
19
-
20
- it "does not return unmatched substrings" do
21
- a = AhoCorasick.new("ab")
22
- a.match("abc").wont_include("bc")
23
- end
24
-
25
- it "matches adjacent terms" do
26
- a = AhoCorasick.new("ab", "cd")
27
- a.match("abcd").to_set.must_equal ["ab", "cd"].to_set
28
- end
29
-
30
- it "matches terms directly following a partial match" do
31
- a = AhoCorasick.new("abc", "de")
32
- a.match("ade").must_include("de")
33
- end
34
-
35
- it "returns terms added to the matcher after instantiation" do
36
- a = AhoCorasick.new("ab")
37
- a.insert("cd", "ef")
38
- a.match("ab12cd12ef").to_set.must_equal ["ab", "cd", "ef"].to_set
39
- end
40
-
41
- describe "benchmarks" do
42
-
43
- before do
44
- words = 1000.times.map { rand(6).times.inject("") {|s,_| s + (65 + rand(26)).chr}}
45
- @matcher = AhoCorasick.new(*words)
46
- end
47
-
48
- bench_performance_linear "string matching" do |n|
49
- 10.times do
50
- string = n.times.inject("") {|s, _| s + (65 + rand(26)).chr }
51
- @matcher.match(string)
52
- end
53
- end
54
- end
55
-
56
- end
@@ -1,8 +0,0 @@
1
- $: << File.join(File.dirname(__FILE__),"..")
2
- require 'rubygems'
3
- require 'bundler/setup'
4
- require 'lib/aho_corasick'
5
- require 'minitest/spec'
6
- require 'minitest/benchmark'
7
- require 'minitest/autorun'
8
-