aho_corasick 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 9f6e421348a6325c2d72bfa6ea3719a160c00c8f
4
+ data.tar.gz: 4beff5dc400588ce17788d05ac73ac6feec2d35d
5
+ SHA512:
6
+ metadata.gz: bfd393d096029c9b352bca77c21dc7dec3423be0245c78724452dba61f626083bde2477a36a8ea06e02d317611d09440802adfb3fe1a37e3fd0bdd2c3a1e9704
7
+ data.tar.gz: de26ede37c3fcde55786743316251669c3f5a0a9894f4be324c9d8b221f9d80972b072bee474297df301ab246062955324875f85cc5d6f6ea3d788340855d97b
@@ -1,5 +1,6 @@
1
1
  class AhoCorasick
2
- def initialize(*terms)
2
+ def initialize(*args)
3
+ terms = terms_for(args)
3
4
  @root = TreeNode.new
4
5
  unsafe_insert(terms)
5
6
  create_suffix_links
@@ -15,13 +16,22 @@ class AhoCorasick
15
16
  return matches
16
17
  end
17
18
 
18
- def insert(*terms)
19
+ def insert(*args)
20
+ terms = terms_for(args)
19
21
  unsafe_insert(terms)
20
22
  create_suffix_links
21
23
  end
22
24
 
23
25
  private
24
26
 
27
+ def terms_for(args)
28
+ if args.length == 1 && args[0].is_a?(Array)
29
+ args[0]
30
+ else
31
+ args
32
+ end
33
+ end
34
+
25
35
  def unsafe_insert(terms)
26
36
  terms.each do |t|
27
37
  t.each_char.inject(@root) {|node, char| node.child_for(char.to_sym) }.add_match(t)
@@ -65,3 +75,4 @@ class AhoCorasick
65
75
 
66
76
  end
67
77
  end
78
+ require 'aho_corasick/version'
@@ -0,0 +1,3 @@
1
+ class AhoCorasick
2
+ VERSION="0.1.0"
3
+ end
metadata CHANGED
@@ -1,71 +1,62 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aho_corasick
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
5
- prerelease:
4
+ version: 0.1.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - Tim Cowlishaw
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2011-11-18 00:00:00.000000000Z
11
+ date: 2014-10-02 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: rspec
16
- requirement: &12269020 !ruby/object:Gem::Requirement
17
- none: false
15
+ requirement: !ruby/object:Gem::Requirement
18
16
  requirements:
19
- - - ! '>='
17
+ - - "~>"
20
18
  - !ruby/object:Gem::Version
21
- version: '0'
19
+ version: '3.1'
22
20
  type: :development
23
21
  prerelease: false
24
- version_requirements: *12269020
25
- description:
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '3.1'
27
+ description: An algorithm that allows searching for members of a known set of strings
28
+ appearing as substrings of a larger string in time linear to both the size of the
29
+ string and the size of the set
26
30
  email: tim@timcowlishaw.co.uk
27
31
  executables: []
28
32
  extensions: []
29
- extra_rdoc_files:
30
- - README.md
33
+ extra_rdoc_files: []
31
34
  files:
32
- - Gemfile
33
- - Gemfile.lock
34
- - Rakefile
35
35
  - README.md
36
- - spec/aho_corasick_spec.rb
37
- - spec/spec_helper.rb
38
36
  - lib/aho_corasick.rb
39
- homepage: http://github.com/likely/aho_corasick
40
- licenses: []
37
+ - lib/aho_corasick/version.rb
38
+ homepage: http://github.com/timcowlishaw/aho_corasick
39
+ licenses:
40
+ - MIT
41
+ metadata: {}
41
42
  post_install_message:
42
- rdoc_options:
43
- - --main
44
- - README.md
43
+ rdoc_options: []
45
44
  require_paths:
46
45
  - lib
47
46
  required_ruby_version: !ruby/object:Gem::Requirement
48
- none: false
49
47
  requirements:
50
- - - ! '>='
48
+ - - ">="
51
49
  - !ruby/object:Gem::Version
52
50
  version: '0'
53
- segments:
54
- - 0
55
- hash: 3276065860397495026
56
51
  required_rubygems_version: !ruby/object:Gem::Requirement
57
- none: false
58
52
  requirements:
59
- - - ! '>='
53
+ - - ">="
60
54
  - !ruby/object:Gem::Version
61
55
  version: '0'
62
- segments:
63
- - 0
64
- hash: 3276065860397495026
65
56
  requirements: []
66
57
  rubyforge_project:
67
- rubygems_version: 1.8.10
58
+ rubygems_version: 2.2.2
68
59
  signing_key:
69
- specification_version: 3
60
+ specification_version: 4
70
61
  summary: The Aho-Corasick string-matching algorithm
71
62
  test_files: []
data/Gemfile DELETED
@@ -1,8 +0,0 @@
1
- source "http://rubygems.org"
2
-
3
- group :development, :test do
4
- gem "gem-this"
5
- gem "rake"
6
- gem "minitest"
7
- #gem "rspec"
8
- end
@@ -1,14 +0,0 @@
1
- GEM
2
- remote: http://rubygems.org/
3
- specs:
4
- gem-this (0.3.7)
5
- minitest (2.6.2)
6
- rake (0.9.2)
7
-
8
- PLATFORMS
9
- ruby
10
-
11
- DEPENDENCIES
12
- gem-this
13
- minitest
14
- rake
data/Rakefile DELETED
@@ -1,79 +0,0 @@
1
- require 'bundler/setup'
2
- require "rubygems/package_task"
3
- require "rdoc/task"
4
- require 'rake/testtask'
5
-
6
- task :default => :test
7
- task :spec => :test
8
-
9
- Rake::TestTask.new do |t|
10
- t.pattern = "spec/*_spec.rb"
11
- end
12
-
13
- # This builds the actual gem. For details of what all these options
14
- # mean, and other ones you can add, check the documentation here:
15
- #
16
- # http://rubygems.org/read/chapter/20
17
- #
18
- spec = Gem::Specification.new do |s|
19
-
20
- # Change these as appropriate
21
- s.name = "aho_corasick"
22
- s.version = "0.0.2"
23
- s.summary = "The Aho-Corasick string-matching algorithm"
24
- s.author = "Tim Cowlishaw"
25
- s.email = "tim@timcowlishaw.co.uk"
26
- s.homepage = "http://github.com/likely/aho_corasick"
27
-
28
- s.has_rdoc = true
29
- s.extra_rdoc_files = %w(README.md)
30
- s.rdoc_options = %w(--main README.md)
31
-
32
- # Add any extra files to include in the gem
33
- s.files = %w(Gemfile Gemfile.lock Rakefile README.md Gemfile) + Dir.glob("{spec,lib}/**/*")
34
- s.require_paths = ["lib"]
35
-
36
- # If you want to depend on other gems, add them here, along with any
37
- # relevant versions
38
- #s.add_dependency("eventmachine")
39
-
40
- # If your tests use any gems, include them here
41
- s.add_development_dependency("rspec")
42
- end
43
-
44
- # This task actually builds the gem. We also regenerate a static
45
- # .gemspec file, which is useful if something (i.e. GitHub) will
46
- # be automatically building a gem for this project. If you're not
47
- # using GitHub, edit as appropriate.
48
- #
49
- # To publish your gem online, install the 'gemcutter' gem; Read more
50
- # about that here: http://gemcutter.org/pages/gem_docs
51
- Gem::PackageTask.new(spec) do |pkg|
52
- pkg.gem_spec = spec
53
- end
54
-
55
- desc "Build the gemspec file #{spec.name}.gemspec"
56
- task :gemspec do
57
- file = File.dirname(__FILE__) + "/#{spec.name}.gemspec"
58
- File.open(file, "w") {|f| f << spec.to_ruby }
59
- end
60
-
61
- # If you don't want to generate the .gemspec file, just remove this line. Reasons
62
- # why you might want to generate a gemspec:
63
- # - using bundler with a git source
64
- # - building the gem without rake (i.e. gem build blah.gemspec)
65
- # - maybe others?
66
- task :package => :gemspec
67
-
68
- # Generate documentation
69
- RDoc::Task.new do |rd|
70
- rd.main = "README.markdown"
71
- rd.rdoc_files.include("README.markdown", "lib/**/*.rb")
72
- rd.rdoc_dir = "rdoc"
73
- end
74
-
75
- desc 'Clear out RDoc and generated packages'
76
- task :clean => [:clobber_rdoc, :clobber_package] do
77
- rm "#{spec.name}.gemspec"
78
- end
79
-
@@ -1,56 +0,0 @@
1
- $: << File.dirname(__FILE__)
2
- require 'spec_helper'
3
-
4
- describe "AhoCorasick" do
5
- it "returns matched substrings" do
6
- a = AhoCorasick.new("ab")
7
- a.match("abcde").must_include("ab")
8
- end
9
-
10
- it "returns multiple matched substrings" do
11
- a = AhoCorasick.new("ab", "cd")
12
- a.match("cd123ab").to_set.must_equal ["ab", "cd"].to_set
13
- end
14
-
15
- it "returns overlapping matched substrings" do
16
- a = AhoCorasick.new("ab", "bc")
17
- a.match("abc").to_set.must_equal ["ab", "bc"].to_set
18
- end
19
-
20
- it "does not return unmatched substrings" do
21
- a = AhoCorasick.new("ab")
22
- a.match("abc").wont_include("bc")
23
- end
24
-
25
- it "matches adjacent terms" do
26
- a = AhoCorasick.new("ab", "cd")
27
- a.match("abcd").to_set.must_equal ["ab", "cd"].to_set
28
- end
29
-
30
- it "matches terms directly following a partial match" do
31
- a = AhoCorasick.new("abc", "de")
32
- a.match("ade").must_include("de")
33
- end
34
-
35
- it "returns terms added to the matcher after instantiation" do
36
- a = AhoCorasick.new("ab")
37
- a.insert("cd", "ef")
38
- a.match("ab12cd12ef").to_set.must_equal ["ab", "cd", "ef"].to_set
39
- end
40
-
41
- describe "benchmarks" do
42
-
43
- before do
44
- words = 1000.times.map { rand(6).times.inject("") {|s,_| s + (65 + rand(26)).chr}}
45
- @matcher = AhoCorasick.new(*words)
46
- end
47
-
48
- bench_performance_linear "string matching" do |n|
49
- 10.times do
50
- string = n.times.inject("") {|s, _| s + (65 + rand(26)).chr }
51
- @matcher.match(string)
52
- end
53
- end
54
- end
55
-
56
- end
@@ -1,8 +0,0 @@
1
- $: << File.join(File.dirname(__FILE__),"..")
2
- require 'rubygems'
3
- require 'bundler/setup'
4
- require 'lib/aho_corasick'
5
- require 'minitest/spec'
6
- require 'minitest/benchmark'
7
- require 'minitest/autorun'
8
-