aho_corasick_matcher 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +6 -6
- data/lib/aho_corasick_matcher.rb +16 -12
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 22835eda3c317e6993083b400e28fd83c926ff33
|
4
|
+
data.tar.gz: e4cd6c29e23087dce59d4eb3989f1e9fcdeeb09a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bbd1e3bcf0709c2e55bd731d4fec5d8928773bd7b0d3b8fa45601f96d056970e1470386de8c85b687ea3572142814c2440752cc1fa4c1f956980e946ba2c79f0
|
7
|
+
data.tar.gz: de17ee446c5dff842e1eb299950eeae7e1200396e137df97b1a0b54d05e894173335074e6f26f960af4f2e11193034db652e9dd0c4ab7262c7003b06339ac2a7
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# Aho-Corasick Matcher [![Build Status](https://travis-ci.org/altmetric/aho_corasick_matcher.svg?branch=master)](https://travis-ci.org/altmetric/aho_corasick_matcher)
|
2
2
|
|
3
3
|
A Ruby gem for finding strings in text using the [Aho-Corasick string matching search](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.96.4671&rep=rep1&type=pdf).
|
4
4
|
|
@@ -10,8 +10,8 @@ increases only linearly.
|
|
10
10
|
It's quite memory-intensive, and building a matcher is expensive – but once it's
|
11
11
|
been built, matching terms is very fast.
|
12
12
|
|
13
|
-
**Current version:** 1.0.0
|
14
|
-
**Supported Ruby versions:** 1.9.2, 1.9.3, 2.0, 2.1, 2.2, jruby-1.7, rbx-2.
|
13
|
+
**Current version:** 1.0.1
|
14
|
+
**Supported Ruby versions:** 1.8.7, 1.9.2, 1.9.3, 2.0, 2.1, 2.2, jruby-1.7, rbx-2.5
|
15
15
|
|
16
16
|
## Usage
|
17
17
|
|
@@ -22,14 +22,14 @@ matcher = AhoCorasickMatcher.new(['a', 'b', 'ab'])
|
|
22
22
|
matcher.match('aba')
|
23
23
|
#=> ['a', 'ab', 'b', 'a']
|
24
24
|
|
25
|
-
matcher = AhoCorasickMatcher.new([
|
26
|
-
matcher.match(
|
25
|
+
matcher = AhoCorasickMatcher.new(['thistle', 'sift', 'thistles'])
|
26
|
+
matcher.match('Theophilus thistle, the successful thistle sifter, in sifting a sieve full of un-sifted thistles, thrust three thousand thistles through the thick of his thumb.')
|
27
27
|
#=> ["thistle", "thistle", "sift", "sift", "sift", "thistle", "thistles", "thistle", "thistles"]
|
28
28
|
```
|
29
29
|
|
30
30
|
## Thanks
|
31
31
|
|
32
|
-
Loosely based on Tim Cowlishaw's implementation of the same algorithm
|
32
|
+
Loosely based on [Tim Cowlishaw's implementation of the same algorithm](https://github.com/timcowlishaw/aho_corasick).
|
33
33
|
|
34
34
|
## License
|
35
35
|
|
data/lib/aho_corasick_matcher.rb
CHANGED
@@ -12,13 +12,16 @@ class AhoCorasickMatcher
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def match(string)
|
15
|
-
[]
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
15
|
+
matches = []
|
16
|
+
string.each_char.reduce(root) do |node, char|
|
17
|
+
child = (node || root).search(char.intern)
|
18
|
+
next unless child
|
19
|
+
|
20
|
+
matches.push(*child.matches)
|
21
|
+
child
|
21
22
|
end
|
23
|
+
|
24
|
+
matches
|
22
25
|
end
|
23
26
|
|
24
27
|
private
|
@@ -47,8 +50,8 @@ class AhoCorasickMatcher
|
|
47
50
|
end
|
48
51
|
|
49
52
|
class Node
|
50
|
-
attr_reader :matches, :child_map, :
|
51
|
-
|
53
|
+
attr_reader :matches, :child_map, :parent
|
54
|
+
attr_accessor :suffix
|
52
55
|
|
53
56
|
def initialize(parent = nil)
|
54
57
|
@matches = []
|
@@ -57,19 +60,19 @@ class AhoCorasickMatcher
|
|
57
60
|
end
|
58
61
|
|
59
62
|
def search(char)
|
60
|
-
|
63
|
+
child_map[char] || (suffix && suffix.search(char))
|
61
64
|
end
|
62
65
|
|
63
66
|
def child_or_create(char)
|
64
|
-
|
67
|
+
child_map[char] ||= self.class.new(self)
|
65
68
|
end
|
66
69
|
|
67
70
|
def children
|
68
|
-
|
71
|
+
child_map.values
|
69
72
|
end
|
70
73
|
|
71
74
|
def root?
|
72
|
-
parent
|
75
|
+
!parent
|
73
76
|
end
|
74
77
|
|
75
78
|
def build_child_suffixes
|
@@ -89,6 +92,7 @@ class AhoCorasickMatcher
|
|
89
92
|
def find_failure_node(char)
|
90
93
|
failure = suffix
|
91
94
|
failure = failure.suffix until failure.search(char) || failure.root?
|
95
|
+
|
92
96
|
failure
|
93
97
|
end
|
94
98
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aho_corasick_matcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matthew MacLeod
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-06-
|
11
|
+
date: 2015-06-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -64,3 +64,4 @@ summary: A library to search text for occurrences of a list of strings
|
|
64
64
|
test_files:
|
65
65
|
- spec/aho_corasick_matcher_spec.rb
|
66
66
|
- spec/spec_helper.rb
|
67
|
+
has_rdoc:
|