aho_corasick_matcher 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +6 -6
- data/lib/aho_corasick_matcher.rb +16 -12
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 22835eda3c317e6993083b400e28fd83c926ff33
|
4
|
+
data.tar.gz: e4cd6c29e23087dce59d4eb3989f1e9fcdeeb09a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bbd1e3bcf0709c2e55bd731d4fec5d8928773bd7b0d3b8fa45601f96d056970e1470386de8c85b687ea3572142814c2440752cc1fa4c1f956980e946ba2c79f0
|
7
|
+
data.tar.gz: de17ee446c5dff842e1eb299950eeae7e1200396e137df97b1a0b54d05e894173335074e6f26f960af4f2e11193034db652e9dd0c4ab7262c7003b06339ac2a7
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# Aho-Corasick Matcher [Build Status](https://travis-ci.org/altmetric/aho_corasick_matcher)
|
2
2
|
|
3
3
|
A Ruby gem for finding strings in text using the [Aho-Corasick string matching search](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.96.4671&rep=rep1&type=pdf).
|
4
4
|
|
@@ -10,8 +10,8 @@ increases only linearly.
|
|
10
10
|
It's quite memory-intensive, and building a matcher is expensive – but once it's
|
11
11
|
been built, matching terms is very fast.
|
12
12
|
|
13
|
-
**Current version:** 1.0.0
|
14
|
-
**Supported Ruby versions:** 1.9.2, 1.9.3, 2.0, 2.1, 2.2, jruby-1.7, rbx-2.
|
13
|
+
**Current version:** 1.0.1
|
14
|
+
**Supported Ruby versions:** 1.8.7, 1.9.2, 1.9.3, 2.0, 2.1, 2.2, jruby-1.7, rbx-2.5
|
15
15
|
|
16
16
|
## Usage
|
17
17
|
|
@@ -22,14 +22,14 @@ matcher = AhoCorasickMatcher.new(['a', 'b', 'ab'])
|
|
22
22
|
matcher.match('aba')
|
23
23
|
#=> ['a', 'ab', 'b', 'a']
|
24
24
|
|
25
|
-
matcher = AhoCorasickMatcher.new([
|
26
|
-
matcher.match(
|
25
|
+
matcher = AhoCorasickMatcher.new(['thistle', 'sift', 'thistles'])
|
26
|
+
matcher.match('Theophilus thistle, the successful thistle sifter, in sifting a sieve full of un-sifted thistles, thrust three thousand thistles through the thick of his thumb.')
|
27
27
|
#=> ["thistle", "thistle", "sift", "sift", "sift", "thistle", "thistles", "thistle", "thistles"]
|
28
28
|
```
|
29
29
|
|
30
30
|
## Thanks
|
31
31
|
|
32
|
-
Loosely based on Tim Cowlishaw's implementation of the same algorithm
|
32
|
+
Loosely based on [Tim Cowlishaw's implementation of the same algorithm](https://github.com/timcowlishaw/aho_corasick).
|
33
33
|
|
34
34
|
## License
|
35
35
|
|
data/lib/aho_corasick_matcher.rb
CHANGED
@@ -12,13 +12,16 @@ class AhoCorasickMatcher
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def match(string)
|
15
|
-
[]
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
15
|
+
matches = []
|
16
|
+
string.each_char.reduce(root) do |node, char|
|
17
|
+
child = (node || root).search(char.intern)
|
18
|
+
next unless child
|
19
|
+
|
20
|
+
matches.push(*child.matches)
|
21
|
+
child
|
21
22
|
end
|
23
|
+
|
24
|
+
matches
|
22
25
|
end
|
23
26
|
|
24
27
|
private
|
@@ -47,8 +50,8 @@ class AhoCorasickMatcher
|
|
47
50
|
end
|
48
51
|
|
49
52
|
class Node
|
50
|
-
attr_reader :matches, :child_map, :
|
51
|
-
|
53
|
+
attr_reader :matches, :child_map, :parent
|
54
|
+
attr_accessor :suffix
|
52
55
|
|
53
56
|
def initialize(parent = nil)
|
54
57
|
@matches = []
|
@@ -57,19 +60,19 @@ class AhoCorasickMatcher
|
|
57
60
|
end
|
58
61
|
|
59
62
|
def search(char)
|
60
|
-
|
63
|
+
child_map[char] || (suffix && suffix.search(char))
|
61
64
|
end
|
62
65
|
|
63
66
|
def child_or_create(char)
|
64
|
-
|
67
|
+
child_map[char] ||= self.class.new(self)
|
65
68
|
end
|
66
69
|
|
67
70
|
def children
|
68
|
-
|
71
|
+
child_map.values
|
69
72
|
end
|
70
73
|
|
71
74
|
def root?
|
72
|
-
parent
|
75
|
+
!parent
|
73
76
|
end
|
74
77
|
|
75
78
|
def build_child_suffixes
|
@@ -89,6 +92,7 @@ class AhoCorasickMatcher
|
|
89
92
|
def find_failure_node(char)
|
90
93
|
failure = suffix
|
91
94
|
failure = failure.suffix until failure.search(char) || failure.root?
|
95
|
+
|
92
96
|
failure
|
93
97
|
end
|
94
98
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aho_corasick_matcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matthew MacLeod
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-06-
|
11
|
+
date: 2015-06-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
@@ -64,3 +64,4 @@ summary: A library to search text for occurrences of a list of strings
|
|
64
64
|
test_files:
|
65
65
|
- spec/aho_corasick_matcher_spec.rb
|
66
66
|
- spec/spec_helper.rb
|
67
|
+
has_rdoc:
|