aurelian-ruby-ahocorasick 0.6.1 → 0.6.2
Sign up to get free protection for your applications and to get access to all the features.
- data/MIT-LICENSE +1 -1
- data/ext/ahocorasick/ruby-ahocorasick.c +30 -21
- data/lib/ahocorasick.rb +19 -1
- metadata +1 -1
data/MIT-LICENSE
CHANGED
@@ -11,7 +11,6 @@
|
|
11
11
|
//
|
12
12
|
// * kwt.find_each ("str") {|r| .. }
|
13
13
|
// * kwt.find_first("str")
|
14
|
-
// * kwt.find_all ("str")
|
15
14
|
//
|
16
15
|
// TODO: change last_id and dictionary_size to long
|
17
16
|
//
|
@@ -40,11 +39,21 @@ struct kwt_struct_data {
|
|
40
39
|
// ResultFilter interface
|
41
40
|
//
|
42
41
|
|
42
|
+
/*
|
43
|
+
* call-seq: initialize
|
44
|
+
*
|
45
|
+
* Does nothing.
|
46
|
+
*/
|
43
47
|
static VALUE
|
44
48
|
rb_rf_init(VALUE self) {
|
45
49
|
return self;
|
46
50
|
}
|
47
51
|
|
52
|
+
/*
|
53
|
+
* call-seq: valid?(result, string)
|
54
|
+
*
|
55
|
+
* Only defines the signature for this method.
|
56
|
+
*/
|
48
57
|
static VALUE
|
49
58
|
rb_rf_valid(int argc, VALUE *argv, VALUE self) {
|
50
59
|
VALUE result;
|
@@ -85,7 +94,6 @@ rb_kwt_init(VALUE self)
|
|
85
94
|
}
|
86
95
|
|
87
96
|
/*
|
88
|
-
* Document-method: make
|
89
97
|
* call-seq: make
|
90
98
|
*
|
91
99
|
* It freezes the current KeywordTree.
|
@@ -118,16 +126,15 @@ rb_kwt_make(VALUE self)
|
|
118
126
|
}
|
119
127
|
|
120
128
|
/*
|
121
|
-
*
|
122
|
-
* call-seq: find_all
|
129
|
+
* call-seq: find_all(string)
|
123
130
|
*
|
124
131
|
* Search the current tree.
|
125
132
|
*
|
126
133
|
* It returns an array of hashes, e.g.
|
127
134
|
*
|
128
|
-
* [ { :id => int, :value =>
|
135
|
+
* [ { :id => int, :value => string, :starts_at => int, :ends_at => int}, { ... } ]
|
129
136
|
*
|
130
|
-
*
|
137
|
+
* Or an empty array if it did not find anything.
|
131
138
|
*
|
132
139
|
* # assuming a valid KeywordTree kwt object:
|
133
140
|
* kwt.add_string("one")
|
@@ -193,7 +200,6 @@ rb_kwt_find_all(int argc, VALUE *argv, VALUE self)
|
|
193
200
|
}
|
194
201
|
|
195
202
|
/*
|
196
|
-
* Document-method: size
|
197
203
|
* call-seq: size
|
198
204
|
*
|
199
205
|
* Returns the size of this KeywordTree
|
@@ -213,8 +219,7 @@ rb_kwt_size(VALUE self)
|
|
213
219
|
}
|
214
220
|
|
215
221
|
/*
|
216
|
-
*
|
217
|
-
* call-seq: add_string
|
222
|
+
* call-seq: add_string(string, id= nil)
|
218
223
|
*
|
219
224
|
* Adds a sequence to this KeywordTree.
|
220
225
|
*
|
@@ -238,13 +243,11 @@ rb_kwt_add_string(int argc, VALUE *argv, VALUE self)
|
|
238
243
|
{
|
239
244
|
VALUE v_string, v_id;
|
240
245
|
struct kwt_struct_data *kwt_data;
|
241
|
-
// char * string;
|
242
246
|
int id;
|
243
247
|
|
244
248
|
rb_scan_args(argc, argv, "11", &v_string, &v_id);
|
245
249
|
|
246
250
|
Check_Type(v_string, T_STRING);
|
247
|
-
// string= StringValuePtr(v_string);
|
248
251
|
KeywordTree(self, kwt_data);
|
249
252
|
|
250
253
|
if(kwt_data->is_frozen == 1)
|
@@ -267,6 +270,14 @@ rb_kwt_add_string(int argc, VALUE *argv, VALUE self)
|
|
267
270
|
return INT2FIX(id);
|
268
271
|
}
|
269
272
|
|
273
|
+
/*
|
274
|
+
* call-seq: filter=(AhoCorasick::ResultFilter)
|
275
|
+
*
|
276
|
+
* Attach a <tt>filter</tt> to this KeywordTree.
|
277
|
+
*
|
278
|
+
* A <tt>filter</tt> should extend AhoCorasick::ResultFilter and implement <tt>valid?</tt> method.
|
279
|
+
*
|
280
|
+
*/
|
270
281
|
static VALUE
|
271
282
|
rb_kwt_set_filter(int argc, VALUE *argv, VALUE self) {
|
272
283
|
struct kwt_struct_data *kwt_data;
|
@@ -283,6 +294,12 @@ rb_kwt_set_filter(int argc, VALUE *argv, VALUE self) {
|
|
283
294
|
return filter;
|
284
295
|
}
|
285
296
|
|
297
|
+
/*
|
298
|
+
* call-seq: filter
|
299
|
+
*
|
300
|
+
* It gets the <tt>filter</tt>. D'oh.
|
301
|
+
*
|
302
|
+
*/
|
286
303
|
static VALUE
|
287
304
|
rb_kwt_get_filter(VALUE self) {
|
288
305
|
VALUE filter;
|
@@ -294,17 +311,9 @@ rb_kwt_get_filter(VALUE self) {
|
|
294
311
|
}
|
295
312
|
|
296
313
|
/*
|
297
|
-
* call-seq:
|
314
|
+
* call-seq: _from_file
|
298
315
|
*
|
299
|
-
*
|
300
|
-
*
|
301
|
-
* % cat dict0.txt
|
302
|
-
* foo
|
303
|
-
* bar
|
304
|
-
* base
|
305
|
-
*
|
306
|
-
* k= AhoCorasick::KeywordTree.from_file "dict0.txt"
|
307
|
-
* k.search("basement").size # => 1
|
316
|
+
* ==== Note: It's not safe to call this method directly; use from_file instead.
|
308
317
|
*
|
309
318
|
*/
|
310
319
|
static VALUE
|
data/lib/ahocorasick.rb
CHANGED
@@ -2,15 +2,33 @@
|
|
2
2
|
require 'ahocorasick/native'
|
3
3
|
|
4
4
|
module AhoCorasick
|
5
|
-
VERSION='0.6.1'
|
5
|
+
VERSION='0.6.2'
|
6
6
|
|
7
7
|
class KeywordTree
|
8
8
|
|
9
|
+
#
|
10
|
+
# Loads the contents of file into the KeywordTree
|
11
|
+
#
|
12
|
+
# k= AhoCorasick::KeywordTree.new
|
13
|
+
# k.from_file "dictionary.txt"
|
14
|
+
#
|
15
|
+
#
|
9
16
|
def from_file file
|
10
17
|
File.read(file).each { | string | self.add_string string }
|
11
18
|
self
|
12
19
|
end
|
13
20
|
|
21
|
+
#
|
22
|
+
# Creates a new KeywordTree and loads the dictionary from a file
|
23
|
+
#
|
24
|
+
# % cat dict0.txt
|
25
|
+
# foo
|
26
|
+
# bar
|
27
|
+
# base
|
28
|
+
#
|
29
|
+
# k= AhoCorasick::KeywordTree.from_file "dict0.txt"
|
30
|
+
# k.find_all("basement").size # => 1
|
31
|
+
#
|
14
32
|
def self.from_file filename
|
15
33
|
self._from_file filename
|
16
34
|
end
|