aurelian-ruby-ahocorasick 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +1 -1
- data/ext/ahocorasick/ruby-ahocorasick.c +30 -21
- data/lib/ahocorasick.rb +19 -1
- metadata +1 -1
data/MIT-LICENSE
CHANGED
@@ -11,7 +11,6 @@
|
|
11
11
|
//
|
12
12
|
// * kwt.find_each ("str") {|r| .. }
|
13
13
|
// * kwt.find_first("str")
|
14
|
-
// * kwt.find_all ("str")
|
15
14
|
//
|
16
15
|
// TODO: change last_id and dictionary_size to long
|
17
16
|
//
|
@@ -40,11 +39,21 @@ struct kwt_struct_data {
|
|
40
39
|
// ResultFilter interface
|
41
40
|
//
|
42
41
|
|
42
|
+
/*
|
43
|
+
* call-seq: initialize
|
44
|
+
*
|
45
|
+
* Does nothing.
|
46
|
+
*/
|
43
47
|
static VALUE
|
44
48
|
rb_rf_init(VALUE self) {
|
45
49
|
return self;
|
46
50
|
}
|
47
51
|
|
52
|
+
/*
|
53
|
+
* call-seq: valid?(result, string)
|
54
|
+
*
|
55
|
+
* Only defines the signature for this method.
|
56
|
+
*/
|
48
57
|
static VALUE
|
49
58
|
rb_rf_valid(int argc, VALUE *argv, VALUE self) {
|
50
59
|
VALUE result;
|
@@ -85,7 +94,6 @@ rb_kwt_init(VALUE self)
|
|
85
94
|
}
|
86
95
|
|
87
96
|
/*
|
88
|
-
* Document-method: make
|
89
97
|
* call-seq: make
|
90
98
|
*
|
91
99
|
* It freezes the current KeywordTree.
|
@@ -118,16 +126,15 @@ rb_kwt_make(VALUE self)
|
|
118
126
|
}
|
119
127
|
|
120
128
|
/*
|
121
|
-
*
|
122
|
-
* call-seq: find_all
|
129
|
+
* call-seq: find_all(string)
|
123
130
|
*
|
124
131
|
* Search the current tree.
|
125
132
|
*
|
126
133
|
* It returns an array of hashes, e.g.
|
127
134
|
*
|
128
|
-
* [ { :id => int, :value =>
|
135
|
+
* [ { :id => int, :value => string, :starts_at => int, :ends_at => int}, { ... } ]
|
129
136
|
*
|
130
|
-
*
|
137
|
+
* Or an empty array if it did not find anything.
|
131
138
|
*
|
132
139
|
* # assuming a valid KeywordTree kwt object:
|
133
140
|
* kwt.add_string("one")
|
@@ -193,7 +200,6 @@ rb_kwt_find_all(int argc, VALUE *argv, VALUE self)
|
|
193
200
|
}
|
194
201
|
|
195
202
|
/*
|
196
|
-
* Document-method: size
|
197
203
|
* call-seq: size
|
198
204
|
*
|
199
205
|
* Returns the size of this KeywordTree
|
@@ -213,8 +219,7 @@ rb_kwt_size(VALUE self)
|
|
213
219
|
}
|
214
220
|
|
215
221
|
/*
|
216
|
-
*
|
217
|
-
* call-seq: add_string
|
222
|
+
* call-seq: add_string(string, id= nil)
|
218
223
|
*
|
219
224
|
* Adds a sequence to this KeywordTree.
|
220
225
|
*
|
@@ -238,13 +243,11 @@ rb_kwt_add_string(int argc, VALUE *argv, VALUE self)
|
|
238
243
|
{
|
239
244
|
VALUE v_string, v_id;
|
240
245
|
struct kwt_struct_data *kwt_data;
|
241
|
-
// char * string;
|
242
246
|
int id;
|
243
247
|
|
244
248
|
rb_scan_args(argc, argv, "11", &v_string, &v_id);
|
245
249
|
|
246
250
|
Check_Type(v_string, T_STRING);
|
247
|
-
// string= StringValuePtr(v_string);
|
248
251
|
KeywordTree(self, kwt_data);
|
249
252
|
|
250
253
|
if(kwt_data->is_frozen == 1)
|
@@ -267,6 +270,14 @@ rb_kwt_add_string(int argc, VALUE *argv, VALUE self)
|
|
267
270
|
return INT2FIX(id);
|
268
271
|
}
|
269
272
|
|
273
|
+
/*
|
274
|
+
* call-seq: filter=(AhoCorasick::ResultFilter)
|
275
|
+
*
|
276
|
+
* Attach a <tt>filter</tt> to this KeywordTree.
|
277
|
+
*
|
278
|
+
* A <tt>filter</tt> should extend AhoCorasick::ResultFilter and implement <tt>valid?</tt> method.
|
279
|
+
*
|
280
|
+
*/
|
270
281
|
static VALUE
|
271
282
|
rb_kwt_set_filter(int argc, VALUE *argv, VALUE self) {
|
272
283
|
struct kwt_struct_data *kwt_data;
|
@@ -283,6 +294,12 @@ rb_kwt_set_filter(int argc, VALUE *argv, VALUE self) {
|
|
283
294
|
return filter;
|
284
295
|
}
|
285
296
|
|
297
|
+
/*
|
298
|
+
* call-seq: filter
|
299
|
+
*
|
300
|
+
* It gets the <tt>filter</tt>. D'oh.
|
301
|
+
*
|
302
|
+
*/
|
286
303
|
static VALUE
|
287
304
|
rb_kwt_get_filter(VALUE self) {
|
288
305
|
VALUE filter;
|
@@ -294,17 +311,9 @@ rb_kwt_get_filter(VALUE self) {
|
|
294
311
|
}
|
295
312
|
|
296
313
|
/*
|
297
|
-
* call-seq:
|
314
|
+
* call-seq: _from_file
|
298
315
|
*
|
299
|
-
*
|
300
|
-
*
|
301
|
-
* % cat dict0.txt
|
302
|
-
* foo
|
303
|
-
* bar
|
304
|
-
* base
|
305
|
-
*
|
306
|
-
* k= AhoCorasick::KeywordTree.from_file "dict0.txt"
|
307
|
-
* k.search("basement").size # => 1
|
316
|
+
* ==== Note: It's not safe to call this method directly; use from_file instead.
|
308
317
|
*
|
309
318
|
*/
|
310
319
|
static VALUE
|
data/lib/ahocorasick.rb
CHANGED
@@ -2,15 +2,33 @@
|
|
2
2
|
require 'ahocorasick/native'
|
3
3
|
|
4
4
|
module AhoCorasick
|
5
|
-
VERSION='0.6.1'
|
5
|
+
VERSION='0.6.2'
|
6
6
|
|
7
7
|
class KeywordTree
|
8
8
|
|
9
|
+
#
|
10
|
+
# Loads the contents of file into the KeywordTree
|
11
|
+
#
|
12
|
+
# k= AhoCorasick::KeywordTree.new
|
13
|
+
# k.from_file "dictionary.txt"
|
14
|
+
#
|
15
|
+
#
|
9
16
|
def from_file file
|
10
17
|
File.read(file).each { | string | self.add_string string }
|
11
18
|
self
|
12
19
|
end
|
13
20
|
|
21
|
+
#
|
22
|
+
# Creates a new KeywordTree and loads the dictionary from a file
|
23
|
+
#
|
24
|
+
# % cat dict0.txt
|
25
|
+
# foo
|
26
|
+
# bar
|
27
|
+
# base
|
28
|
+
#
|
29
|
+
# k= AhoCorasick::KeywordTree.from_file "dict0.txt"
|
30
|
+
# k.find_all("basement").size # => 1
|
31
|
+
#
|
14
32
|
def self.from_file filename
|
15
33
|
self._from_file filename
|
16
34
|
end
|