aurelian-ruby-ahocorasick 0.4.5 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.textile CHANGED
@@ -48,7 +48,11 @@ $ rake install
48
48
 
49
49
  h3. Rubygems - Stable Version
50
50
 
51
- There's no stable version right now.
51
+ Get version 0.4.5 (released on 19 November 2008) from "rubyforge":http://rubyforge.org/frs/?group_id=4024&release_id=28421 :
52
+
53
+ <pre>
54
+ $ gem install ruby-ahocorasick
55
+ </pre>
52
56
 
53
57
 
54
58
  h4. Notes
File without changes
File without changes
@@ -0,0 +1,8 @@
1
+ require "mkmf"
2
+
3
+ $CFLAGS << " -O3 -Wall -Wextra -Wcast-qual -Wwrite-strings -Wconversion -Wmissing-noreturn -Winline"
4
+
5
+ dir_config("ahocorasick")
6
+
7
+ create_makefile("ahocorasick/native")
8
+
@@ -132,7 +132,7 @@ static VALUE
132
132
  rb_kwt_find_all(int argc, VALUE *argv, VALUE self)
133
133
  {
134
134
  char * remain; // returned by ac_search, the remaining text to search
135
- int lgt, id, ends_at, starts_at; // filled in by ac_search: the length of the result, the id, and starts_at/ends_at position
135
+ int lgt, id, ends_at; // filled in by ac_search: the length of the result, the id, and the ends_at position
136
136
  VALUE v_result; // one result, as hash
137
137
  VALUE v_results; // all the results, an array
138
138
 
@@ -143,12 +143,14 @@ rb_kwt_find_all(int argc, VALUE *argv, VALUE self)
143
143
  rb_scan_args(argc, argv, "1", &v_search);
144
144
  // it should be string.
145
145
  Check_Type(v_search, T_STRING);
146
+ v_search= StringValue( v_search );
147
+
146
148
  // get the structure
147
149
  KeywordTree(self, kwt_data);
148
150
  // freeze the tree, if not already
149
151
  if(kwt_data->is_frozen == 0) {
150
152
  if(ac_prep( kwt_data->tree ) == 0)
151
- rb_raise(rb_eRuntimeError, "Cannot freeze the tree");
153
+ rb_raise(rb_eRuntimeError, "Cannot freeze the tree!");
152
154
  kwt_data->is_frozen = 1;
153
155
  }
154
156
  // prepare the return value
@@ -157,15 +159,15 @@ rb_kwt_find_all(int argc, VALUE *argv, VALUE self)
157
159
  if(kwt_data->dictionary_size == 0)
158
160
  return v_results;
159
161
  // prepare the search
160
- ac_search_init(kwt_data->tree, RSTRING( v_search )->ptr, RSTRING( v_search )->len);
162
+ ac_search_init(kwt_data->tree, StringValuePtr(v_search), (int)NUM2INT(rb_funcall(v_search, rb_intern("length"), 0)));
161
163
  // loop through the results
162
164
  while((remain= ac_search(kwt_data->tree, &lgt, &id, &ends_at)) != NULL) {
163
165
  // this is an individual result as a hash
164
166
  v_result= rb_hash_new();
165
- rb_hash_aset( v_result, sym_id, INT2FIX(id) );
166
- rb_hash_aset( v_result, sym_starts_at, INT2FIX( ends_at - lgt - 1 ) );
167
- rb_hash_aset( v_result, sym_ends_at, INT2FIX( ends_at - 1 ) );
168
- rb_hash_aset( v_result, sym_value, rb_str_new(remain, lgt) );
167
+ rb_hash_aset( v_result, sym_id, INT2NUM( (long)id ) );
168
+ rb_hash_aset( v_result, sym_starts_at, INT2NUM( (long)(ends_at - lgt - 1) ) );
169
+ rb_hash_aset( v_result, sym_ends_at, INT2NUM( (long)(ends_at - 1) ) );
170
+ rb_hash_aset( v_result, sym_value, rb_str_new(remain, (long)lgt) );
169
171
  rb_ary_push( v_results, v_result );
170
172
  }
171
173
  // reopen the tree
@@ -219,30 +221,29 @@ rb_kwt_add_string(int argc, VALUE *argv, VALUE self)
219
221
  {
220
222
  VALUE v_string, v_id;
221
223
  struct kwt_struct_data *kwt_data;
222
- char * string;
224
+ // char * string;
223
225
  int id;
224
226
 
225
227
  rb_scan_args(argc, argv, "11", &v_string, &v_id);
226
228
 
227
229
  Check_Type(v_string, T_STRING);
228
- string= RSTRING(v_string)->ptr;
230
+ // string= StringValuePtr(v_string);
229
231
  KeywordTree(self, kwt_data);
230
232
 
231
233
  if(kwt_data->is_frozen == 1)
232
- rb_raise(rb_eRuntimeError, "Cannot add `%s\" into a frozen tree.", string);
234
+ rb_raise(rb_eRuntimeError, "Cannot add `%s\" into a frozen tree.", StringValuePtr(v_string));
233
235
 
234
236
  if(v_id == Qnil) {
235
237
  id = kwt_data->last_id;
236
238
  } else if(TYPE(v_id) != T_FIXNUM) {
237
- rb_raise(rb_eRuntimeError, "Please use a number from 1 to K as id, or leave nil to auto-generate one. `%s\" given.", RSTRING(v_id)->ptr);
239
+ rb_raise(rb_eRuntimeError, "Please use a number from 1 to K as id, or leave nil to auto-generate one. `%s\" given.", StringValuePtr(v_id));
238
240
  } else if(NUM2INT(v_id) <= 0) {
239
241
  rb_raise(rb_eRuntimeError, "Please use a number from 1 to K as id, or leave nil to auto-generate one. `%d\" given.", NUM2INT(v_id));
240
242
  } else {
241
243
  id= NUM2INT(v_id);
242
244
  }
243
-
244
- if(ac_add_string(kwt_data->tree, string, strlen(string), id) == 0)
245
- rb_raise(rb_eRuntimeError, "Failed to add `%s\", duplicate id `%d\"?", string, id);
245
+ if(ac_add_string(kwt_data->tree, StringValuePtr(v_string), (int)NUM2INT(rb_funcall(v_string, rb_intern("length"), 0)), id) == 0)
246
+ rb_raise(rb_eRuntimeError, "Failed to add `%s\", duplicate id `%d\"?", StringValuePtr(v_string), id);
246
247
 
247
248
  kwt_data->last_id= id + 1;
248
249
  kwt_data->dictionary_size++;
@@ -273,24 +274,23 @@ rb_kwt_new_from_file(int argc, VALUE *argv, VALUE klass)
273
274
 
274
275
  struct kwt_struct_data *kwt_data;
275
276
  char word[1024];
276
- int id;
277
+ int id = 0;
277
278
  VALUE self;
278
- VALUE f_string;
279
+ VALUE filename;
279
280
  FILE *dictionary;
280
281
 
281
- rb_scan_args(argc, argv, "10", &f_string);
282
-
283
- id = 0;
284
- SafeStringValue( f_string );
282
+ rb_scan_args(argc, argv, "10", &filename);
283
+
284
+ SafeStringValue(filename);
285
285
  self= rb_class_new_instance( 0, NULL, klass );
286
286
  KeywordTree( self, kwt_data );
287
287
 
288
- dictionary = fopen( RSTRING( f_string )->ptr, "r" );
288
+ dictionary= fopen( StringValuePtr(filename), "r" );
289
289
  if(dictionary == NULL)
290
- rb_raise(rb_eRuntimeError, "Cannot open `%s\". No such file?", RSTRING(f_string)->ptr);
290
+ rb_raise(rb_eRuntimeError, "Cannot open `%s\". No such file?", StringValuePtr(filename));
291
291
 
292
292
  while(fgets(word, 1024, dictionary) != NULL) {
293
- ac_add_string(kwt_data->tree, word, strlen(word)-1, id++);
293
+ ac_add_string(kwt_data->tree, word, (int)(strlen(word)-1), id++);
294
294
  kwt_data->dictionary_size++;
295
295
  }
296
296
 
@@ -314,7 +314,7 @@ rb_kwt_struct_alloc(VALUE klass)
314
314
  /*
315
315
  * Blump.
316
316
  */
317
- void Init_ahocorasick() {
317
+ void Init_native() {
318
318
  rb_mAhoCorasick = rb_define_module("AhoCorasick");
319
319
  rb_cKeywordTree = rb_define_class_under(rb_mAhoCorasick, "KeywordTree", rb_cObject);
320
320
 
@@ -0,0 +1,7 @@
1
+
2
+ require 'ahocorasick/native'
3
+
4
+ module AhoCorasick
5
+ VERSION='0.5.0'
6
+ end
7
+
@@ -1,5 +1,8 @@
1
- require 'ext/ahocorasick'
1
+ %w(../lib ../ext).each do |path|
2
+ $LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__), path)))
3
+ end
2
4
 
5
+ require 'ahocorasick'
3
6
  include AhoCorasick
4
7
 
5
8
  describe KeywordTree do
@@ -15,6 +18,51 @@ describe KeywordTree do
15
18
  end
16
19
  end
17
20
 
21
+ describe "not bugs" do
22
+ it "should return 3 results" do
23
+ tree= KeywordTree.new
24
+ tree.add_string "data"
25
+ tree.add_string "database"
26
+ results= tree.find_all "move all the data to a new database"
27
+ results.size.should == 3
28
+ end
29
+ it "should also return 3 results" do
30
+ tree= KeywordTree.new
31
+ tree.add_string "database"
32
+ tree.add_string "data"
33
+ results= tree.find_all "move all the data to a new database"
34
+ results.size.should == 3
35
+ end
36
+ it "should return 2 results" do
37
+ tree= KeywordTree.new
38
+ tree.add_string "base"
39
+ tree.add_string "database"
40
+ results= tree.find_all "move all the data to a new database"
41
+ results.size.should == 2
42
+ end
43
+ it "should also return 2 results" do
44
+ tree= KeywordTree.new
45
+ tree.add_string "database"
46
+ tree.add_string "base"
47
+ results= tree.find_all "move all the data to a new database"
48
+ results.size.should == 2
49
+ end
50
+ it "should return 2 results" do
51
+ tree= KeywordTree.new
52
+ tree.add_string "data"
53
+ results= tree.find_all "move all the data to a new database"
54
+ results.size.should == 2
55
+ end
56
+ it "should return 1 result on duplicates" do
57
+ tree= KeywordTree.new
58
+ tree.add_string "database"
59
+ tree.add_string "database"
60
+ results= tree.find_all "move all the data to a new database"
61
+ results.size.should == 1
62
+ results[0][:id] == 2
63
+ end
64
+ end
65
+
18
66
  describe "How to create a new KeywordTree" do
19
67
  it "should create a new KeywordTree" do
20
68
  KeywordTree.new.class.should == KeywordTree
@@ -192,20 +240,28 @@ describe KeywordTree do
192
240
 
193
241
  describe "Benchmarks. Loading from a file" do
194
242
 
243
+ before(:each) do
244
+ @start= Time.now
245
+ end
246
+
247
+ after(:each) do
248
+ @start=nil
249
+ end
250
+
195
251
  it "should be fast to load a bunch of english words" do
196
- start= Time.now
197
252
  k= KeywordTree.from_file File.dirname(__FILE__) + "/data/en.words"
198
- puts "\n%d words loaded in %s seconds" % [k.size, (Time.now - start)]
199
- (Time.now-start).should < 0.2
253
+ puts "\n%d words loaded in %s seconds" % [k.size, (Time.now - @start)]
254
+ (Time.now-@start).should < 0.2
200
255
  end
201
256
 
202
257
  it "should be fast to find" do
203
- start= Time.now
258
+ # start= Time.now
204
259
  k= KeywordTree.from_file File.dirname(__FILE__) + "/data/en.words"
205
260
  load_time= Time.now
206
261
  results= k.find_all( File.read( File.dirname(__FILE__) + "/data/melville-moby_dick.txt" ) )
207
- puts "\n%d words re-loaded in %s seconds.\nGot %d results in %s seconds" % [k.size, (load_time - start), results.size, (Time.now-load_time)]
262
+ puts "\n%d words re-loaded in %s seconds.\nGot %d results in %s seconds" % [k.size, (load_time - @start), results.size, (Time.now-load_time)]
208
263
  (Time.now-load_time).should < 1.3
264
+ puts results.last.inspect
209
265
  end
210
266
  end
211
267
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aurelian-ruby-ahocorasick
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.5
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aurelian Oancea
@@ -18,14 +18,15 @@ email: oancea at gmail dot com
18
18
  executables: []
19
19
 
20
20
  extensions:
21
- - ext/extconf.rb
21
+ - ext/ahocorasick/extconf.rb
22
22
  extra_rdoc_files: []
23
23
 
24
24
  files:
25
- - ext/extconf.rb
26
- - ext/ruby-ahocorasick.c
27
- - ext/ac.h
28
- - ext/ac.c
25
+ - ext/ahocorasick/extconf.rb
26
+ - ext/ahocorasick/ruby-ahocorasick.c
27
+ - ext/ahocorasick/ac.h
28
+ - ext/ahocorasick/ac.c
29
+ - lib/ahocorasick.rb
29
30
  - examples/dict.rb
30
31
  - examples/test.rb
31
32
  - examples/elev.rb
@@ -40,7 +41,7 @@ rdoc_options:
40
41
  - --title
41
42
  - Ruby-AhoCorasick
42
43
  - --inline-source
43
- - ext/ruby-ahocorasick.c
44
+ - ext/ahocorasick/ruby-ahocorasick.c
44
45
  - README.textile
45
46
  - --main
46
47
  - README.textile
data/ext/extconf.rb DELETED
@@ -1,6 +0,0 @@
1
- require "mkmf"
2
-
3
- dir_config("ahocorasick")
4
-
5
- create_makefile("ahocorasick")
6
-