aurelian-ruby-ahocorasick 0.4.5 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.textile CHANGED
@@ -48,7 +48,11 @@ $ rake install
48
48
 
49
49
  h3. Rubygems - Stable Version
50
50
 
51
- There's no stable version right now.
51
+ Get version 0.4.5 (released on 19 November 2008) from "rubyforge":http://rubyforge.org/frs/?group_id=4024&release_id=28421 :
52
+
53
+ <pre>
54
+ $ gem install ruby-ahocorasick
55
+ </pre>
52
56
 
53
57
 
54
58
  h4. Notes
File without changes
File without changes
@@ -0,0 +1,8 @@
1
+ require "mkmf"
2
+
3
+ $CFLAGS << " -O3 -Wall -Wextra -Wcast-qual -Wwrite-strings -Wconversion -Wmissing-noreturn -Winline"
4
+
5
+ dir_config("ahocorasick")
6
+
7
+ create_makefile("ahocorasick/native")
8
+
@@ -132,7 +132,7 @@ static VALUE
132
132
  rb_kwt_find_all(int argc, VALUE *argv, VALUE self)
133
133
  {
134
134
  char * remain; // returned by ac_search, the remaining text to search
135
- int lgt, id, ends_at, starts_at; // filled in by ac_search: the length of the result, the id, and starts_at/ends_at position
135
+ int lgt, id, ends_at; // filled in by ac_search: the length of the result, the id, and the ends_at position
136
136
  VALUE v_result; // one result, as hash
137
137
  VALUE v_results; // all the results, an array
138
138
 
@@ -143,12 +143,14 @@ rb_kwt_find_all(int argc, VALUE *argv, VALUE self)
143
143
  rb_scan_args(argc, argv, "1", &v_search);
144
144
  // it should be string.
145
145
  Check_Type(v_search, T_STRING);
146
+ v_search= StringValue( v_search );
147
+
146
148
  // get the structure
147
149
  KeywordTree(self, kwt_data);
148
150
  // freeze the tree, if not already
149
151
  if(kwt_data->is_frozen == 0) {
150
152
  if(ac_prep( kwt_data->tree ) == 0)
151
- rb_raise(rb_eRuntimeError, "Cannot freeze the tree");
153
+ rb_raise(rb_eRuntimeError, "Cannot freeze the tree!");
152
154
  kwt_data->is_frozen = 1;
153
155
  }
154
156
  // prepare the return value
@@ -157,15 +159,15 @@ rb_kwt_find_all(int argc, VALUE *argv, VALUE self)
157
159
  if(kwt_data->dictionary_size == 0)
158
160
  return v_results;
159
161
  // prepare the search
160
- ac_search_init(kwt_data->tree, RSTRING( v_search )->ptr, RSTRING( v_search )->len);
162
+ ac_search_init(kwt_data->tree, StringValuePtr(v_search), (int)NUM2INT(rb_funcall(v_search, rb_intern("length"), 0)));
161
163
  // loop through the results
162
164
  while((remain= ac_search(kwt_data->tree, &lgt, &id, &ends_at)) != NULL) {
163
165
  // this is an individual result as a hash
164
166
  v_result= rb_hash_new();
165
- rb_hash_aset( v_result, sym_id, INT2FIX(id) );
166
- rb_hash_aset( v_result, sym_starts_at, INT2FIX( ends_at - lgt - 1 ) );
167
- rb_hash_aset( v_result, sym_ends_at, INT2FIX( ends_at - 1 ) );
168
- rb_hash_aset( v_result, sym_value, rb_str_new(remain, lgt) );
167
+ rb_hash_aset( v_result, sym_id, INT2NUM( (long)id ) );
168
+ rb_hash_aset( v_result, sym_starts_at, INT2NUM( (long)(ends_at - lgt - 1) ) );
169
+ rb_hash_aset( v_result, sym_ends_at, INT2NUM( (long)(ends_at - 1) ) );
170
+ rb_hash_aset( v_result, sym_value, rb_str_new(remain, (long)lgt) );
169
171
  rb_ary_push( v_results, v_result );
170
172
  }
171
173
  // reopen the tree
@@ -219,30 +221,29 @@ rb_kwt_add_string(int argc, VALUE *argv, VALUE self)
219
221
  {
220
222
  VALUE v_string, v_id;
221
223
  struct kwt_struct_data *kwt_data;
222
- char * string;
224
+ // char * string;
223
225
  int id;
224
226
 
225
227
  rb_scan_args(argc, argv, "11", &v_string, &v_id);
226
228
 
227
229
  Check_Type(v_string, T_STRING);
228
- string= RSTRING(v_string)->ptr;
230
+ // string= StringValuePtr(v_string);
229
231
  KeywordTree(self, kwt_data);
230
232
 
231
233
  if(kwt_data->is_frozen == 1)
232
- rb_raise(rb_eRuntimeError, "Cannot add `%s\" into a frozen tree.", string);
234
+ rb_raise(rb_eRuntimeError, "Cannot add `%s\" into a frozen tree.", StringValuePtr(v_string));
233
235
 
234
236
  if(v_id == Qnil) {
235
237
  id = kwt_data->last_id;
236
238
  } else if(TYPE(v_id) != T_FIXNUM) {
237
- rb_raise(rb_eRuntimeError, "Please use a number from 1 to K as id, or leave nil to auto-generate one. `%s\" given.", RSTRING(v_id)->ptr);
239
+ rb_raise(rb_eRuntimeError, "Please use a number from 1 to K as id, or leave nil to auto-generate one. `%s\" given.", StringValuePtr(v_id));
238
240
  } else if(NUM2INT(v_id) <= 0) {
239
241
  rb_raise(rb_eRuntimeError, "Please use a number from 1 to K as id, or leave nil to auto-generate one. `%d\" given.", NUM2INT(v_id));
240
242
  } else {
241
243
  id= NUM2INT(v_id);
242
244
  }
243
-
244
- if(ac_add_string(kwt_data->tree, string, strlen(string), id) == 0)
245
- rb_raise(rb_eRuntimeError, "Failed to add `%s\", duplicate id `%d\"?", string, id);
245
+ if(ac_add_string(kwt_data->tree, StringValuePtr(v_string), (int)NUM2INT(rb_funcall(v_string, rb_intern("length"), 0)), id) == 0)
246
+ rb_raise(rb_eRuntimeError, "Failed to add `%s\", duplicate id `%d\"?", StringValuePtr(v_string), id);
246
247
 
247
248
  kwt_data->last_id= id + 1;
248
249
  kwt_data->dictionary_size++;
@@ -273,24 +274,23 @@ rb_kwt_new_from_file(int argc, VALUE *argv, VALUE klass)
273
274
 
274
275
  struct kwt_struct_data *kwt_data;
275
276
  char word[1024];
276
- int id;
277
+ int id = 0;
277
278
  VALUE self;
278
- VALUE f_string;
279
+ VALUE filename;
279
280
  FILE *dictionary;
280
281
 
281
- rb_scan_args(argc, argv, "10", &f_string);
282
-
283
- id = 0;
284
- SafeStringValue( f_string );
282
+ rb_scan_args(argc, argv, "10", &filename);
283
+
284
+ SafeStringValue(filename);
285
285
  self= rb_class_new_instance( 0, NULL, klass );
286
286
  KeywordTree( self, kwt_data );
287
287
 
288
- dictionary = fopen( RSTRING( f_string )->ptr, "r" );
288
+ dictionary= fopen( StringValuePtr(filename), "r" );
289
289
  if(dictionary == NULL)
290
- rb_raise(rb_eRuntimeError, "Cannot open `%s\". No such file?", RSTRING(f_string)->ptr);
290
+ rb_raise(rb_eRuntimeError, "Cannot open `%s\". No such file?", StringValuePtr(filename));
291
291
 
292
292
  while(fgets(word, 1024, dictionary) != NULL) {
293
- ac_add_string(kwt_data->tree, word, strlen(word)-1, id++);
293
+ ac_add_string(kwt_data->tree, word, (int)(strlen(word)-1), id++);
294
294
  kwt_data->dictionary_size++;
295
295
  }
296
296
 
@@ -314,7 +314,7 @@ rb_kwt_struct_alloc(VALUE klass)
314
314
  /*
315
315
  * Blump.
316
316
  */
317
- void Init_ahocorasick() {
317
+ void Init_native() {
318
318
  rb_mAhoCorasick = rb_define_module("AhoCorasick");
319
319
  rb_cKeywordTree = rb_define_class_under(rb_mAhoCorasick, "KeywordTree", rb_cObject);
320
320
 
@@ -0,0 +1,7 @@
1
+
2
+ require 'ahocorasick/native'
3
+
4
+ module AhoCorasick
5
+ VERSION='0.5.0'
6
+ end
7
+
@@ -1,5 +1,8 @@
1
- require 'ext/ahocorasick'
1
+ %w(../lib ../ext).each do |path|
2
+ $LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__), path)))
3
+ end
2
4
 
5
+ require 'ahocorasick'
3
6
  include AhoCorasick
4
7
 
5
8
  describe KeywordTree do
@@ -15,6 +18,51 @@ describe KeywordTree do
15
18
  end
16
19
  end
17
20
 
21
+ describe "not bugs" do
22
+ it "should return 3 results" do
23
+ tree= KeywordTree.new
24
+ tree.add_string "data"
25
+ tree.add_string "database"
26
+ results= tree.find_all "move all the data to a new database"
27
+ results.size.should == 3
28
+ end
29
+ it "should also return 3 results" do
30
+ tree= KeywordTree.new
31
+ tree.add_string "database"
32
+ tree.add_string "data"
33
+ results= tree.find_all "move all the data to a new database"
34
+ results.size.should == 3
35
+ end
36
+ it "should return 2 results" do
37
+ tree= KeywordTree.new
38
+ tree.add_string "base"
39
+ tree.add_string "database"
40
+ results= tree.find_all "move all the data to a new database"
41
+ results.size.should == 2
42
+ end
43
+ it "should also return 2 results" do
44
+ tree= KeywordTree.new
45
+ tree.add_string "database"
46
+ tree.add_string "base"
47
+ results= tree.find_all "move all the data to a new database"
48
+ results.size.should == 2
49
+ end
50
+ it "should return 2 results" do
51
+ tree= KeywordTree.new
52
+ tree.add_string "data"
53
+ results= tree.find_all "move all the data to a new database"
54
+ results.size.should == 2
55
+ end
56
+ it "should return 1 result on duplicates" do
57
+ tree= KeywordTree.new
58
+ tree.add_string "database"
59
+ tree.add_string "database"
60
+ results= tree.find_all "move all the data to a new database"
61
+ results.size.should == 1
62
+ results[0][:id] == 2
63
+ end
64
+ end
65
+
18
66
  describe "How to create a new KeywordTree" do
19
67
  it "should create a new KeywordTree" do
20
68
  KeywordTree.new.class.should == KeywordTree
@@ -192,20 +240,28 @@ describe KeywordTree do
192
240
 
193
241
  describe "Benchmarks. Loading from a file" do
194
242
 
243
+ before(:each) do
244
+ @start= Time.now
245
+ end
246
+
247
+ after(:each) do
248
+ @start=nil
249
+ end
250
+
195
251
  it "should be fast to load a bunch of english words" do
196
- start= Time.now
197
252
  k= KeywordTree.from_file File.dirname(__FILE__) + "/data/en.words"
198
- puts "\n%d words loaded in %s seconds" % [k.size, (Time.now - start)]
199
- (Time.now-start).should < 0.2
253
+ puts "\n%d words loaded in %s seconds" % [k.size, (Time.now - @start)]
254
+ (Time.now-@start).should < 0.2
200
255
  end
201
256
 
202
257
  it "should be fast to find" do
203
- start= Time.now
258
+ # start= Time.now
204
259
  k= KeywordTree.from_file File.dirname(__FILE__) + "/data/en.words"
205
260
  load_time= Time.now
206
261
  results= k.find_all( File.read( File.dirname(__FILE__) + "/data/melville-moby_dick.txt" ) )
207
- puts "\n%d words re-loaded in %s seconds.\nGot %d results in %s seconds" % [k.size, (load_time - start), results.size, (Time.now-load_time)]
262
+ puts "\n%d words re-loaded in %s seconds.\nGot %d results in %s seconds" % [k.size, (load_time - @start), results.size, (Time.now-load_time)]
208
263
  (Time.now-load_time).should < 1.3
264
+ puts results.last.inspect
209
265
  end
210
266
  end
211
267
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aurelian-ruby-ahocorasick
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.5
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aurelian Oancea
@@ -18,14 +18,15 @@ email: oancea at gmail dot com
18
18
  executables: []
19
19
 
20
20
  extensions:
21
- - ext/extconf.rb
21
+ - ext/ahocorasick/extconf.rb
22
22
  extra_rdoc_files: []
23
23
 
24
24
  files:
25
- - ext/extconf.rb
26
- - ext/ruby-ahocorasick.c
27
- - ext/ac.h
28
- - ext/ac.c
25
+ - ext/ahocorasick/extconf.rb
26
+ - ext/ahocorasick/ruby-ahocorasick.c
27
+ - ext/ahocorasick/ac.h
28
+ - ext/ahocorasick/ac.c
29
+ - lib/ahocorasick.rb
29
30
  - examples/dict.rb
30
31
  - examples/test.rb
31
32
  - examples/elev.rb
@@ -40,7 +41,7 @@ rdoc_options:
40
41
  - --title
41
42
  - Ruby-AhoCorasick
42
43
  - --inline-source
43
- - ext/ruby-ahocorasick.c
44
+ - ext/ahocorasick/ruby-ahocorasick.c
44
45
  - README.textile
45
46
  - --main
46
47
  - README.textile
data/ext/extconf.rb DELETED
@@ -1,6 +0,0 @@
1
- require "mkmf"
2
-
3
- dir_config("ahocorasick")
4
-
5
- create_makefile("ahocorasick")
6
-