ferret 0.10.14 → 0.11.0

Sign up to get free protection for your applications and to get access to all the features.
data/ext/r_search.c CHANGED
@@ -160,9 +160,9 @@ frt_get_td(TopDocs *td, VALUE rsearcher)
160
160
  VALUE rtop_docs;
161
161
  VALUE hit_ary = rb_ary_new2(td->size);
162
162
 
163
- RARRAY(hit_ary)->len = td->size;
164
163
  for (i = 0; i < td->size; i++) {
165
164
  RARRAY(hit_ary)->ptr[i] = frt_get_hit(td->hits[i]);
165
+ RARRAY(hit_ary)->len++;
166
166
  }
167
167
 
168
168
  rtop_docs = rb_struct_new(cTopDocs,
@@ -1015,7 +1015,7 @@ frt_bq_add_query(int argc, VALUE *argv, VALUE self)
1015
1015
  rb_raise(rb_eArgError, "Cannot add %s to a BooleanQuery",
1016
1016
  rb_class2name(klass));
1017
1017
  }
1018
- return Qnil;
1018
+ return self;
1019
1019
  }
1020
1020
 
1021
1021
  /****************************************************************************
@@ -1206,7 +1206,7 @@ frt_phq_add(int argc, VALUE *argv, VALUE self)
1206
1206
  default:
1207
1207
  rb_raise(rb_eArgError, "You can only add a string or an array of "
1208
1208
  "strings to a PhraseQuery, not a %s\n",
1209
- RSTRING(rb_obj_as_string(rterm))->ptr);
1209
+ rs2s(rb_obj_as_string(rterm)));
1210
1210
  }
1211
1211
  return self;
1212
1212
  }
@@ -2258,14 +2258,14 @@ frt_sort_add(Sort *sort, VALUE rsf, bool reverse)
2258
2258
  break;
2259
2259
  case T_SYMBOL:
2260
2260
  rsf = rb_obj_as_string(rsf);
2261
- sf = sort_field_auto_new(RSTRING(rsf)->ptr, reverse);
2261
+ sf = sort_field_auto_new(rs2s(rsf), reverse);
2262
2262
  /* need to give it a ruby object so it'll be freed when the
2263
2263
  * sort is garbage collected */
2264
2264
  rsf = frt_get_sf(sf);
2265
2265
  sort_add_sort_field(sort, sf);
2266
2266
  break;
2267
2267
  case T_STRING:
2268
- frt_parse_sort_str(sort, RSTRING(rsf)->ptr);
2268
+ frt_parse_sort_str(sort, rs2s(rsf));
2269
2269
  break;
2270
2270
  default:
2271
2271
  rb_raise(rb_eArgError, "Unknown SortField Type");
@@ -2697,7 +2697,7 @@ frt_sea_explain(VALUE self, VALUE rquery, VALUE rdoc_id)
2697
2697
  * :num_excerpts:: Default: 2. Number of excerpts to return.
2698
2698
  * :pre_tag:: Default: "<b>". Tag to place to the left of the match.
2699
2699
  * You'll probably want to change this to a "<span>" tag
2700
- * with a class "\033[7m" for use in a terminal.
2700
+ * with a class. Try "\033[7m" for use in a terminal.
2701
2701
  * :post_tag:: Default: "</b>". This tag should close the +:pre_tag+.
2702
2702
  * Try tag "\033[m" in the terminal.
2703
2703
  * :ellipsis:: Default: "...". This is the string that is appended at
@@ -2733,13 +2733,13 @@ frt_sea_highlight(int argc, VALUE *argv, VALUE self)
2733
2733
  }
2734
2734
  }
2735
2735
  if (Qnil != (v = rb_hash_aref(roptions, sym_pre_tag))) {
2736
- pre_tag = RSTRING(rb_obj_as_string(v))->ptr;
2736
+ pre_tag = rs2s(rb_obj_as_string(v));
2737
2737
  }
2738
2738
  if (Qnil != (v = rb_hash_aref(roptions, sym_post_tag))) {
2739
- post_tag = RSTRING(rb_obj_as_string(v))->ptr;
2739
+ post_tag = rs2s(rb_obj_as_string(v));
2740
2740
  }
2741
2741
  if (Qnil != (v = rb_hash_aref(roptions, sym_ellipsis))) {
2742
- ellipsis = RSTRING(rb_obj_as_string(v))->ptr;
2742
+ ellipsis = rs2s(rb_obj_as_string(v));
2743
2743
  }
2744
2744
 
2745
2745
  if ((excerpts = searcher_highlight(sea,
@@ -2754,10 +2754,10 @@ frt_sea_highlight(int argc, VALUE *argv, VALUE self)
2754
2754
  const int size = ary_size(excerpts);
2755
2755
  int i;
2756
2756
  VALUE rexcerpts = rb_ary_new2(size);
2757
- RARRAY(rexcerpts)->len = size;
2758
2757
 
2759
2758
  for (i = 0; i < size; i++) {
2760
2759
  RARRAY(rexcerpts)->ptr[i] = rb_str_new2(excerpts[i]);
2760
+ RARRAY(rexcerpts)->len++;
2761
2761
  }
2762
2762
  ary_destroy(excerpts, &free);
2763
2763
  return rexcerpts;
data/ext/r_store.c CHANGED
@@ -191,7 +191,7 @@ frt_dir_exists(VALUE self, VALUE rfname)
191
191
  {
192
192
  Store *store = DATA_PTR(self);
193
193
  StringValue(rfname);
194
- return store->exists(store, RSTRING(rfname)->ptr) ? Qtrue : Qfalse;
194
+ return store->exists(store, rs2s(rfname)) ? Qtrue : Qfalse;
195
195
  }
196
196
 
197
197
  /*
@@ -205,7 +205,7 @@ frt_dir_touch(VALUE self, VALUE rfname)
205
205
  {
206
206
  Store *store = DATA_PTR(self);
207
207
  StringValue(rfname);
208
- store->touch(store, RSTRING(rfname)->ptr);
208
+ store->touch(store, rs2s(rfname));
209
209
  return Qnil;
210
210
  }
211
211
 
@@ -220,7 +220,7 @@ frt_dir_delete(VALUE self, VALUE rfname)
220
220
  {
221
221
  Store *store = DATA_PTR(self);
222
222
  StringValue(rfname);
223
- return (store->remove(store, RSTRING(rfname)->ptr) == 0) ? Qtrue : Qfalse;
223
+ return (store->remove(store, rs2s(rfname)) == 0) ? Qtrue : Qfalse;
224
224
  }
225
225
 
226
226
  /*
@@ -263,7 +263,7 @@ frt_dir_rename(VALUE self, VALUE rfrom, VALUE rto)
263
263
  Store *store = DATA_PTR(self);
264
264
  StringValue(rfrom);
265
265
  StringValue(rto);
266
- store->rename(store, RSTRING(rfrom)->ptr, RSTRING(rto)->ptr);
266
+ store->rename(store, rs2s(rfrom), rs2s(rto));
267
267
  return self;
268
268
  }
269
269
 
@@ -283,7 +283,7 @@ frt_dir_make_lock(VALUE self, VALUE rlock_name)
283
283
  Lock *lock;
284
284
  Store *store = DATA_PTR(self);
285
285
  StringValue(rlock_name);
286
- lock = open_lock(store, RSTRING(rlock_name)->ptr);
286
+ lock = open_lock(store, rs2s(rlock_name));
287
287
  rlock = Data_Wrap_Struct(cLock, &frt_lock_mark, &frt_lock_free, lock);
288
288
  object_add(lock, rlock);
289
289
  return rlock;
@@ -362,9 +362,9 @@ frt_fsdir_new(int argc, VALUE *argv, VALUE klass)
362
362
  }
363
363
  if (!rb_funcall(rb_cFile, id_is_directory, 1, rpath)) {
364
364
  rb_raise(rb_eIOError, "No directory <%s> found. Use :create => true"
365
- " to create one.", RSTRING(rpath)->ptr);
365
+ " to create one.", rs2s(rpath));
366
366
  }
367
- store = open_fs_store(RSTRING(rpath)->ptr);
367
+ store = open_fs_store(rs2s(rpath));
368
368
  if (create) store->clear_all(store);
369
369
  if ((self = object_get(store)) == Qnil) {
370
370
  self = Data_Wrap_Struct(klass, NULL, &frt_dir_free, store);
data/ext/ram_store.c CHANGED
@@ -354,13 +354,14 @@ static const struct InStreamMethods RAM_IN_STREAM_METHODS = {
354
354
  static InStream *ram_open_input(Store *store, const char *filename)
355
355
  {
356
356
  RAMFile *rf = (RAMFile *)h_get(store->dir.ht, filename);
357
- InStream *is = is_new();
357
+ InStream *is = NULL;
358
358
 
359
359
  if (rf == NULL) {
360
- RAISE(IO_ERROR, "tried to open \"%s\" but it doesn't exist", filename);
360
+ RAISE(FILE_NOT_FOUND_ERROR,
361
+ "tried to open \"%s\" but it doesn't exist", filename);
361
362
  }
362
363
  REF(rf);
363
-
364
+ is = is_new();
364
365
  is->file.rf = rf;
365
366
  is->d.pointer = 0;
366
367
  is->m = &RAM_IN_STREAM_METHODS;
data/ext/search.c CHANGED
@@ -1,4 +1,5 @@
1
1
  #include <string.h>
2
+ #include <limits.h>
2
3
  #include "search.h"
3
4
  #include "array.h"
4
5
 
@@ -1021,7 +1022,7 @@ static TopDocs *isea_search_w(Searcher *self,
1021
1022
  filter_ft filter_func,
1022
1023
  bool load_fields)
1023
1024
  {
1024
- int max_size = first_doc + num_docs;
1025
+ int max_size = num_docs + (num_docs == INT_MAX ? 0 : first_doc);
1025
1026
  int i;
1026
1027
  Scorer *scorer;
1027
1028
  Hit **score_docs = NULL;
@@ -1539,7 +1540,7 @@ static TopDocs *msea_search_w(Searcher *self,
1539
1540
  filter_ft filter_func,
1540
1541
  bool load_fields)
1541
1542
  {
1542
- int max_size = first_doc + num_docs;
1543
+ int max_size = num_docs + (num_docs == INT_MAX ? 0 : first_doc);
1543
1544
  int i;
1544
1545
  int total_hits = 0;
1545
1546
  Hit **score_docs = NULL;
data/ext/store.c CHANGED
@@ -568,36 +568,52 @@ void is2os_copy_vints(InStream *is, OutStream *os, int cnt)
568
568
  /**
569
569
  * Test argument used to test the store->each function
570
570
  */
571
- struct FileNameConcatArg
571
+ struct FileNameListArg
572
572
  {
573
- char *p;
574
- char *end;
573
+ int count;
574
+ int size;
575
+ int total_len;
576
+ char **files;
575
577
  };
576
578
 
577
579
  /**
578
580
  * Test function used to test store->each function
579
581
  */
580
- static void concat_filenames(char *fname, void *arg)
582
+ static void add_file_name(char *fname, void *arg)
581
583
  {
582
- struct FileNameConcatArg *fnca = (struct FileNameConcatArg *)arg;
583
- if (fnca->p + strlen(fname) + 2 < fnca->end) {
584
- strcpy(fnca->p, fname);
585
- fnca->p += strlen(fname);
586
- *(fnca->p++) = ',';
587
- *(fnca->p++) = ' ';
584
+ struct FileNameListArg *fnl = (struct FileNameListArg *)arg;
585
+ if (fnl->count >= fnl->size) {
586
+ fnl->size *= 2;
587
+ REALLOC_N(fnl->files, char *, fnl->size);
588
588
  }
589
+ fnl->files[fnl->count++] = estrdup(fname);
590
+ fnl->total_len += strlen(fname) + 2;
589
591
  }
590
592
 
591
- char *store_to_s(Store *store, char *buf, int buf_size)
593
+ char *store_to_s(Store *store)
592
594
  {
593
- struct FileNameConcatArg fnca;
595
+ struct FileNameListArg fnl;
596
+ char *buf, *b;
597
+ int i;
598
+ fnl.count = 0;
599
+ fnl.size = 16;
600
+ fnl.total_len = 10;
601
+ fnl.files = ALLOC_N(char *, 16);
602
+
603
+ store->each(store, &add_file_name, &fnl);
604
+ qsort(fnl.files, fnl.count, sizeof(char *), &scmp);
605
+ b = buf = ALLOC_N(char, fnl.total_len);
606
+
607
+ for (i = 0; i < fnl.count; i++) {
608
+ char *fn = fnl.files[i];
609
+ int len = strlen(fn);
610
+ memcpy(b, fn, len);
611
+ b += len;
612
+ *b++ = '\n';
613
+ free(fn);
614
+ }
615
+ *b = '\0';
616
+ free(fnl.files);
594
617
 
595
- fnca.p = buf;
596
- fnca.end = buf + buf_size;
597
- store->each(store, &concat_filenames, &fnca);
598
- if (fnca.p > buf + 2) {
599
- fnca.p[-2] = '\0';
600
- }
601
618
  return buf;
602
619
  }
603
-
data/ext/store.h CHANGED
@@ -292,7 +292,7 @@ struct Store
292
292
  *
293
293
  * @param store self
294
294
  * @param filename the name of the input stream
295
- * @raise IO_ERROR if the input stream cannot be opened
295
+ * @raise FILE_NOT_FOUND_ERROR if the input stream cannot be opened
296
296
  */
297
297
  InStream *(*open_input)(Store *store, const char *filename);
298
298
 
@@ -728,11 +728,9 @@ extern void is2os_copy_vints(InStream *is, OutStream *os, int cnt);
728
728
  /**
729
729
  * Print the filenames in a store to a buffer.
730
730
  *
731
- * @param store the store to get the filenames from
732
- * @param buf the buffer to print the filenames to
733
- * @paran len the length of the buffer
731
+ * @param store the store to get the filenames from
734
732
  */
735
- extern char *store_to_s(Store *store, char *buf, int buf_size);
733
+ extern char *store_to_s(Store *store);
736
734
 
737
735
  extern Lock *open_lock(Store *store, char *lockname);
738
736
  extern void close_lock(Lock *lock);
data/lib/ferret/index.rb CHANGED
@@ -171,10 +171,10 @@ module Ferret::Index
171
171
  # num_excerpts:: Default: 2. Number of excerpts to return.
172
172
  # pre_tag:: Default: "<b>". Tag to place to the left of the
173
173
  # match. You'll probably want to change this to a
174
- # "<span>" tag with a class "\033[36m" for use in a
175
- # terminal.
174
+ # "<span>" tag with a class. Try "\033[36m" for use in
175
+ # a terminal.
176
176
  # post_tag:: Default: "</b>". This tag should close the
177
- # +:pre_tag+. Try tag "\033[m" in the terminal.
177
+ # +:pre_tag+. Try tag "\033[m" in the terminal.
178
178
  # ellipsis:: Default: "...". This is the string that is appended
179
179
  # at the beginning and end of excerpts (unless the
180
180
  # excerpt hits the start or end of the field.
@@ -673,7 +673,7 @@ module Ferret::Index
673
673
  latest = false
674
674
  begin
675
675
  latest = @reader.latest?
676
- rescue LockError => le
676
+ rescue Lock::LockError => le
677
677
  sleep(@options[:lock_retry_time]) # sleep for 2 seconds and try again
678
678
  latest = @reader.latest?
679
679
  end
@@ -1,3 +1,3 @@
1
1
  module Ferret
2
- VERSION = '0.10.14'
2
+ VERSION = '0.11.0'
3
3
  end
@@ -0,0 +1,76 @@
1
+ require File.dirname(__FILE__) + "/../test_helper"
2
+ require File.dirname(__FILE__) + "/../utils/number_to_spoken.rb"
3
+ require 'thread'
4
+
5
+ class IndexThreadSafetyReadWriteTest < Test::Unit::TestCase
6
+ include Ferret::Index
7
+ include Ferret::Document
8
+
9
+ INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
10
+ ITERATIONS = 10000
11
+ ANALYZER = Ferret::Analysis::Analyzer.new()
12
+
13
+ def setup
14
+ @index = Index.new(:path => 'index2',
15
+ :create => true,
16
+ :analyzer => ANALYZER,
17
+ :default_field => 'contents')
18
+ end
19
+
20
+ def search_thread()
21
+ ITERATIONS.times do
22
+ do_search()
23
+ sleep(rand(1))
24
+ end
25
+ rescue => e
26
+ puts e
27
+ puts e.backtrace
28
+ @index = nil
29
+ raise e
30
+ end
31
+
32
+ def index_thread()
33
+ ITERATIONS.times do
34
+ do_add_doc()
35
+ sleep(rand(1))
36
+ end
37
+ rescue => e
38
+ puts e
39
+ puts e.backtrace
40
+ @index = nil
41
+ raise e
42
+ end
43
+
44
+ def do_add_doc
45
+ d = Document.new()
46
+ n = rand(0xFFFFFFFF)
47
+ d << Field.new("id", n.to_s, Field::Store::YES, Field::Index::UNTOKENIZED)
48
+ d << Field.new("contents", n.to_spoken, Field::Store::NO, Field::Index::TOKENIZED)
49
+ puts("Adding #{n}")
50
+ begin
51
+ @index << d
52
+ rescue => e
53
+ puts e
54
+ puts e.backtrace
55
+ @index = nil
56
+ raise e
57
+ end
58
+ end
59
+
60
+ def do_search
61
+ n = rand(0xFFFFFFFF)
62
+ puts("Searching for #{n}")
63
+ hits = @index.search_each(n.to_spoken, :num_docs => 3) do |d, s|
64
+ puts "Hit for #{n}: #{@index[d]["id"]} - #{s}"
65
+ end
66
+ puts("Searched for #{n}: total = #{hits}")
67
+ end
68
+
69
+ def test_threading
70
+ threads = []
71
+ threads << Thread.new { search_thread }
72
+ threads << Thread.new { index_thread }
73
+
74
+ threads.each { |t| t.join }
75
+ end
76
+ end
@@ -1,20 +1,22 @@
1
1
  require File.dirname(__FILE__) + "/../test_helper"
2
- require File.dirname(__FILE__) + "/../utils/number_to_spoken.rb"
2
+ require File.join(File.dirname(__FILE__), "number_to_spoken.rb")
3
3
  require 'thread'
4
4
 
5
5
  class ThreadSafetyTest
6
6
  include Ferret::Index
7
7
  include Ferret::Search
8
8
  include Ferret::Store
9
- include Ferret::Document
9
+ include Ferret
10
10
 
11
11
  def initialize(options)
12
12
  @options = options
13
13
  end
14
14
 
15
15
  INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
16
- ANALYZER = Ferret::Analysis::Analyzer.new()
17
- ITERATIONS = 19
16
+ ANALYZER = Ferret::Analysis::WhiteSpaceAnalyzer.new()
17
+ ITERATIONS = 1000
18
+ QUERY_PARSER = Ferret::QueryParser.new(:analyzer => ANALYZER,
19
+ :default_field => 'contents')
18
20
  @@searcher = nil
19
21
 
20
22
  def run_index_thread(writer)
@@ -23,10 +25,8 @@ class ThreadSafetyTest
23
25
  use_compound_file = false
24
26
 
25
27
  (400*ITERATIONS).times do |i|
26
- d = Document.new()
27
28
  n = rand(0xFFFFFFFF)
28
- d << Field.new("id", n.to_s, Field::Store::YES, Field::Index::UNTOKENIZED)
29
- d << Field.new("contents", n.to_spoken, Field::Store::NO, Field::Index::TOKENIZED)
29
+ d = {:id => n.to_s, :contents => n.to_spoken}
30
30
  puts("Adding #{n}")
31
31
 
32
32
  # Switch between single and multiple file segments
@@ -37,7 +37,7 @@ class ThreadSafetyTest
37
37
 
38
38
  if (i % reopen_interval == 0)
39
39
  writer.close()
40
- writer = IndexWriter.new(INDEX_DIR, :analyzer => ANALYZER)
40
+ writer = IndexWriter.new(:path => INDEX_DIR, :analyzer => ANALYZER)
41
41
  end
42
42
  end
43
43
 
@@ -52,17 +52,17 @@ class ThreadSafetyTest
52
52
  reopen_interval = 10 + rand(20)
53
53
 
54
54
  unless use_global
55
- searcher = IndexSearcher.new(INDEX_DIR)
55
+ searcher = Searcher.new(INDEX_DIR)
56
56
  end
57
57
 
58
58
  (50*ITERATIONS).times do |i|
59
59
  search_for(rand(0xFFFFFFFF), (searcher.nil? ? @@searcher : searcher))
60
60
  if (i%reopen_interval == 0)
61
61
  if (searcher == nil)
62
- @@searcher = IndexSearcher.new(INDEX_DIR)
62
+ @@searcher = Searcher.new(INDEX_DIR)
63
63
  else
64
64
  searcher.close()
65
- searcher = IndexSearcher.new(INDEX_DIR)
65
+ searcher = Searcher.new(INDEX_DIR)
66
66
  end
67
67
  end
68
68
  end
@@ -74,30 +74,26 @@ class ThreadSafetyTest
74
74
 
75
75
  def search_for(n, searcher)
76
76
  puts("Searching for #{n}")
77
- hits =
78
- searcher.search(Ferret::QueryParser.parse(n.to_spoken, "contents", :analyzer => ANALYZER),
79
- :num_docs => 3)
80
- puts("Search for #{n}: total = #{hits.size}")
81
- hits.each do |d, s|
82
- puts "Hit for #{n}: #{searcher.reader.get_document(d)["id"]} - #{s}"
77
+ topdocs = searcher.search(QUERY_PARSER.parse(n.to_spoken), :limit => 3)
78
+ puts("Search for #{n}: total = #{topdocs.total_hits}")
79
+ topdocs.hits.each do |hit|
80
+ puts "Hit for #{n}: #{searcher.reader[hit.doc]["id"]} - #{hit.score}"
83
81
  end
84
82
  end
85
83
 
86
84
  def run_test_threads
87
-
88
85
  threads = []
89
86
  unless @options[:read_only]
90
- writer = IndexWriter.new(INDEX_DIR, :analyzer => ANALYZER,
87
+ writer = IndexWriter.new(:path => INDEX_DIR, :analyzer => ANALYZER,
91
88
  :create => !@options[:add])
92
89
 
93
90
  threads << Thread.new { run_index_thread(writer) }
94
-
95
91
  sleep(1)
96
92
  end
97
93
 
98
94
  threads << Thread.new { run_search_thread(false)}
99
95
 
100
- @@searcher = IndexSearcher.new(INDEX_DIR)
96
+ @@searcher = Searcher.new(INDEX_DIR)
101
97
  threads << Thread.new { run_search_thread(true)}
102
98
 
103
99
  threads << Thread.new { run_search_thread(true)}