ferret 0.10.14 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/r_search.c CHANGED
@@ -160,9 +160,9 @@ frt_get_td(TopDocs *td, VALUE rsearcher)
160
160
  VALUE rtop_docs;
161
161
  VALUE hit_ary = rb_ary_new2(td->size);
162
162
 
163
- RARRAY(hit_ary)->len = td->size;
164
163
  for (i = 0; i < td->size; i++) {
165
164
  RARRAY(hit_ary)->ptr[i] = frt_get_hit(td->hits[i]);
165
+ RARRAY(hit_ary)->len++;
166
166
  }
167
167
 
168
168
  rtop_docs = rb_struct_new(cTopDocs,
@@ -1015,7 +1015,7 @@ frt_bq_add_query(int argc, VALUE *argv, VALUE self)
1015
1015
  rb_raise(rb_eArgError, "Cannot add %s to a BooleanQuery",
1016
1016
  rb_class2name(klass));
1017
1017
  }
1018
- return Qnil;
1018
+ return self;
1019
1019
  }
1020
1020
 
1021
1021
  /****************************************************************************
@@ -1206,7 +1206,7 @@ frt_phq_add(int argc, VALUE *argv, VALUE self)
1206
1206
  default:
1207
1207
  rb_raise(rb_eArgError, "You can only add a string or an array of "
1208
1208
  "strings to a PhraseQuery, not a %s\n",
1209
- RSTRING(rb_obj_as_string(rterm))->ptr);
1209
+ rs2s(rb_obj_as_string(rterm)));
1210
1210
  }
1211
1211
  return self;
1212
1212
  }
@@ -2258,14 +2258,14 @@ frt_sort_add(Sort *sort, VALUE rsf, bool reverse)
2258
2258
  break;
2259
2259
  case T_SYMBOL:
2260
2260
  rsf = rb_obj_as_string(rsf);
2261
- sf = sort_field_auto_new(RSTRING(rsf)->ptr, reverse);
2261
+ sf = sort_field_auto_new(rs2s(rsf), reverse);
2262
2262
  /* need to give it a ruby object so it'll be freed when the
2263
2263
  * sort is garbage collected */
2264
2264
  rsf = frt_get_sf(sf);
2265
2265
  sort_add_sort_field(sort, sf);
2266
2266
  break;
2267
2267
  case T_STRING:
2268
- frt_parse_sort_str(sort, RSTRING(rsf)->ptr);
2268
+ frt_parse_sort_str(sort, rs2s(rsf));
2269
2269
  break;
2270
2270
  default:
2271
2271
  rb_raise(rb_eArgError, "Unknown SortField Type");
@@ -2697,7 +2697,7 @@ frt_sea_explain(VALUE self, VALUE rquery, VALUE rdoc_id)
2697
2697
  * :num_excerpts:: Default: 2. Number of excerpts to return.
2698
2698
  * :pre_tag:: Default: "<b>". Tag to place to the left of the match.
2699
2699
  * You'll probably want to change this to a "<span>" tag
2700
- * with a class "\033[7m" for use in a terminal.
2700
+ * with a class. Try "\033[7m" for use in a terminal.
2701
2701
  * :post_tag:: Default: "</b>". This tag should close the +:pre_tag+.
2702
2702
  * Try tag "\033[m" in the terminal.
2703
2703
  * :ellipsis:: Default: "...". This is the string that is appended at
@@ -2733,13 +2733,13 @@ frt_sea_highlight(int argc, VALUE *argv, VALUE self)
2733
2733
  }
2734
2734
  }
2735
2735
  if (Qnil != (v = rb_hash_aref(roptions, sym_pre_tag))) {
2736
- pre_tag = RSTRING(rb_obj_as_string(v))->ptr;
2736
+ pre_tag = rs2s(rb_obj_as_string(v));
2737
2737
  }
2738
2738
  if (Qnil != (v = rb_hash_aref(roptions, sym_post_tag))) {
2739
- post_tag = RSTRING(rb_obj_as_string(v))->ptr;
2739
+ post_tag = rs2s(rb_obj_as_string(v));
2740
2740
  }
2741
2741
  if (Qnil != (v = rb_hash_aref(roptions, sym_ellipsis))) {
2742
- ellipsis = RSTRING(rb_obj_as_string(v))->ptr;
2742
+ ellipsis = rs2s(rb_obj_as_string(v));
2743
2743
  }
2744
2744
 
2745
2745
  if ((excerpts = searcher_highlight(sea,
@@ -2754,10 +2754,10 @@ frt_sea_highlight(int argc, VALUE *argv, VALUE self)
2754
2754
  const int size = ary_size(excerpts);
2755
2755
  int i;
2756
2756
  VALUE rexcerpts = rb_ary_new2(size);
2757
- RARRAY(rexcerpts)->len = size;
2758
2757
 
2759
2758
  for (i = 0; i < size; i++) {
2760
2759
  RARRAY(rexcerpts)->ptr[i] = rb_str_new2(excerpts[i]);
2760
+ RARRAY(rexcerpts)->len++;
2761
2761
  }
2762
2762
  ary_destroy(excerpts, &free);
2763
2763
  return rexcerpts;
data/ext/r_store.c CHANGED
@@ -191,7 +191,7 @@ frt_dir_exists(VALUE self, VALUE rfname)
191
191
  {
192
192
  Store *store = DATA_PTR(self);
193
193
  StringValue(rfname);
194
- return store->exists(store, RSTRING(rfname)->ptr) ? Qtrue : Qfalse;
194
+ return store->exists(store, rs2s(rfname)) ? Qtrue : Qfalse;
195
195
  }
196
196
 
197
197
  /*
@@ -205,7 +205,7 @@ frt_dir_touch(VALUE self, VALUE rfname)
205
205
  {
206
206
  Store *store = DATA_PTR(self);
207
207
  StringValue(rfname);
208
- store->touch(store, RSTRING(rfname)->ptr);
208
+ store->touch(store, rs2s(rfname));
209
209
  return Qnil;
210
210
  }
211
211
 
@@ -220,7 +220,7 @@ frt_dir_delete(VALUE self, VALUE rfname)
220
220
  {
221
221
  Store *store = DATA_PTR(self);
222
222
  StringValue(rfname);
223
- return (store->remove(store, RSTRING(rfname)->ptr) == 0) ? Qtrue : Qfalse;
223
+ return (store->remove(store, rs2s(rfname)) == 0) ? Qtrue : Qfalse;
224
224
  }
225
225
 
226
226
  /*
@@ -263,7 +263,7 @@ frt_dir_rename(VALUE self, VALUE rfrom, VALUE rto)
263
263
  Store *store = DATA_PTR(self);
264
264
  StringValue(rfrom);
265
265
  StringValue(rto);
266
- store->rename(store, RSTRING(rfrom)->ptr, RSTRING(rto)->ptr);
266
+ store->rename(store, rs2s(rfrom), rs2s(rto));
267
267
  return self;
268
268
  }
269
269
 
@@ -283,7 +283,7 @@ frt_dir_make_lock(VALUE self, VALUE rlock_name)
283
283
  Lock *lock;
284
284
  Store *store = DATA_PTR(self);
285
285
  StringValue(rlock_name);
286
- lock = open_lock(store, RSTRING(rlock_name)->ptr);
286
+ lock = open_lock(store, rs2s(rlock_name));
287
287
  rlock = Data_Wrap_Struct(cLock, &frt_lock_mark, &frt_lock_free, lock);
288
288
  object_add(lock, rlock);
289
289
  return rlock;
@@ -362,9 +362,9 @@ frt_fsdir_new(int argc, VALUE *argv, VALUE klass)
362
362
  }
363
363
  if (!rb_funcall(rb_cFile, id_is_directory, 1, rpath)) {
364
364
  rb_raise(rb_eIOError, "No directory <%s> found. Use :create => true"
365
- " to create one.", RSTRING(rpath)->ptr);
365
+ " to create one.", rs2s(rpath));
366
366
  }
367
- store = open_fs_store(RSTRING(rpath)->ptr);
367
+ store = open_fs_store(rs2s(rpath));
368
368
  if (create) store->clear_all(store);
369
369
  if ((self = object_get(store)) == Qnil) {
370
370
  self = Data_Wrap_Struct(klass, NULL, &frt_dir_free, store);
data/ext/ram_store.c CHANGED
@@ -354,13 +354,14 @@ static const struct InStreamMethods RAM_IN_STREAM_METHODS = {
354
354
  static InStream *ram_open_input(Store *store, const char *filename)
355
355
  {
356
356
  RAMFile *rf = (RAMFile *)h_get(store->dir.ht, filename);
357
- InStream *is = is_new();
357
+ InStream *is = NULL;
358
358
 
359
359
  if (rf == NULL) {
360
- RAISE(IO_ERROR, "tried to open \"%s\" but it doesn't exist", filename);
360
+ RAISE(FILE_NOT_FOUND_ERROR,
361
+ "tried to open \"%s\" but it doesn't exist", filename);
361
362
  }
362
363
  REF(rf);
363
-
364
+ is = is_new();
364
365
  is->file.rf = rf;
365
366
  is->d.pointer = 0;
366
367
  is->m = &RAM_IN_STREAM_METHODS;
data/ext/search.c CHANGED
@@ -1,4 +1,5 @@
1
1
  #include <string.h>
2
+ #include <limits.h>
2
3
  #include "search.h"
3
4
  #include "array.h"
4
5
 
@@ -1021,7 +1022,7 @@ static TopDocs *isea_search_w(Searcher *self,
1021
1022
  filter_ft filter_func,
1022
1023
  bool load_fields)
1023
1024
  {
1024
- int max_size = first_doc + num_docs;
1025
+ int max_size = num_docs + (num_docs == INT_MAX ? 0 : first_doc);
1025
1026
  int i;
1026
1027
  Scorer *scorer;
1027
1028
  Hit **score_docs = NULL;
@@ -1539,7 +1540,7 @@ static TopDocs *msea_search_w(Searcher *self,
1539
1540
  filter_ft filter_func,
1540
1541
  bool load_fields)
1541
1542
  {
1542
- int max_size = first_doc + num_docs;
1543
+ int max_size = num_docs + (num_docs == INT_MAX ? 0 : first_doc);
1543
1544
  int i;
1544
1545
  int total_hits = 0;
1545
1546
  Hit **score_docs = NULL;
data/ext/store.c CHANGED
@@ -568,36 +568,52 @@ void is2os_copy_vints(InStream *is, OutStream *os, int cnt)
568
568
  /**
569
569
  * Test argument used to test the store->each function
570
570
  */
571
- struct FileNameConcatArg
571
+ struct FileNameListArg
572
572
  {
573
- char *p;
574
- char *end;
573
+ int count;
574
+ int size;
575
+ int total_len;
576
+ char **files;
575
577
  };
576
578
 
577
579
  /**
578
580
  * Test function used to test store->each function
579
581
  */
580
- static void concat_filenames(char *fname, void *arg)
582
+ static void add_file_name(char *fname, void *arg)
581
583
  {
582
- struct FileNameConcatArg *fnca = (struct FileNameConcatArg *)arg;
583
- if (fnca->p + strlen(fname) + 2 < fnca->end) {
584
- strcpy(fnca->p, fname);
585
- fnca->p += strlen(fname);
586
- *(fnca->p++) = ',';
587
- *(fnca->p++) = ' ';
584
+ struct FileNameListArg *fnl = (struct FileNameListArg *)arg;
585
+ if (fnl->count >= fnl->size) {
586
+ fnl->size *= 2;
587
+ REALLOC_N(fnl->files, char *, fnl->size);
588
588
  }
589
+ fnl->files[fnl->count++] = estrdup(fname);
590
+ fnl->total_len += strlen(fname) + 2;
589
591
  }
590
592
 
591
- char *store_to_s(Store *store, char *buf, int buf_size)
593
+ char *store_to_s(Store *store)
592
594
  {
593
- struct FileNameConcatArg fnca;
595
+ struct FileNameListArg fnl;
596
+ char *buf, *b;
597
+ int i;
598
+ fnl.count = 0;
599
+ fnl.size = 16;
600
+ fnl.total_len = 10;
601
+ fnl.files = ALLOC_N(char *, 16);
602
+
603
+ store->each(store, &add_file_name, &fnl);
604
+ qsort(fnl.files, fnl.count, sizeof(char *), &scmp);
605
+ b = buf = ALLOC_N(char, fnl.total_len);
606
+
607
+ for (i = 0; i < fnl.count; i++) {
608
+ char *fn = fnl.files[i];
609
+ int len = strlen(fn);
610
+ memcpy(b, fn, len);
611
+ b += len;
612
+ *b++ = '\n';
613
+ free(fn);
614
+ }
615
+ *b = '\0';
616
+ free(fnl.files);
594
617
 
595
- fnca.p = buf;
596
- fnca.end = buf + buf_size;
597
- store->each(store, &concat_filenames, &fnca);
598
- if (fnca.p > buf + 2) {
599
- fnca.p[-2] = '\0';
600
- }
601
618
  return buf;
602
619
  }
603
-
data/ext/store.h CHANGED
@@ -292,7 +292,7 @@ struct Store
292
292
  *
293
293
  * @param store self
294
294
  * @param filename the name of the input stream
295
- * @raise IO_ERROR if the input stream cannot be opened
295
+ * @raise FILE_NOT_FOUND_ERROR if the input stream cannot be opened
296
296
  */
297
297
  InStream *(*open_input)(Store *store, const char *filename);
298
298
 
@@ -728,11 +728,9 @@ extern void is2os_copy_vints(InStream *is, OutStream *os, int cnt);
728
728
  /**
729
729
  * Print the filenames in a store to a buffer.
730
730
  *
731
- * @param store the store to get the filenames from
732
- * @param buf the buffer to print the filenames to
733
- * @paran len the length of the buffer
731
+ * @param store the store to get the filenames from
734
732
  */
735
- extern char *store_to_s(Store *store, char *buf, int buf_size);
733
+ extern char *store_to_s(Store *store);
736
734
 
737
735
  extern Lock *open_lock(Store *store, char *lockname);
738
736
  extern void close_lock(Lock *lock);
data/lib/ferret/index.rb CHANGED
@@ -171,10 +171,10 @@ module Ferret::Index
171
171
  # num_excerpts:: Default: 2. Number of excerpts to return.
172
172
  # pre_tag:: Default: "<b>". Tag to place to the left of the
173
173
  # match. You'll probably want to change this to a
174
- # "<span>" tag with a class "\033[36m" for use in a
175
- # terminal.
174
+ # "<span>" tag with a class. Try "\033[36m" for use in
175
+ # a terminal.
176
176
  # post_tag:: Default: "</b>". This tag should close the
177
- # +:pre_tag+. Try tag "\033[m" in the terminal.
177
+ # +:pre_tag+. Try tag "\033[m" in the terminal.
178
178
  # ellipsis:: Default: "...". This is the string that is appended
179
179
  # at the beginning and end of excerpts (unless the
180
180
  # excerpt hits the start or end of the field.
@@ -673,7 +673,7 @@ module Ferret::Index
673
673
  latest = false
674
674
  begin
675
675
  latest = @reader.latest?
676
- rescue LockError => le
676
+ rescue Lock::LockError => le
677
677
  sleep(@options[:lock_retry_time]) # sleep for 2 seconds and try again
678
678
  latest = @reader.latest?
679
679
  end
@@ -1,3 +1,3 @@
1
1
  module Ferret
2
- VERSION = '0.10.14'
2
+ VERSION = '0.11.0'
3
3
  end
@@ -0,0 +1,76 @@
1
+ require File.dirname(__FILE__) + "/../test_helper"
2
+ require File.dirname(__FILE__) + "/../utils/number_to_spoken.rb"
3
+ require 'thread'
4
+
5
+ class IndexThreadSafetyReadWriteTest < Test::Unit::TestCase
6
+ include Ferret::Index
7
+ include Ferret::Document
8
+
9
+ INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
10
+ ITERATIONS = 10000
11
+ ANALYZER = Ferret::Analysis::Analyzer.new()
12
+
13
+ def setup
14
+ @index = Index.new(:path => 'index2',
15
+ :create => true,
16
+ :analyzer => ANALYZER,
17
+ :default_field => 'contents')
18
+ end
19
+
20
+ def search_thread()
21
+ ITERATIONS.times do
22
+ do_search()
23
+ sleep(rand(1))
24
+ end
25
+ rescue => e
26
+ puts e
27
+ puts e.backtrace
28
+ @index = nil
29
+ raise e
30
+ end
31
+
32
+ def index_thread()
33
+ ITERATIONS.times do
34
+ do_add_doc()
35
+ sleep(rand(1))
36
+ end
37
+ rescue => e
38
+ puts e
39
+ puts e.backtrace
40
+ @index = nil
41
+ raise e
42
+ end
43
+
44
+ def do_add_doc
45
+ d = Document.new()
46
+ n = rand(0xFFFFFFFF)
47
+ d << Field.new("id", n.to_s, Field::Store::YES, Field::Index::UNTOKENIZED)
48
+ d << Field.new("contents", n.to_spoken, Field::Store::NO, Field::Index::TOKENIZED)
49
+ puts("Adding #{n}")
50
+ begin
51
+ @index << d
52
+ rescue => e
53
+ puts e
54
+ puts e.backtrace
55
+ @index = nil
56
+ raise e
57
+ end
58
+ end
59
+
60
+ def do_search
61
+ n = rand(0xFFFFFFFF)
62
+ puts("Searching for #{n}")
63
+ hits = @index.search_each(n.to_spoken, :num_docs => 3) do |d, s|
64
+ puts "Hit for #{n}: #{@index[d]["id"]} - #{s}"
65
+ end
66
+ puts("Searched for #{n}: total = #{hits}")
67
+ end
68
+
69
+ def test_threading
70
+ threads = []
71
+ threads << Thread.new { search_thread }
72
+ threads << Thread.new { index_thread }
73
+
74
+ threads.each { |t| t.join }
75
+ end
76
+ end
@@ -1,20 +1,22 @@
1
1
  require File.dirname(__FILE__) + "/../test_helper"
2
- require File.dirname(__FILE__) + "/../utils/number_to_spoken.rb"
2
+ require File.join(File.dirname(__FILE__), "number_to_spoken.rb")
3
3
  require 'thread'
4
4
 
5
5
  class ThreadSafetyTest
6
6
  include Ferret::Index
7
7
  include Ferret::Search
8
8
  include Ferret::Store
9
- include Ferret::Document
9
+ include Ferret
10
10
 
11
11
  def initialize(options)
12
12
  @options = options
13
13
  end
14
14
 
15
15
  INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
16
- ANALYZER = Ferret::Analysis::Analyzer.new()
17
- ITERATIONS = 19
16
+ ANALYZER = Ferret::Analysis::WhiteSpaceAnalyzer.new()
17
+ ITERATIONS = 1000
18
+ QUERY_PARSER = Ferret::QueryParser.new(:analyzer => ANALYZER,
19
+ :default_field => 'contents')
18
20
  @@searcher = nil
19
21
 
20
22
  def run_index_thread(writer)
@@ -23,10 +25,8 @@ class ThreadSafetyTest
23
25
  use_compound_file = false
24
26
 
25
27
  (400*ITERATIONS).times do |i|
26
- d = Document.new()
27
28
  n = rand(0xFFFFFFFF)
28
- d << Field.new("id", n.to_s, Field::Store::YES, Field::Index::UNTOKENIZED)
29
- d << Field.new("contents", n.to_spoken, Field::Store::NO, Field::Index::TOKENIZED)
29
+ d = {:id => n.to_s, :contents => n.to_spoken}
30
30
  puts("Adding #{n}")
31
31
 
32
32
  # Switch between single and multiple file segments
@@ -37,7 +37,7 @@ class ThreadSafetyTest
37
37
 
38
38
  if (i % reopen_interval == 0)
39
39
  writer.close()
40
- writer = IndexWriter.new(INDEX_DIR, :analyzer => ANALYZER)
40
+ writer = IndexWriter.new(:path => INDEX_DIR, :analyzer => ANALYZER)
41
41
  end
42
42
  end
43
43
 
@@ -52,17 +52,17 @@ class ThreadSafetyTest
52
52
  reopen_interval = 10 + rand(20)
53
53
 
54
54
  unless use_global
55
- searcher = IndexSearcher.new(INDEX_DIR)
55
+ searcher = Searcher.new(INDEX_DIR)
56
56
  end
57
57
 
58
58
  (50*ITERATIONS).times do |i|
59
59
  search_for(rand(0xFFFFFFFF), (searcher.nil? ? @@searcher : searcher))
60
60
  if (i%reopen_interval == 0)
61
61
  if (searcher == nil)
62
- @@searcher = IndexSearcher.new(INDEX_DIR)
62
+ @@searcher = Searcher.new(INDEX_DIR)
63
63
  else
64
64
  searcher.close()
65
- searcher = IndexSearcher.new(INDEX_DIR)
65
+ searcher = Searcher.new(INDEX_DIR)
66
66
  end
67
67
  end
68
68
  end
@@ -74,30 +74,26 @@ class ThreadSafetyTest
74
74
 
75
75
  def search_for(n, searcher)
76
76
  puts("Searching for #{n}")
77
- hits =
78
- searcher.search(Ferret::QueryParser.parse(n.to_spoken, "contents", :analyzer => ANALYZER),
79
- :num_docs => 3)
80
- puts("Search for #{n}: total = #{hits.size}")
81
- hits.each do |d, s|
82
- puts "Hit for #{n}: #{searcher.reader.get_document(d)["id"]} - #{s}"
77
+ topdocs = searcher.search(QUERY_PARSER.parse(n.to_spoken), :limit => 3)
78
+ puts("Search for #{n}: total = #{topdocs.total_hits}")
79
+ topdocs.hits.each do |hit|
80
+ puts "Hit for #{n}: #{searcher.reader[hit.doc]["id"]} - #{hit.score}"
83
81
  end
84
82
  end
85
83
 
86
84
  def run_test_threads
87
-
88
85
  threads = []
89
86
  unless @options[:read_only]
90
- writer = IndexWriter.new(INDEX_DIR, :analyzer => ANALYZER,
87
+ writer = IndexWriter.new(:path => INDEX_DIR, :analyzer => ANALYZER,
91
88
  :create => !@options[:add])
92
89
 
93
90
  threads << Thread.new { run_index_thread(writer) }
94
-
95
91
  sleep(1)
96
92
  end
97
93
 
98
94
  threads << Thread.new { run_search_thread(false)}
99
95
 
100
- @@searcher = IndexSearcher.new(INDEX_DIR)
96
+ @@searcher = Searcher.new(INDEX_DIR)
101
97
  threads << Thread.new { run_search_thread(true)}
102
98
 
103
99
  threads << Thread.new { run_search_thread(true)}