ferret 0.11.0 → 0.11.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
@@ -845,7 +845,7 @@ static Token *std_next(TokenStream *ts)
845
845
  char *t;
846
846
  char *start = NULL;
847
847
  char *num_end = NULL;
848
- char token[MAX_WORD_SIZE];
848
+ char token[MAX_WORD_SIZE + 1];
849
849
  int token_i = 0;
850
850
  int len;
851
851
  bool is_acronym;
@@ -925,6 +925,9 @@ static Token *std_next(TokenStream *ts)
925
925
  }
926
926
  ts->t = t + len;
927
927
  token[len] = 0;
928
+ Token *k = tk_set(&(CTS(ts)->token), token, len, (int)(start - ts->text),
929
+ (int)(ts->t - ts->text), 1);
930
+ return k;
928
931
  return tk_set(&(CTS(ts)->token), token, len, (int)(start - ts->text),
929
932
  (int)(ts->t - ts->text), 1);
930
933
  }
@@ -1174,7 +1177,7 @@ static TokenStream *mf_clone_i(TokenStream *orig_ts)
1174
1177
 
1175
1178
  static Token *mf_next(TokenStream *ts)
1176
1179
  {
1177
- char buf[MAX_WORD_SIZE];
1180
+ char buf[MAX_WORD_SIZE + 1];
1178
1181
  MultiMapper *mapper = MFilt(ts)->mapper;
1179
1182
  TokenFilter *tf = TkFilt(ts);
1180
1183
  Token *tk = tf->sub_ts->next(tf->sub_ts);
@@ -1299,7 +1302,7 @@ TokenStream *hyphen_filter_new(TokenStream *sub_ts)
1299
1302
 
1300
1303
  Token *mb_lcf_next(TokenStream *ts)
1301
1304
  {
1302
- wchar_t wbuf[MAX_WORD_SIZE], *wchr;
1305
+ wchar_t wbuf[MAX_WORD_SIZE + 1], *wchr;
1303
1306
  Token *tk = TkFilt(ts)->sub_ts->next(TkFilt(ts)->sub_ts);
1304
1307
 
1305
1308
  if (tk == NULL) {
@@ -6,6 +6,6 @@ if (/mswin/ =~ RUBY_PLATFORM) and ENV['make'].nil?
6
6
  else
7
7
  require 'mkmf'
8
8
  #$CFLAGS += " -fno-common"
9
- $CFLAGS += " -fno-common -D_FILE_OFFSET_BITS=64"
9
+ $CFLAGS += " -fno-stack-protector -fno-common -D_FILE_OFFSET_BITS=64"
10
10
  create_makefile("ferret_ext")
11
11
  end
@@ -335,7 +335,7 @@ static InStream *fs_open_input(Store *store, const char *filename)
335
335
  return is;
336
336
  }
337
337
 
338
- #define LOCK_OBTAIN_TIMEOUT 5
338
+ #define LOCK_OBTAIN_TIMEOUT 10
339
339
 
340
340
  #ifdef RUBY_BINDINGS
341
341
  struct timeval rb_time_interval _((VALUE));
@@ -6,7 +6,6 @@
6
6
  #include <assert.h>
7
7
  #include <math.h>
8
8
  #include <ctype.h>
9
- #include <unistd.h>
10
9
 
11
10
  const char *EMPTY_STRING = "";
12
11
 
@@ -359,14 +358,24 @@ void dummy_free(void *p)
359
358
 
360
359
  #ifdef FRT_IS_C99
361
360
  extern void usleep(unsigned long usec);
361
+ #else
362
+ # ifdef RUBY_BINDINGS
363
+ struct timeval rb_time_interval _((VALUE));
364
+ # else
365
+ # include <unistd.h>
366
+ # endif
362
367
  #endif
363
368
 
364
369
  extern void micro_sleep(const int micro_seconds)
365
370
  {
366
- #ifdef POSH_OS_WIN32
367
- Sleep(micro_seconds / 1000);
371
+ #ifdef RUBY_BINDINGS
372
+ rb_thread_wait_for(rb_time_interval(rb_float_new((double)micro_seconds/1000000.0)));
368
373
  #else
374
+ # ifdef POSH_OS_WIN32
375
+ Sleep(micro_seconds / 1000);
376
+ # else
369
377
  usleep(micro_seconds);
378
+ # endif
370
379
  #endif
371
380
  }
372
381
 
@@ -35,7 +35,7 @@ static void ste_reset(TermEnum *te);
35
35
  static char *ste_next(TermEnum *te);
36
36
 
37
37
  #define FORMAT 0
38
- #define SEGMENTS_GEN_FILE_NAME "segments.gen"
38
+ #define SEGMENTS_GEN_FILE_NAME "segments"
39
39
  #define MAX_EXT_LEN 10
40
40
 
41
41
  /* *** Must be three characters *** */
@@ -53,7 +53,7 @@ static const char BASE36_DIGITMAP[] = "0123456789abcdefghijklmnopqrstuvwxyz";
53
53
 
54
54
  static char *u64_to_str36(char *buf, int buf_size, f_u64 u)
55
55
  {
56
- int i = buf_size--;
56
+ int i = buf_size - 1;
57
57
  buf[i] = '\0';
58
58
  for (i--; i >= 0; i--) {
59
59
  buf[i] = BASE36_DIGITMAP[u % 36];
@@ -826,7 +826,7 @@ void sis_find_segments_file(Store *store, FindSegmentsFile *fsf,
826
826
  }
827
827
 
828
828
  /* Method 2 (fallback if Method 1 isn't reliable): if the directory
829
- * listing seems to be stale, try loading the "segments.gen" file. */
829
+ * listing seems to be stale, try loading the "segments" file. */
830
830
  if (1 == method || (0 == method && last_gen == gen && retry)) {
831
831
  method = 1;
832
832
  for (i = 0; i < GEN_FILE_RETRY_COUNT; i++) {
@@ -835,7 +835,7 @@ void sis_find_segments_file(Store *store, FindSegmentsFile *fsf,
835
835
  gen_is = store->open_input(store, SEGMENTS_GEN_FILE_NAME);
836
836
  XCATCHALL
837
837
  HANDLED();
838
- /* TODO:LOG "segments.gen open: IO_ERROR"*/
838
+ /* TODO:LOG "segments open: IO_ERROR"*/
839
839
  XENDTRY
840
840
 
841
841
  if (NULL != gen_is) {
@@ -498,12 +498,12 @@ static Scorer *phw_scorer(Weight *self, IndexReader *ir)
498
498
  if (phq->slop == 0) { /* optimize exact (common) case */
499
499
  phsc = exact_phrase_scorer_new(self, tps, positions, pos_cnt,
500
500
  self->similarity,
501
- ir->get_norms(ir, field_num));
501
+ ir_get_norms_i(ir, field_num));
502
502
  }
503
503
  else {
504
504
  phsc = sloppy_phrase_scorer_new(self, tps, positions, pos_cnt,
505
505
  self->similarity, phq->slop,
506
- ir->get_norms(ir, field_num));
506
+ ir_get_norms_i(ir, field_num));
507
507
  }
508
508
  free(tps);
509
509
  return phsc;
@@ -1205,7 +1205,7 @@ frt_iw_init(int argc, VALUE *argv, VALUE self)
1205
1205
  store = open_ram_store();
1206
1206
  DEREF(store);
1207
1207
  }
1208
- if (!create && create_if_missing && !store->exists(store, "segments.gen")) {
1208
+ if (!create && create_if_missing && !store->exists(store, "segments")) {
1209
1209
  create = true;
1210
1210
  }
1211
1211
  if (create) {
@@ -1,6 +1,20 @@
1
1
  require 'monitor'
2
2
 
3
3
  module Ferret::Index
4
+ module SynchroLockMixin
5
+ def synchrolock
6
+ trys = 5
7
+ begin
8
+ synchronize {yield}
9
+ rescue Ferret::Store::Lock::LockError => e
10
+ if (trys -= 1) <= 0
11
+ raise e
12
+ else
13
+ retry
14
+ end
15
+ end
16
+ end
17
+ end
4
18
  # This is a simplified interface to the index. See the TUTORIAL for more
5
19
  # information on how to use this class.
6
20
  class Index
@@ -116,37 +130,34 @@ module Ferret::Index
116
130
  @dir = RAMDirectory.new
117
131
  end
118
132
 
133
+ @dir.extend(MonitorMixin).extend(SynchroLockMixin)
119
134
  options[:dir] = @dir
120
135
  options[:lock_retry_time]||= 2
121
- @dir.extend(MonitorMixin)
122
- @dir.synchronize do
123
- @options = options
124
- if (!@dir.exists?("segments")) || options[:create]
125
- IndexWriter.new(options).close
126
- end
127
- options[:analyzer]||= Ferret::Analysis::StandardAnalyzer.new
136
+ @options = options
137
+ if (!@dir.exists?("segments")) || options[:create]
138
+ IndexWriter.new(options).close
139
+ end
140
+ options[:analyzer]||= Ferret::Analysis::StandardAnalyzer.new
128
141
 
129
- @searcher = nil
130
- @writer = nil
131
- @reader = nil
142
+ @searcher = nil
143
+ @writer = nil
144
+ @reader = nil
132
145
 
133
- @options.delete(:create) # only create the first time if at all
134
- @auto_flush = @options[:auto_flush] || false
135
- if (@options[:id_field].nil? and
136
- @key.is_a?(Symbol))
137
- @id_field = @key
138
- else
139
- @id_field = @options[:id_field] || :id
140
- end
141
- @default_field = (@options[:default_field]||= :*)
142
- @default_input_field = options[:default_input_field] || @id_field
146
+ @options.delete(:create) # only create the first time if at all
147
+ @auto_flush = @options[:auto_flush] || false
148
+ if (@options[:id_field].nil? and @key.is_a?(Symbol))
149
+ @id_field = @key
150
+ else
151
+ @id_field = @options[:id_field] || :id
152
+ end
153
+ @default_field = (@options[:default_field]||= :*)
154
+ @default_input_field = options[:default_input_field] || @id_field
143
155
 
144
- if @default_input_field.respond_to?(:intern)
145
- @default_input_field = @default_input_field.intern
146
- end
147
- @open = true
148
- @qp = nil
156
+ if @default_input_field.respond_to?(:intern)
157
+ @default_input_field = @default_input_field.intern
149
158
  end
159
+ @open = true
160
+ @qp = nil
150
161
  if block
151
162
  yield self
152
163
  self.close
@@ -253,7 +264,8 @@ module Ferret::Index
253
264
  #
254
265
  # See FieldInfos for more information on how to set field properties.
255
266
  def add_document(doc, analyzer = nil)
256
- @dir.synchronize do
267
+ @dir.synchrolock do
268
+ ensure_writer_open()
257
269
  if doc.is_a?(String) or doc.is_a?(Array)
258
270
  doc = {@default_input_field => doc}
259
271
  end
@@ -399,7 +411,8 @@ module Ferret::Index
399
411
  #
400
412
  # id:: The number of the document to delete
401
413
  def delete(id)
402
- @dir.synchronize do
414
+ @dir.synchrolock do
415
+ ensure_writer_open()
403
416
  if id.is_a?(String) or id.is_a?(Symbol)
404
417
  ensure_writer_open()
405
418
  @writer.delete(@id_field, id.to_s)
@@ -420,7 +433,8 @@ module Ferret::Index
420
433
  # string (in which case it is parsed by the standard query parser)
421
434
  # or an actual query object.
422
435
  def query_delete(query)
423
- @dir.synchronize do
436
+ @dir.synchrolock do
437
+ ensure_writer_open()
424
438
  ensure_searcher_open()
425
439
  query = do_process_query(query)
426
440
  @searcher.search_each(query) do |doc, score|
@@ -447,7 +461,8 @@ module Ferret::Index
447
461
  # the :key attribute.
448
462
  # new_doc:: The document to replace the old document with
449
463
  def update(id, new_doc)
450
- @dir.synchronize do
464
+ @dir.synchrolock do
465
+ ensure_writer_open()
451
466
  delete(id)
452
467
  if id.is_a?(String) or id.is_a?(Symbol)
453
468
  @writer.commit
@@ -484,7 +499,8 @@ module Ferret::Index
484
499
  # #=> {:id => "28", :title => "My Oh My", :artist => "David Gray"}
485
500
  #
486
501
  def query_update(query, new_val)
487
- @dir.synchronize do
502
+ @dir.synchrolock do
503
+ ensure_writer_open()
488
504
  ensure_searcher_open()
489
505
  docs_to_add = []
490
506
  query = do_process_query(query)
@@ -534,7 +550,7 @@ module Ferret::Index
534
550
  # optimizes the index. This should only be called when the index will no
535
551
  # longer be updated very often, but will be read a lot.
536
552
  def optimize()
537
- @dir.synchronize do
553
+ @dir.synchrolock do
538
554
  ensure_writer_open()
539
555
  @writer.optimize()
540
556
  @writer.close()
@@ -562,7 +578,8 @@ module Ferret::Index
562
578
  #
563
579
  # After this completes, the index is optimized.
564
580
  def add_indexes(indexes)
565
- @dir.synchronize do
581
+ @dir.synchrolock do
582
+ ensure_writer_open()
566
583
  indexes = [indexes].flatten # make sure we have an array
567
584
  return if indexes.size == 0 # nothing to do
568
585
  if indexes[0].is_a?(Index)
@@ -604,7 +621,7 @@ module Ferret::Index
604
621
  elsif directory.is_a?(Ferret::Store::Directory)
605
622
  @dir = directory
606
623
  end
607
- @dir.extend(MonitorMixin)
624
+ @dir.extend(MonitorMixin).extend(SynchroLockMixin)
608
625
  @options[:dir] = @dir
609
626
  @options[:create_if_missing] = true
610
627
  add_indexes([old_dir])
@@ -646,7 +663,7 @@ module Ferret::Index
646
663
  # Returns the field_infos object so that you can add new fields to the
647
664
  # index.
648
665
  def field_infos
649
- @dir.synchronize do
666
+ @dir.synchrolock do
650
667
  ensure_writer_open()
651
668
  return @writer.field_infos
652
669
  end
@@ -1,3 +1,3 @@
1
1
  module Ferret
2
- VERSION = '0.11.0'
2
+ VERSION = '0.11.1'
3
3
  end
@@ -6,8 +6,8 @@ class IndexThreadSafetyTest < Test::Unit::TestCase
6
6
  include Ferret::Index
7
7
 
8
8
  INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
9
- ITERATIONS = 1000
10
- NUM_THREADS = 2
9
+ ITERATIONS = 100
10
+ NUM_THREADS = 3
11
11
  ANALYZER = Ferret::Analysis::StandardAnalyzer.new()
12
12
 
13
13
  def setup
@@ -35,6 +35,7 @@ class IndexThreadSafetyTest < Test::Unit::TestCase
35
35
  else
36
36
  do_add_doc(index)
37
37
  end
38
+ index.commit
38
39
  end
39
40
  end
40
41
 
@@ -21,10 +21,10 @@ class IndexWriterTest < Test::Unit::TestCase
21
21
  assert(! wlock.locked?)
22
22
  assert(! clock.locked?)
23
23
  iw = IndexWriter.new(:dir => @dir, :create => true)
24
- assert(@dir.exists?("segments.gen"))
24
+ assert(@dir.exists?("segments"))
25
25
  assert(wlock.locked?)
26
26
  iw.close()
27
- assert(@dir.exists?("segments.gen"))
27
+ assert(@dir.exists?("segments"))
28
28
  assert(! wlock.locked?)
29
29
  assert(! clock.locked?)
30
30
  end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
3
3
  specification_version: 1
4
4
  name: ferret
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.11.0
7
- date: 2007-02-25 00:00:00 +11:00
6
+ version: 0.11.1
7
+ date: 2007-02-27 00:00:00 +11:00
8
8
  summary: Ruby indexing library.
9
9
  require_paths:
10
10
  - lib