ferret 0.11.0 → 0.11.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -845,7 +845,7 @@ static Token *std_next(TokenStream *ts)
845
845
  char *t;
846
846
  char *start = NULL;
847
847
  char *num_end = NULL;
848
- char token[MAX_WORD_SIZE];
848
+ char token[MAX_WORD_SIZE + 1];
849
849
  int token_i = 0;
850
850
  int len;
851
851
  bool is_acronym;
@@ -925,6 +925,9 @@ static Token *std_next(TokenStream *ts)
925
925
  }
926
926
  ts->t = t + len;
927
927
  token[len] = 0;
928
+ Token *k = tk_set(&(CTS(ts)->token), token, len, (int)(start - ts->text),
929
+ (int)(ts->t - ts->text), 1);
930
+ return k;
928
931
  return tk_set(&(CTS(ts)->token), token, len, (int)(start - ts->text),
929
932
  (int)(ts->t - ts->text), 1);
930
933
  }
@@ -1174,7 +1177,7 @@ static TokenStream *mf_clone_i(TokenStream *orig_ts)
1174
1177
 
1175
1178
  static Token *mf_next(TokenStream *ts)
1176
1179
  {
1177
- char buf[MAX_WORD_SIZE];
1180
+ char buf[MAX_WORD_SIZE + 1];
1178
1181
  MultiMapper *mapper = MFilt(ts)->mapper;
1179
1182
  TokenFilter *tf = TkFilt(ts);
1180
1183
  Token *tk = tf->sub_ts->next(tf->sub_ts);
@@ -1299,7 +1302,7 @@ TokenStream *hyphen_filter_new(TokenStream *sub_ts)
1299
1302
 
1300
1303
  Token *mb_lcf_next(TokenStream *ts)
1301
1304
  {
1302
- wchar_t wbuf[MAX_WORD_SIZE], *wchr;
1305
+ wchar_t wbuf[MAX_WORD_SIZE + 1], *wchr;
1303
1306
  Token *tk = TkFilt(ts)->sub_ts->next(TkFilt(ts)->sub_ts);
1304
1307
 
1305
1308
  if (tk == NULL) {
@@ -6,6 +6,6 @@ if (/mswin/ =~ RUBY_PLATFORM) and ENV['make'].nil?
6
6
  else
7
7
  require 'mkmf'
8
8
  #$CFLAGS += " -fno-common"
9
- $CFLAGS += " -fno-common -D_FILE_OFFSET_BITS=64"
9
+ $CFLAGS += " -fno-stack-protector -fno-common -D_FILE_OFFSET_BITS=64"
10
10
  create_makefile("ferret_ext")
11
11
  end
@@ -335,7 +335,7 @@ static InStream *fs_open_input(Store *store, const char *filename)
335
335
  return is;
336
336
  }
337
337
 
338
- #define LOCK_OBTAIN_TIMEOUT 5
338
+ #define LOCK_OBTAIN_TIMEOUT 10
339
339
 
340
340
  #ifdef RUBY_BINDINGS
341
341
  struct timeval rb_time_interval _((VALUE));
@@ -6,7 +6,6 @@
6
6
  #include <assert.h>
7
7
  #include <math.h>
8
8
  #include <ctype.h>
9
- #include <unistd.h>
10
9
 
11
10
  const char *EMPTY_STRING = "";
12
11
 
@@ -359,14 +358,24 @@ void dummy_free(void *p)
359
358
 
360
359
  #ifdef FRT_IS_C99
361
360
  extern void usleep(unsigned long usec);
361
+ #else
362
+ # ifdef RUBY_BINDINGS
363
+ struct timeval rb_time_interval _((VALUE));
364
+ # else
365
+ # include <unistd.h>
366
+ # endif
362
367
  #endif
363
368
 
364
369
  extern void micro_sleep(const int micro_seconds)
365
370
  {
366
- #ifdef POSH_OS_WIN32
367
- Sleep(micro_seconds / 1000);
371
+ #ifdef RUBY_BINDINGS
372
+ rb_thread_wait_for(rb_time_interval(rb_float_new((double)micro_seconds/1000000.0)));
368
373
  #else
374
+ # ifdef POSH_OS_WIN32
375
+ Sleep(micro_seconds / 1000);
376
+ # else
369
377
  usleep(micro_seconds);
378
+ # endif
370
379
  #endif
371
380
  }
372
381
 
@@ -35,7 +35,7 @@ static void ste_reset(TermEnum *te);
35
35
  static char *ste_next(TermEnum *te);
36
36
 
37
37
  #define FORMAT 0
38
- #define SEGMENTS_GEN_FILE_NAME "segments.gen"
38
+ #define SEGMENTS_GEN_FILE_NAME "segments"
39
39
  #define MAX_EXT_LEN 10
40
40
 
41
41
  /* *** Must be three characters *** */
@@ -53,7 +53,7 @@ static const char BASE36_DIGITMAP[] = "0123456789abcdefghijklmnopqrstuvwxyz";
53
53
 
54
54
  static char *u64_to_str36(char *buf, int buf_size, f_u64 u)
55
55
  {
56
- int i = buf_size--;
56
+ int i = buf_size - 1;
57
57
  buf[i] = '\0';
58
58
  for (i--; i >= 0; i--) {
59
59
  buf[i] = BASE36_DIGITMAP[u % 36];
@@ -826,7 +826,7 @@ void sis_find_segments_file(Store *store, FindSegmentsFile *fsf,
826
826
  }
827
827
 
828
828
  /* Method 2 (fallback if Method 1 isn't reliable): if the directory
829
- * listing seems to be stale, try loading the "segments.gen" file. */
829
+ * listing seems to be stale, try loading the "segments" file. */
830
830
  if (1 == method || (0 == method && last_gen == gen && retry)) {
831
831
  method = 1;
832
832
  for (i = 0; i < GEN_FILE_RETRY_COUNT; i++) {
@@ -835,7 +835,7 @@ void sis_find_segments_file(Store *store, FindSegmentsFile *fsf,
835
835
  gen_is = store->open_input(store, SEGMENTS_GEN_FILE_NAME);
836
836
  XCATCHALL
837
837
  HANDLED();
838
- /* TODO:LOG "segments.gen open: IO_ERROR"*/
838
+ /* TODO:LOG "segments open: IO_ERROR"*/
839
839
  XENDTRY
840
840
 
841
841
  if (NULL != gen_is) {
@@ -498,12 +498,12 @@ static Scorer *phw_scorer(Weight *self, IndexReader *ir)
498
498
  if (phq->slop == 0) { /* optimize exact (common) case */
499
499
  phsc = exact_phrase_scorer_new(self, tps, positions, pos_cnt,
500
500
  self->similarity,
501
- ir->get_norms(ir, field_num));
501
+ ir_get_norms_i(ir, field_num));
502
502
  }
503
503
  else {
504
504
  phsc = sloppy_phrase_scorer_new(self, tps, positions, pos_cnt,
505
505
  self->similarity, phq->slop,
506
- ir->get_norms(ir, field_num));
506
+ ir_get_norms_i(ir, field_num));
507
507
  }
508
508
  free(tps);
509
509
  return phsc;
@@ -1205,7 +1205,7 @@ frt_iw_init(int argc, VALUE *argv, VALUE self)
1205
1205
  store = open_ram_store();
1206
1206
  DEREF(store);
1207
1207
  }
1208
- if (!create && create_if_missing && !store->exists(store, "segments.gen")) {
1208
+ if (!create && create_if_missing && !store->exists(store, "segments")) {
1209
1209
  create = true;
1210
1210
  }
1211
1211
  if (create) {
@@ -1,6 +1,20 @@
1
1
  require 'monitor'
2
2
 
3
3
  module Ferret::Index
4
+ module SynchroLockMixin
5
+ def synchrolock
6
+ trys = 5
7
+ begin
8
+ synchronize {yield}
9
+ rescue Ferret::Store::Lock::LockError => e
10
+ if (trys -= 1) <= 0
11
+ raise e
12
+ else
13
+ retry
14
+ end
15
+ end
16
+ end
17
+ end
4
18
  # This is a simplified interface to the index. See the TUTORIAL for more
5
19
  # information on how to use this class.
6
20
  class Index
@@ -116,37 +130,34 @@ module Ferret::Index
116
130
  @dir = RAMDirectory.new
117
131
  end
118
132
 
133
+ @dir.extend(MonitorMixin).extend(SynchroLockMixin)
119
134
  options[:dir] = @dir
120
135
  options[:lock_retry_time]||= 2
121
- @dir.extend(MonitorMixin)
122
- @dir.synchronize do
123
- @options = options
124
- if (!@dir.exists?("segments")) || options[:create]
125
- IndexWriter.new(options).close
126
- end
127
- options[:analyzer]||= Ferret::Analysis::StandardAnalyzer.new
136
+ @options = options
137
+ if (!@dir.exists?("segments")) || options[:create]
138
+ IndexWriter.new(options).close
139
+ end
140
+ options[:analyzer]||= Ferret::Analysis::StandardAnalyzer.new
128
141
 
129
- @searcher = nil
130
- @writer = nil
131
- @reader = nil
142
+ @searcher = nil
143
+ @writer = nil
144
+ @reader = nil
132
145
 
133
- @options.delete(:create) # only create the first time if at all
134
- @auto_flush = @options[:auto_flush] || false
135
- if (@options[:id_field].nil? and
136
- @key.is_a?(Symbol))
137
- @id_field = @key
138
- else
139
- @id_field = @options[:id_field] || :id
140
- end
141
- @default_field = (@options[:default_field]||= :*)
142
- @default_input_field = options[:default_input_field] || @id_field
146
+ @options.delete(:create) # only create the first time if at all
147
+ @auto_flush = @options[:auto_flush] || false
148
+ if (@options[:id_field].nil? and @key.is_a?(Symbol))
149
+ @id_field = @key
150
+ else
151
+ @id_field = @options[:id_field] || :id
152
+ end
153
+ @default_field = (@options[:default_field]||= :*)
154
+ @default_input_field = options[:default_input_field] || @id_field
143
155
 
144
- if @default_input_field.respond_to?(:intern)
145
- @default_input_field = @default_input_field.intern
146
- end
147
- @open = true
148
- @qp = nil
156
+ if @default_input_field.respond_to?(:intern)
157
+ @default_input_field = @default_input_field.intern
149
158
  end
159
+ @open = true
160
+ @qp = nil
150
161
  if block
151
162
  yield self
152
163
  self.close
@@ -253,7 +264,8 @@ module Ferret::Index
253
264
  #
254
265
  # See FieldInfos for more information on how to set field properties.
255
266
  def add_document(doc, analyzer = nil)
256
- @dir.synchronize do
267
+ @dir.synchrolock do
268
+ ensure_writer_open()
257
269
  if doc.is_a?(String) or doc.is_a?(Array)
258
270
  doc = {@default_input_field => doc}
259
271
  end
@@ -399,7 +411,8 @@ module Ferret::Index
399
411
  #
400
412
  # id:: The number of the document to delete
401
413
  def delete(id)
402
- @dir.synchronize do
414
+ @dir.synchrolock do
415
+ ensure_writer_open()
403
416
  if id.is_a?(String) or id.is_a?(Symbol)
404
417
  ensure_writer_open()
405
418
  @writer.delete(@id_field, id.to_s)
@@ -420,7 +433,8 @@ module Ferret::Index
420
433
  # string (in which case it is parsed by the standard query parser)
421
434
  # or an actual query object.
422
435
  def query_delete(query)
423
- @dir.synchronize do
436
+ @dir.synchrolock do
437
+ ensure_writer_open()
424
438
  ensure_searcher_open()
425
439
  query = do_process_query(query)
426
440
  @searcher.search_each(query) do |doc, score|
@@ -447,7 +461,8 @@ module Ferret::Index
447
461
  # the :key attribute.
448
462
  # new_doc:: The document to replace the old document with
449
463
  def update(id, new_doc)
450
- @dir.synchronize do
464
+ @dir.synchrolock do
465
+ ensure_writer_open()
451
466
  delete(id)
452
467
  if id.is_a?(String) or id.is_a?(Symbol)
453
468
  @writer.commit
@@ -484,7 +499,8 @@ module Ferret::Index
484
499
  # #=> {:id => "28", :title => "My Oh My", :artist => "David Gray"}
485
500
  #
486
501
  def query_update(query, new_val)
487
- @dir.synchronize do
502
+ @dir.synchrolock do
503
+ ensure_writer_open()
488
504
  ensure_searcher_open()
489
505
  docs_to_add = []
490
506
  query = do_process_query(query)
@@ -534,7 +550,7 @@ module Ferret::Index
534
550
  # optimizes the index. This should only be called when the index will no
535
551
  # longer be updated very often, but will be read a lot.
536
552
  def optimize()
537
- @dir.synchronize do
553
+ @dir.synchrolock do
538
554
  ensure_writer_open()
539
555
  @writer.optimize()
540
556
  @writer.close()
@@ -562,7 +578,8 @@ module Ferret::Index
562
578
  #
563
579
  # After this completes, the index is optimized.
564
580
  def add_indexes(indexes)
565
- @dir.synchronize do
581
+ @dir.synchrolock do
582
+ ensure_writer_open()
566
583
  indexes = [indexes].flatten # make sure we have an array
567
584
  return if indexes.size == 0 # nothing to do
568
585
  if indexes[0].is_a?(Index)
@@ -604,7 +621,7 @@ module Ferret::Index
604
621
  elsif directory.is_a?(Ferret::Store::Directory)
605
622
  @dir = directory
606
623
  end
607
- @dir.extend(MonitorMixin)
624
+ @dir.extend(MonitorMixin).extend(SynchroLockMixin)
608
625
  @options[:dir] = @dir
609
626
  @options[:create_if_missing] = true
610
627
  add_indexes([old_dir])
@@ -646,7 +663,7 @@ module Ferret::Index
646
663
  # Returns the field_infos object so that you can add new fields to the
647
664
  # index.
648
665
  def field_infos
649
- @dir.synchronize do
666
+ @dir.synchrolock do
650
667
  ensure_writer_open()
651
668
  return @writer.field_infos
652
669
  end
@@ -1,3 +1,3 @@
1
1
  module Ferret
2
- VERSION = '0.11.0'
2
+ VERSION = '0.11.1'
3
3
  end
@@ -6,8 +6,8 @@ class IndexThreadSafetyTest < Test::Unit::TestCase
6
6
  include Ferret::Index
7
7
 
8
8
  INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
9
- ITERATIONS = 1000
10
- NUM_THREADS = 2
9
+ ITERATIONS = 100
10
+ NUM_THREADS = 3
11
11
  ANALYZER = Ferret::Analysis::StandardAnalyzer.new()
12
12
 
13
13
  def setup
@@ -35,6 +35,7 @@ class IndexThreadSafetyTest < Test::Unit::TestCase
35
35
  else
36
36
  do_add_doc(index)
37
37
  end
38
+ index.commit
38
39
  end
39
40
  end
40
41
 
@@ -21,10 +21,10 @@ class IndexWriterTest < Test::Unit::TestCase
21
21
  assert(! wlock.locked?)
22
22
  assert(! clock.locked?)
23
23
  iw = IndexWriter.new(:dir => @dir, :create => true)
24
- assert(@dir.exists?("segments.gen"))
24
+ assert(@dir.exists?("segments"))
25
25
  assert(wlock.locked?)
26
26
  iw.close()
27
- assert(@dir.exists?("segments.gen"))
27
+ assert(@dir.exists?("segments"))
28
28
  assert(! wlock.locked?)
29
29
  assert(! clock.locked?)
30
30
  end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
3
3
  specification_version: 1
4
4
  name: ferret
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.11.0
7
- date: 2007-02-25 00:00:00 +11:00
6
+ version: 0.11.1
7
+ date: 2007-02-27 00:00:00 +11:00
8
8
  summary: Ruby indexing library.
9
9
  require_paths:
10
10
  - lib