ferret 0.11.0 → 0.11.1
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/analysis.c +6 -3
- data/ext/extconf.rb +1 -1
- data/ext/fs_store.c +1 -1
- data/ext/global.c +12 -3
- data/ext/index.c +4 -4
- data/ext/q_phrase.c +2 -2
- data/ext/r_index.c +1 -1
- data/lib/ferret/index.rb +51 -34
- data/lib/ferret_version.rb +1 -1
- data/test/threading/thread_safety_index_test.rb +3 -2
- data/test/unit/index/tc_index_writer.rb +2 -2
- metadata +2 -2
data/ext/analysis.c
CHANGED
@@ -845,7 +845,7 @@ static Token *std_next(TokenStream *ts)
|
|
845
845
|
char *t;
|
846
846
|
char *start = NULL;
|
847
847
|
char *num_end = NULL;
|
848
|
-
char token[MAX_WORD_SIZE];
|
848
|
+
char token[MAX_WORD_SIZE + 1];
|
849
849
|
int token_i = 0;
|
850
850
|
int len;
|
851
851
|
bool is_acronym;
|
@@ -925,6 +925,9 @@ static Token *std_next(TokenStream *ts)
|
|
925
925
|
}
|
926
926
|
ts->t = t + len;
|
927
927
|
token[len] = 0;
|
928
|
+
Token *k = tk_set(&(CTS(ts)->token), token, len, (int)(start - ts->text),
|
929
|
+
(int)(ts->t - ts->text), 1);
|
930
|
+
return k;
|
928
931
|
return tk_set(&(CTS(ts)->token), token, len, (int)(start - ts->text),
|
929
932
|
(int)(ts->t - ts->text), 1);
|
930
933
|
}
|
@@ -1174,7 +1177,7 @@ static TokenStream *mf_clone_i(TokenStream *orig_ts)
|
|
1174
1177
|
|
1175
1178
|
static Token *mf_next(TokenStream *ts)
|
1176
1179
|
{
|
1177
|
-
char buf[MAX_WORD_SIZE];
|
1180
|
+
char buf[MAX_WORD_SIZE + 1];
|
1178
1181
|
MultiMapper *mapper = MFilt(ts)->mapper;
|
1179
1182
|
TokenFilter *tf = TkFilt(ts);
|
1180
1183
|
Token *tk = tf->sub_ts->next(tf->sub_ts);
|
@@ -1299,7 +1302,7 @@ TokenStream *hyphen_filter_new(TokenStream *sub_ts)
|
|
1299
1302
|
|
1300
1303
|
Token *mb_lcf_next(TokenStream *ts)
|
1301
1304
|
{
|
1302
|
-
wchar_t wbuf[MAX_WORD_SIZE], *wchr;
|
1305
|
+
wchar_t wbuf[MAX_WORD_SIZE + 1], *wchr;
|
1303
1306
|
Token *tk = TkFilt(ts)->sub_ts->next(TkFilt(ts)->sub_ts);
|
1304
1307
|
|
1305
1308
|
if (tk == NULL) {
|
data/ext/extconf.rb
CHANGED
data/ext/fs_store.c
CHANGED
data/ext/global.c
CHANGED
@@ -6,7 +6,6 @@
|
|
6
6
|
#include <assert.h>
|
7
7
|
#include <math.h>
|
8
8
|
#include <ctype.h>
|
9
|
-
#include <unistd.h>
|
10
9
|
|
11
10
|
const char *EMPTY_STRING = "";
|
12
11
|
|
@@ -359,14 +358,24 @@ void dummy_free(void *p)
|
|
359
358
|
|
360
359
|
#ifdef FRT_IS_C99
|
361
360
|
extern void usleep(unsigned long usec);
|
361
|
+
#else
|
362
|
+
# ifdef RUBY_BINDINGS
|
363
|
+
struct timeval rb_time_interval _((VALUE));
|
364
|
+
# else
|
365
|
+
# include <unistd.h>
|
366
|
+
# endif
|
362
367
|
#endif
|
363
368
|
|
364
369
|
extern void micro_sleep(const int micro_seconds)
|
365
370
|
{
|
366
|
-
#ifdef
|
367
|
-
|
371
|
+
#ifdef RUBY_BINDINGS
|
372
|
+
rb_thread_wait_for(rb_time_interval(rb_float_new((double)micro_seconds/1000000.0)));
|
368
373
|
#else
|
374
|
+
# ifdef POSH_OS_WIN32
|
375
|
+
Sleep(micro_seconds / 1000);
|
376
|
+
# else
|
369
377
|
usleep(micro_seconds);
|
378
|
+
# endif
|
370
379
|
#endif
|
371
380
|
}
|
372
381
|
|
data/ext/index.c
CHANGED
@@ -35,7 +35,7 @@ static void ste_reset(TermEnum *te);
|
|
35
35
|
static char *ste_next(TermEnum *te);
|
36
36
|
|
37
37
|
#define FORMAT 0
|
38
|
-
#define SEGMENTS_GEN_FILE_NAME "segments
|
38
|
+
#define SEGMENTS_GEN_FILE_NAME "segments"
|
39
39
|
#define MAX_EXT_LEN 10
|
40
40
|
|
41
41
|
/* *** Must be three characters *** */
|
@@ -53,7 +53,7 @@ static const char BASE36_DIGITMAP[] = "0123456789abcdefghijklmnopqrstuvwxyz";
|
|
53
53
|
|
54
54
|
static char *u64_to_str36(char *buf, int buf_size, f_u64 u)
|
55
55
|
{
|
56
|
-
int i = buf_size
|
56
|
+
int i = buf_size - 1;
|
57
57
|
buf[i] = '\0';
|
58
58
|
for (i--; i >= 0; i--) {
|
59
59
|
buf[i] = BASE36_DIGITMAP[u % 36];
|
@@ -826,7 +826,7 @@ void sis_find_segments_file(Store *store, FindSegmentsFile *fsf,
|
|
826
826
|
}
|
827
827
|
|
828
828
|
/* Method 2 (fallback if Method 1 isn't reliable): if the directory
|
829
|
-
* listing seems to be stale, try loading the "segments
|
829
|
+
* listing seems to be stale, try loading the "segments" file. */
|
830
830
|
if (1 == method || (0 == method && last_gen == gen && retry)) {
|
831
831
|
method = 1;
|
832
832
|
for (i = 0; i < GEN_FILE_RETRY_COUNT; i++) {
|
@@ -835,7 +835,7 @@ void sis_find_segments_file(Store *store, FindSegmentsFile *fsf,
|
|
835
835
|
gen_is = store->open_input(store, SEGMENTS_GEN_FILE_NAME);
|
836
836
|
XCATCHALL
|
837
837
|
HANDLED();
|
838
|
-
/* TODO:LOG "segments
|
838
|
+
/* TODO:LOG "segments open: IO_ERROR"*/
|
839
839
|
XENDTRY
|
840
840
|
|
841
841
|
if (NULL != gen_is) {
|
data/ext/q_phrase.c
CHANGED
@@ -498,12 +498,12 @@ static Scorer *phw_scorer(Weight *self, IndexReader *ir)
|
|
498
498
|
if (phq->slop == 0) { /* optimize exact (common) case */
|
499
499
|
phsc = exact_phrase_scorer_new(self, tps, positions, pos_cnt,
|
500
500
|
self->similarity,
|
501
|
-
|
501
|
+
ir_get_norms_i(ir, field_num));
|
502
502
|
}
|
503
503
|
else {
|
504
504
|
phsc = sloppy_phrase_scorer_new(self, tps, positions, pos_cnt,
|
505
505
|
self->similarity, phq->slop,
|
506
|
-
|
506
|
+
ir_get_norms_i(ir, field_num));
|
507
507
|
}
|
508
508
|
free(tps);
|
509
509
|
return phsc;
|
data/ext/r_index.c
CHANGED
@@ -1205,7 +1205,7 @@ frt_iw_init(int argc, VALUE *argv, VALUE self)
|
|
1205
1205
|
store = open_ram_store();
|
1206
1206
|
DEREF(store);
|
1207
1207
|
}
|
1208
|
-
if (!create && create_if_missing && !store->exists(store, "segments
|
1208
|
+
if (!create && create_if_missing && !store->exists(store, "segments")) {
|
1209
1209
|
create = true;
|
1210
1210
|
}
|
1211
1211
|
if (create) {
|
data/lib/ferret/index.rb
CHANGED
@@ -1,6 +1,20 @@
|
|
1
1
|
require 'monitor'
|
2
2
|
|
3
3
|
module Ferret::Index
|
4
|
+
module SynchroLockMixin
|
5
|
+
def synchrolock
|
6
|
+
trys = 5
|
7
|
+
begin
|
8
|
+
synchronize {yield}
|
9
|
+
rescue Ferret::Store::Lock::LockError => e
|
10
|
+
if (trys -= 1) <= 0
|
11
|
+
raise e
|
12
|
+
else
|
13
|
+
retry
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
4
18
|
# This is a simplified interface to the index. See the TUTORIAL for more
|
5
19
|
# information on how to use this class.
|
6
20
|
class Index
|
@@ -116,37 +130,34 @@ module Ferret::Index
|
|
116
130
|
@dir = RAMDirectory.new
|
117
131
|
end
|
118
132
|
|
133
|
+
@dir.extend(MonitorMixin).extend(SynchroLockMixin)
|
119
134
|
options[:dir] = @dir
|
120
135
|
options[:lock_retry_time]||= 2
|
121
|
-
@
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
end
|
127
|
-
options[:analyzer]||= Ferret::Analysis::StandardAnalyzer.new
|
136
|
+
@options = options
|
137
|
+
if (!@dir.exists?("segments")) || options[:create]
|
138
|
+
IndexWriter.new(options).close
|
139
|
+
end
|
140
|
+
options[:analyzer]||= Ferret::Analysis::StandardAnalyzer.new
|
128
141
|
|
129
|
-
|
130
|
-
|
131
|
-
|
142
|
+
@searcher = nil
|
143
|
+
@writer = nil
|
144
|
+
@reader = nil
|
132
145
|
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
@default_input_field = options[:default_input_field] || @id_field
|
146
|
+
@options.delete(:create) # only create the first time if at all
|
147
|
+
@auto_flush = @options[:auto_flush] || false
|
148
|
+
if (@options[:id_field].nil? and @key.is_a?(Symbol))
|
149
|
+
@id_field = @key
|
150
|
+
else
|
151
|
+
@id_field = @options[:id_field] || :id
|
152
|
+
end
|
153
|
+
@default_field = (@options[:default_field]||= :*)
|
154
|
+
@default_input_field = options[:default_input_field] || @id_field
|
143
155
|
|
144
|
-
|
145
|
-
|
146
|
-
end
|
147
|
-
@open = true
|
148
|
-
@qp = nil
|
156
|
+
if @default_input_field.respond_to?(:intern)
|
157
|
+
@default_input_field = @default_input_field.intern
|
149
158
|
end
|
159
|
+
@open = true
|
160
|
+
@qp = nil
|
150
161
|
if block
|
151
162
|
yield self
|
152
163
|
self.close
|
@@ -253,7 +264,8 @@ module Ferret::Index
|
|
253
264
|
#
|
254
265
|
# See FieldInfos for more information on how to set field properties.
|
255
266
|
def add_document(doc, analyzer = nil)
|
256
|
-
@dir.
|
267
|
+
@dir.synchrolock do
|
268
|
+
ensure_writer_open()
|
257
269
|
if doc.is_a?(String) or doc.is_a?(Array)
|
258
270
|
doc = {@default_input_field => doc}
|
259
271
|
end
|
@@ -399,7 +411,8 @@ module Ferret::Index
|
|
399
411
|
#
|
400
412
|
# id:: The number of the document to delete
|
401
413
|
def delete(id)
|
402
|
-
@dir.
|
414
|
+
@dir.synchrolock do
|
415
|
+
ensure_writer_open()
|
403
416
|
if id.is_a?(String) or id.is_a?(Symbol)
|
404
417
|
ensure_writer_open()
|
405
418
|
@writer.delete(@id_field, id.to_s)
|
@@ -420,7 +433,8 @@ module Ferret::Index
|
|
420
433
|
# string (in which case it is parsed by the standard query parser)
|
421
434
|
# or an actual query object.
|
422
435
|
def query_delete(query)
|
423
|
-
@dir.
|
436
|
+
@dir.synchrolock do
|
437
|
+
ensure_writer_open()
|
424
438
|
ensure_searcher_open()
|
425
439
|
query = do_process_query(query)
|
426
440
|
@searcher.search_each(query) do |doc, score|
|
@@ -447,7 +461,8 @@ module Ferret::Index
|
|
447
461
|
# the :key attribute.
|
448
462
|
# new_doc:: The document to replace the old document with
|
449
463
|
def update(id, new_doc)
|
450
|
-
@dir.
|
464
|
+
@dir.synchrolock do
|
465
|
+
ensure_writer_open()
|
451
466
|
delete(id)
|
452
467
|
if id.is_a?(String) or id.is_a?(Symbol)
|
453
468
|
@writer.commit
|
@@ -484,7 +499,8 @@ module Ferret::Index
|
|
484
499
|
# #=> {:id => "28", :title => "My Oh My", :artist => "David Gray"}
|
485
500
|
#
|
486
501
|
def query_update(query, new_val)
|
487
|
-
@dir.
|
502
|
+
@dir.synchrolock do
|
503
|
+
ensure_writer_open()
|
488
504
|
ensure_searcher_open()
|
489
505
|
docs_to_add = []
|
490
506
|
query = do_process_query(query)
|
@@ -534,7 +550,7 @@ module Ferret::Index
|
|
534
550
|
# optimizes the index. This should only be called when the index will no
|
535
551
|
# longer be updated very often, but will be read a lot.
|
536
552
|
def optimize()
|
537
|
-
@dir.
|
553
|
+
@dir.synchrolock do
|
538
554
|
ensure_writer_open()
|
539
555
|
@writer.optimize()
|
540
556
|
@writer.close()
|
@@ -562,7 +578,8 @@ module Ferret::Index
|
|
562
578
|
#
|
563
579
|
# After this completes, the index is optimized.
|
564
580
|
def add_indexes(indexes)
|
565
|
-
@dir.
|
581
|
+
@dir.synchrolock do
|
582
|
+
ensure_writer_open()
|
566
583
|
indexes = [indexes].flatten # make sure we have an array
|
567
584
|
return if indexes.size == 0 # nothing to do
|
568
585
|
if indexes[0].is_a?(Index)
|
@@ -604,7 +621,7 @@ module Ferret::Index
|
|
604
621
|
elsif directory.is_a?(Ferret::Store::Directory)
|
605
622
|
@dir = directory
|
606
623
|
end
|
607
|
-
@dir.extend(MonitorMixin)
|
624
|
+
@dir.extend(MonitorMixin).extend(SynchroLockMixin)
|
608
625
|
@options[:dir] = @dir
|
609
626
|
@options[:create_if_missing] = true
|
610
627
|
add_indexes([old_dir])
|
@@ -646,7 +663,7 @@ module Ferret::Index
|
|
646
663
|
# Returns the field_infos object so that you can add new fields to the
|
647
664
|
# index.
|
648
665
|
def field_infos
|
649
|
-
@dir.
|
666
|
+
@dir.synchrolock do
|
650
667
|
ensure_writer_open()
|
651
668
|
return @writer.field_infos
|
652
669
|
end
|
data/lib/ferret_version.rb
CHANGED
data/test/threading/thread_safety_index_test.rb
CHANGED
@@ -6,8 +6,8 @@ class IndexThreadSafetyTest < Test::Unit::TestCase
|
|
6
6
|
include Ferret::Index
|
7
7
|
|
8
8
|
INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
|
9
|
-
ITERATIONS =
|
10
|
-
NUM_THREADS =
|
9
|
+
ITERATIONS = 100
|
10
|
+
NUM_THREADS = 3
|
11
11
|
ANALYZER = Ferret::Analysis::StandardAnalyzer.new()
|
12
12
|
|
13
13
|
def setup
|
@@ -35,6 +35,7 @@ class IndexThreadSafetyTest < Test::Unit::TestCase
|
|
35
35
|
else
|
36
36
|
do_add_doc(index)
|
37
37
|
end
|
38
|
+
index.commit
|
38
39
|
end
|
39
40
|
end
|
40
41
|
|
data/test/unit/index/tc_index_writer.rb
CHANGED
@@ -21,10 +21,10 @@ class IndexWriterTest < Test::Unit::TestCase
|
|
21
21
|
assert(! wlock.locked?)
|
22
22
|
assert(! clock.locked?)
|
23
23
|
iw = IndexWriter.new(:dir => @dir, :create => true)
|
24
|
-
assert(@dir.exists?("segments
|
24
|
+
assert(@dir.exists?("segments"))
|
25
25
|
assert(wlock.locked?)
|
26
26
|
iw.close()
|
27
|
-
assert(@dir.exists?("segments
|
27
|
+
assert(@dir.exists?("segments"))
|
28
28
|
assert(! wlock.locked?)
|
29
29
|
assert(! clock.locked?)
|
30
30
|
end
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: ferret
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.11.
|
7
|
-
date: 2007-02-
|
6
|
+
version: 0.11.1
|
7
|
+
date: 2007-02-27 00:00:00 +11:00
|
8
8
|
summary: Ruby indexing library.
|
9
9
|
require_paths:
|
10
10
|
- lib
|