ferret 0.11.0 → 0.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/analysis.c +6 -3
- data/ext/extconf.rb +1 -1
- data/ext/fs_store.c +1 -1
- data/ext/global.c +12 -3
- data/ext/index.c +4 -4
- data/ext/q_phrase.c +2 -2
- data/ext/r_index.c +1 -1
- data/lib/ferret/index.rb +51 -34
- data/lib/ferret_version.rb +1 -1
- data/test/threading/thread_safety_index_test.rb +3 -2
- data/test/unit/index/tc_index_writer.rb +2 -2
- metadata +2 -2
data/ext/analysis.c
CHANGED
@@ -845,7 +845,7 @@ static Token *std_next(TokenStream *ts)
|
|
845
845
|
char *t;
|
846
846
|
char *start = NULL;
|
847
847
|
char *num_end = NULL;
|
848
|
-
char token[MAX_WORD_SIZE];
|
848
|
+
char token[MAX_WORD_SIZE + 1];
|
849
849
|
int token_i = 0;
|
850
850
|
int len;
|
851
851
|
bool is_acronym;
|
@@ -925,6 +925,9 @@ static Token *std_next(TokenStream *ts)
|
|
925
925
|
}
|
926
926
|
ts->t = t + len;
|
927
927
|
token[len] = 0;
|
928
|
+
Token *k = tk_set(&(CTS(ts)->token), token, len, (int)(start - ts->text),
|
929
|
+
(int)(ts->t - ts->text), 1);
|
930
|
+
return k;
|
928
931
|
return tk_set(&(CTS(ts)->token), token, len, (int)(start - ts->text),
|
929
932
|
(int)(ts->t - ts->text), 1);
|
930
933
|
}
|
@@ -1174,7 +1177,7 @@ static TokenStream *mf_clone_i(TokenStream *orig_ts)
|
|
1174
1177
|
|
1175
1178
|
static Token *mf_next(TokenStream *ts)
|
1176
1179
|
{
|
1177
|
-
char buf[MAX_WORD_SIZE];
|
1180
|
+
char buf[MAX_WORD_SIZE + 1];
|
1178
1181
|
MultiMapper *mapper = MFilt(ts)->mapper;
|
1179
1182
|
TokenFilter *tf = TkFilt(ts);
|
1180
1183
|
Token *tk = tf->sub_ts->next(tf->sub_ts);
|
@@ -1299,7 +1302,7 @@ TokenStream *hyphen_filter_new(TokenStream *sub_ts)
|
|
1299
1302
|
|
1300
1303
|
Token *mb_lcf_next(TokenStream *ts)
|
1301
1304
|
{
|
1302
|
-
wchar_t wbuf[MAX_WORD_SIZE], *wchr;
|
1305
|
+
wchar_t wbuf[MAX_WORD_SIZE + 1], *wchr;
|
1303
1306
|
Token *tk = TkFilt(ts)->sub_ts->next(TkFilt(ts)->sub_ts);
|
1304
1307
|
|
1305
1308
|
if (tk == NULL) {
|
data/ext/extconf.rb
CHANGED
data/ext/fs_store.c
CHANGED
data/ext/global.c
CHANGED
@@ -6,7 +6,6 @@
|
|
6
6
|
#include <assert.h>
|
7
7
|
#include <math.h>
|
8
8
|
#include <ctype.h>
|
9
|
-
#include <unistd.h>
|
10
9
|
|
11
10
|
const char *EMPTY_STRING = "";
|
12
11
|
|
@@ -359,14 +358,24 @@ void dummy_free(void *p)
|
|
359
358
|
|
360
359
|
#ifdef FRT_IS_C99
|
361
360
|
extern void usleep(unsigned long usec);
|
361
|
+
#else
|
362
|
+
# ifdef RUBY_BINDINGS
|
363
|
+
struct timeval rb_time_interval _((VALUE));
|
364
|
+
# else
|
365
|
+
# include <unistd.h>
|
366
|
+
# endif
|
362
367
|
#endif
|
363
368
|
|
364
369
|
extern void micro_sleep(const int micro_seconds)
|
365
370
|
{
|
366
|
-
#ifdef
|
367
|
-
|
371
|
+
#ifdef RUBY_BINDINGS
|
372
|
+
rb_thread_wait_for(rb_time_interval(rb_float_new((double)micro_seconds/1000000.0)));
|
368
373
|
#else
|
374
|
+
# ifdef POSH_OS_WIN32
|
375
|
+
Sleep(micro_seconds / 1000);
|
376
|
+
# else
|
369
377
|
usleep(micro_seconds);
|
378
|
+
# endif
|
370
379
|
#endif
|
371
380
|
}
|
372
381
|
|
data/ext/index.c
CHANGED
@@ -35,7 +35,7 @@ static void ste_reset(TermEnum *te);
|
|
35
35
|
static char *ste_next(TermEnum *te);
|
36
36
|
|
37
37
|
#define FORMAT 0
|
38
|
-
#define SEGMENTS_GEN_FILE_NAME "segments
|
38
|
+
#define SEGMENTS_GEN_FILE_NAME "segments"
|
39
39
|
#define MAX_EXT_LEN 10
|
40
40
|
|
41
41
|
/* *** Must be three characters *** */
|
@@ -53,7 +53,7 @@ static const char BASE36_DIGITMAP[] = "0123456789abcdefghijklmnopqrstuvwxyz";
|
|
53
53
|
|
54
54
|
static char *u64_to_str36(char *buf, int buf_size, f_u64 u)
|
55
55
|
{
|
56
|
-
int i = buf_size
|
56
|
+
int i = buf_size - 1;
|
57
57
|
buf[i] = '\0';
|
58
58
|
for (i--; i >= 0; i--) {
|
59
59
|
buf[i] = BASE36_DIGITMAP[u % 36];
|
@@ -826,7 +826,7 @@ void sis_find_segments_file(Store *store, FindSegmentsFile *fsf,
|
|
826
826
|
}
|
827
827
|
|
828
828
|
/* Method 2 (fallback if Method 1 isn't reliable): if the directory
|
829
|
-
* listing seems to be stale, try loading the "segments
|
829
|
+
* listing seems to be stale, try loading the "segments" file. */
|
830
830
|
if (1 == method || (0 == method && last_gen == gen && retry)) {
|
831
831
|
method = 1;
|
832
832
|
for (i = 0; i < GEN_FILE_RETRY_COUNT; i++) {
|
@@ -835,7 +835,7 @@ void sis_find_segments_file(Store *store, FindSegmentsFile *fsf,
|
|
835
835
|
gen_is = store->open_input(store, SEGMENTS_GEN_FILE_NAME);
|
836
836
|
XCATCHALL
|
837
837
|
HANDLED();
|
838
|
-
/* TODO:LOG "segments
|
838
|
+
/* TODO:LOG "segments open: IO_ERROR"*/
|
839
839
|
XENDTRY
|
840
840
|
|
841
841
|
if (NULL != gen_is) {
|
data/ext/q_phrase.c
CHANGED
@@ -498,12 +498,12 @@ static Scorer *phw_scorer(Weight *self, IndexReader *ir)
|
|
498
498
|
if (phq->slop == 0) { /* optimize exact (common) case */
|
499
499
|
phsc = exact_phrase_scorer_new(self, tps, positions, pos_cnt,
|
500
500
|
self->similarity,
|
501
|
-
|
501
|
+
ir_get_norms_i(ir, field_num));
|
502
502
|
}
|
503
503
|
else {
|
504
504
|
phsc = sloppy_phrase_scorer_new(self, tps, positions, pos_cnt,
|
505
505
|
self->similarity, phq->slop,
|
506
|
-
|
506
|
+
ir_get_norms_i(ir, field_num));
|
507
507
|
}
|
508
508
|
free(tps);
|
509
509
|
return phsc;
|
data/ext/r_index.c
CHANGED
@@ -1205,7 +1205,7 @@ frt_iw_init(int argc, VALUE *argv, VALUE self)
|
|
1205
1205
|
store = open_ram_store();
|
1206
1206
|
DEREF(store);
|
1207
1207
|
}
|
1208
|
-
if (!create && create_if_missing && !store->exists(store, "segments
|
1208
|
+
if (!create && create_if_missing && !store->exists(store, "segments")) {
|
1209
1209
|
create = true;
|
1210
1210
|
}
|
1211
1211
|
if (create) {
|
data/lib/ferret/index.rb
CHANGED
@@ -1,6 +1,20 @@
|
|
1
1
|
require 'monitor'
|
2
2
|
|
3
3
|
module Ferret::Index
|
4
|
+
module SynchroLockMixin
|
5
|
+
def synchrolock
|
6
|
+
trys = 5
|
7
|
+
begin
|
8
|
+
synchronize {yield}
|
9
|
+
rescue Ferret::Store::Lock::LockError => e
|
10
|
+
if (trys -= 1) <= 0
|
11
|
+
raise e
|
12
|
+
else
|
13
|
+
retry
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
4
18
|
# This is a simplified interface to the index. See the TUTORIAL for more
|
5
19
|
# information on how to use this class.
|
6
20
|
class Index
|
@@ -116,37 +130,34 @@ module Ferret::Index
|
|
116
130
|
@dir = RAMDirectory.new
|
117
131
|
end
|
118
132
|
|
133
|
+
@dir.extend(MonitorMixin).extend(SynchroLockMixin)
|
119
134
|
options[:dir] = @dir
|
120
135
|
options[:lock_retry_time]||= 2
|
121
|
-
@
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
end
|
127
|
-
options[:analyzer]||= Ferret::Analysis::StandardAnalyzer.new
|
136
|
+
@options = options
|
137
|
+
if (!@dir.exists?("segments")) || options[:create]
|
138
|
+
IndexWriter.new(options).close
|
139
|
+
end
|
140
|
+
options[:analyzer]||= Ferret::Analysis::StandardAnalyzer.new
|
128
141
|
|
129
|
-
|
130
|
-
|
131
|
-
|
142
|
+
@searcher = nil
|
143
|
+
@writer = nil
|
144
|
+
@reader = nil
|
132
145
|
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
@default_input_field = options[:default_input_field] || @id_field
|
146
|
+
@options.delete(:create) # only create the first time if at all
|
147
|
+
@auto_flush = @options[:auto_flush] || false
|
148
|
+
if (@options[:id_field].nil? and @key.is_a?(Symbol))
|
149
|
+
@id_field = @key
|
150
|
+
else
|
151
|
+
@id_field = @options[:id_field] || :id
|
152
|
+
end
|
153
|
+
@default_field = (@options[:default_field]||= :*)
|
154
|
+
@default_input_field = options[:default_input_field] || @id_field
|
143
155
|
|
144
|
-
|
145
|
-
|
146
|
-
end
|
147
|
-
@open = true
|
148
|
-
@qp = nil
|
156
|
+
if @default_input_field.respond_to?(:intern)
|
157
|
+
@default_input_field = @default_input_field.intern
|
149
158
|
end
|
159
|
+
@open = true
|
160
|
+
@qp = nil
|
150
161
|
if block
|
151
162
|
yield self
|
152
163
|
self.close
|
@@ -253,7 +264,8 @@ module Ferret::Index
|
|
253
264
|
#
|
254
265
|
# See FieldInfos for more information on how to set field properties.
|
255
266
|
def add_document(doc, analyzer = nil)
|
256
|
-
@dir.
|
267
|
+
@dir.synchrolock do
|
268
|
+
ensure_writer_open()
|
257
269
|
if doc.is_a?(String) or doc.is_a?(Array)
|
258
270
|
doc = {@default_input_field => doc}
|
259
271
|
end
|
@@ -399,7 +411,8 @@ module Ferret::Index
|
|
399
411
|
#
|
400
412
|
# id:: The number of the document to delete
|
401
413
|
def delete(id)
|
402
|
-
@dir.
|
414
|
+
@dir.synchrolock do
|
415
|
+
ensure_writer_open()
|
403
416
|
if id.is_a?(String) or id.is_a?(Symbol)
|
404
417
|
ensure_writer_open()
|
405
418
|
@writer.delete(@id_field, id.to_s)
|
@@ -420,7 +433,8 @@ module Ferret::Index
|
|
420
433
|
# string (in which case it is parsed by the standard query parser)
|
421
434
|
# or an actual query object.
|
422
435
|
def query_delete(query)
|
423
|
-
@dir.
|
436
|
+
@dir.synchrolock do
|
437
|
+
ensure_writer_open()
|
424
438
|
ensure_searcher_open()
|
425
439
|
query = do_process_query(query)
|
426
440
|
@searcher.search_each(query) do |doc, score|
|
@@ -447,7 +461,8 @@ module Ferret::Index
|
|
447
461
|
# the :key attribute.
|
448
462
|
# new_doc:: The document to replace the old document with
|
449
463
|
def update(id, new_doc)
|
450
|
-
@dir.
|
464
|
+
@dir.synchrolock do
|
465
|
+
ensure_writer_open()
|
451
466
|
delete(id)
|
452
467
|
if id.is_a?(String) or id.is_a?(Symbol)
|
453
468
|
@writer.commit
|
@@ -484,7 +499,8 @@ module Ferret::Index
|
|
484
499
|
# #=> {:id => "28", :title => "My Oh My", :artist => "David Gray"}
|
485
500
|
#
|
486
501
|
def query_update(query, new_val)
|
487
|
-
@dir.
|
502
|
+
@dir.synchrolock do
|
503
|
+
ensure_writer_open()
|
488
504
|
ensure_searcher_open()
|
489
505
|
docs_to_add = []
|
490
506
|
query = do_process_query(query)
|
@@ -534,7 +550,7 @@ module Ferret::Index
|
|
534
550
|
# optimizes the index. This should only be called when the index will no
|
535
551
|
# longer be updated very often, but will be read a lot.
|
536
552
|
def optimize()
|
537
|
-
@dir.
|
553
|
+
@dir.synchrolock do
|
538
554
|
ensure_writer_open()
|
539
555
|
@writer.optimize()
|
540
556
|
@writer.close()
|
@@ -562,7 +578,8 @@ module Ferret::Index
|
|
562
578
|
#
|
563
579
|
# After this completes, the index is optimized.
|
564
580
|
def add_indexes(indexes)
|
565
|
-
@dir.
|
581
|
+
@dir.synchrolock do
|
582
|
+
ensure_writer_open()
|
566
583
|
indexes = [indexes].flatten # make sure we have an array
|
567
584
|
return if indexes.size == 0 # nothing to do
|
568
585
|
if indexes[0].is_a?(Index)
|
@@ -604,7 +621,7 @@ module Ferret::Index
|
|
604
621
|
elsif directory.is_a?(Ferret::Store::Directory)
|
605
622
|
@dir = directory
|
606
623
|
end
|
607
|
-
@dir.extend(MonitorMixin)
|
624
|
+
@dir.extend(MonitorMixin).extend(SynchroLockMixin)
|
608
625
|
@options[:dir] = @dir
|
609
626
|
@options[:create_if_missing] = true
|
610
627
|
add_indexes([old_dir])
|
@@ -646,7 +663,7 @@ module Ferret::Index
|
|
646
663
|
# Returns the field_infos object so that you can add new fields to the
|
647
664
|
# index.
|
648
665
|
def field_infos
|
649
|
-
@dir.
|
666
|
+
@dir.synchrolock do
|
650
667
|
ensure_writer_open()
|
651
668
|
return @writer.field_infos
|
652
669
|
end
|
data/lib/ferret_version.rb
CHANGED
@@ -6,8 +6,8 @@ class IndexThreadSafetyTest < Test::Unit::TestCase
|
|
6
6
|
include Ferret::Index
|
7
7
|
|
8
8
|
INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
|
9
|
-
ITERATIONS =
|
10
|
-
NUM_THREADS =
|
9
|
+
ITERATIONS = 100
|
10
|
+
NUM_THREADS = 3
|
11
11
|
ANALYZER = Ferret::Analysis::StandardAnalyzer.new()
|
12
12
|
|
13
13
|
def setup
|
@@ -35,6 +35,7 @@ class IndexThreadSafetyTest < Test::Unit::TestCase
|
|
35
35
|
else
|
36
36
|
do_add_doc(index)
|
37
37
|
end
|
38
|
+
index.commit
|
38
39
|
end
|
39
40
|
end
|
40
41
|
|
@@ -21,10 +21,10 @@ class IndexWriterTest < Test::Unit::TestCase
|
|
21
21
|
assert(! wlock.locked?)
|
22
22
|
assert(! clock.locked?)
|
23
23
|
iw = IndexWriter.new(:dir => @dir, :create => true)
|
24
|
-
assert(@dir.exists?("segments
|
24
|
+
assert(@dir.exists?("segments"))
|
25
25
|
assert(wlock.locked?)
|
26
26
|
iw.close()
|
27
|
-
assert(@dir.exists?("segments
|
27
|
+
assert(@dir.exists?("segments"))
|
28
28
|
assert(! wlock.locked?)
|
29
29
|
assert(! clock.locked?)
|
30
30
|
end
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: ferret
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.11.
|
7
|
-
date: 2007-02-
|
6
|
+
version: 0.11.1
|
7
|
+
date: 2007-02-27 00:00:00 +11:00
|
8
8
|
summary: Ruby indexing library.
|
9
9
|
require_paths:
|
10
10
|
- lib
|