ferret 0.11.2 → 0.11.3
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/analysis.c +0 -3
- data/ext/compound_io.c +3 -4
- data/ext/ferret.c +2 -1
- data/ext/index.c +41 -22
- data/ext/r_analysis.c +22 -11
- data/ext/search.c +1 -1
- data/lib/ferret_version.rb +1 -1
- metadata +3 -3
data/ext/analysis.c
CHANGED
@@ -925,9 +925,6 @@ static Token *std_next(TokenStream *ts)
|
|
925
925
|
}
|
926
926
|
ts->t = t + len;
|
927
927
|
token[len] = 0;
|
928
|
-
Token *k = tk_set(&(CTS(ts)->token), token, len, (int)(start - ts->text),
|
929
|
-
(int)(ts->t - ts->text), 1);
|
930
|
-
return k;
|
931
928
|
return tk_set(&(CTS(ts)->token), token, len, (int)(start - ts->text),
|
932
929
|
(int)(ts->t - ts->text), 1);
|
933
930
|
}
|
data/ext/compound_io.c
CHANGED
@@ -216,11 +216,10 @@ Store *open_cmpd_store(Store *store, const char *name)
|
|
216
216
|
char *fname;
|
217
217
|
FileEntry *entry = NULL;
|
218
218
|
Store *new_store = NULL;
|
219
|
-
CompoundStore *cmpd = NULL;
|
220
|
-
InStream *is = NULL;
|
219
|
+
CompoundStore *volatile cmpd = NULL;
|
220
|
+
InStream *volatile is = NULL;
|
221
221
|
|
222
222
|
TRY
|
223
|
-
new_store = store_new();
|
224
223
|
cmpd = ALLOC_AND_ZERO(CompoundStore);
|
225
224
|
|
226
225
|
cmpd->store = store;
|
@@ -245,7 +244,6 @@ Store *open_cmpd_store(Store *store, const char *name)
|
|
245
244
|
h_set(cmpd->entries, fname, entry);
|
246
245
|
}
|
247
246
|
XCATCHALL
|
248
|
-
store_destroy(new_store);
|
249
247
|
if (is) is_close(is);
|
250
248
|
if (cmpd->entries) h_destroy(cmpd->entries);
|
251
249
|
free(cmpd);
|
@@ -256,6 +254,7 @@ Store *open_cmpd_store(Store *store, const char *name)
|
|
256
254
|
entry->length = is_length(is) - entry->offset;
|
257
255
|
}
|
258
256
|
|
257
|
+
new_store = store_new();
|
259
258
|
new_store->dir.cmpd = cmpd;
|
260
259
|
new_store->touch = &cmpd_touch;
|
261
260
|
new_store->exists = &cmpd_exists;
|
data/ext/ferret.c
CHANGED
@@ -65,7 +65,8 @@ void
|
|
65
65
|
object_add2(void *key, VALUE obj, const char *file, int line)
|
66
66
|
{
|
67
67
|
if (h_get(object_map, key))
|
68
|
-
printf("failed adding %d. %s:%d\n",
|
68
|
+
printf("failed adding %x to %d; already contains %x. %s:%d\n",
|
69
|
+
(int)obj, (int)key, (int)h_get(object_map, key), file, line);
|
69
70
|
//printf("adding %d. now contains %d %s:%d\n", (int)key, ++hash_cnt, file, line);
|
70
71
|
h_set(object_map, key, (void *)obj);
|
71
72
|
}
|
data/ext/index.c
CHANGED
@@ -6,6 +6,7 @@
|
|
6
6
|
#include <string.h>
|
7
7
|
#include <limits.h>
|
8
8
|
#include <ctype.h>
|
9
|
+
#include <unistd.h>
|
9
10
|
|
10
11
|
#define GET_LOCK(lock, name, store, err_msg) do {\
|
11
12
|
lock = store->open_lock(store, name);\
|
@@ -850,15 +851,9 @@ void sis_find_segments_file(Store *store, FindSegmentsFile *fsf,
|
|
850
851
|
is_close(gen_is);
|
851
852
|
XENDTRY
|
852
853
|
/* TODO:LOG "fallback check: " + gen0 + "; " + gen1 */
|
853
|
-
if (gen0 == gen1) {
|
854
|
+
if (gen0 == gen1 && gen0 > gen) {
|
854
855
|
/* The file is consistent. */
|
855
|
-
|
856
|
-
/* TODO:LOG "fallback to '" +
|
857
|
-
* IndexFileNames.SEGMENTS_GEN + "' check: now
|
858
|
-
* try generation " + gen0 + " > " + gen */
|
859
|
-
gen = gen0;
|
860
|
-
}
|
861
|
-
goto method_two_loop_end;
|
856
|
+
gen = gen0;
|
862
857
|
}
|
863
858
|
break;
|
864
859
|
}
|
@@ -866,7 +861,6 @@ void sis_find_segments_file(Store *store, FindSegmentsFile *fsf,
|
|
866
861
|
micro_sleep(50000);
|
867
862
|
}
|
868
863
|
}
|
869
|
-
method_two_loop_end:
|
870
864
|
|
871
865
|
/* Method 3 (fallback if Methods 1 & 2 are not reliable): since both
|
872
866
|
* directory cache and file contents cache seem to be stale, just
|
@@ -888,9 +882,11 @@ method_two_loop_end:
|
|
888
882
|
/* OK, we've tried the same segments_N file twice in a row, so
|
889
883
|
* this must be a real error. We throw the original exception
|
890
884
|
* we got. */
|
891
|
-
RAISE(IO_ERROR,
|
885
|
+
RAISE(IO_ERROR,
|
886
|
+
"Error reading the segment infos. Store listing was\n");
|
892
887
|
}
|
893
888
|
else {
|
889
|
+
micro_sleep(50000);
|
894
890
|
retry = true;
|
895
891
|
}
|
896
892
|
}
|
@@ -905,8 +901,34 @@ method_two_loop_end:
|
|
905
901
|
run(store, fsf);
|
906
902
|
RETURN_EARLY();
|
907
903
|
return;
|
908
|
-
case IO_ERROR: case FILE_NOT_FOUND_ERROR:
|
904
|
+
case IO_ERROR: case FILE_NOT_FOUND_ERROR: case EOF_ERROR:
|
909
905
|
HANDLED();
|
906
|
+
/*
|
907
|
+
if (gen != sis_current_segment_generation(store)) {
|
908
|
+
fprintf(stderr, "%lld != %lld\n",
|
909
|
+
gen, sis_current_segment_generation(store));
|
910
|
+
fprintf(stderr, "%s\n", xcontext.msg);
|
911
|
+
}
|
912
|
+
else {
|
913
|
+
char *sl = store_to_s(store);
|
914
|
+
bool done = false;
|
915
|
+
fprintf(stderr, "%s\n>>>\n%s", xcontext.msg, sl);
|
916
|
+
free(sl);
|
917
|
+
while (!done) {
|
918
|
+
TRY
|
919
|
+
sis_put(sis_read(store), stderr);
|
920
|
+
done = true;
|
921
|
+
XCATCHALL
|
922
|
+
HANDLED();
|
923
|
+
XENDTRY
|
924
|
+
}
|
925
|
+
}
|
926
|
+
|
927
|
+
char *sl = store_to_s(store);
|
928
|
+
fprintf(stderr, "%s\n>>>\n%s", xcontext.msg, sl);
|
929
|
+
free(sl);
|
930
|
+
*/
|
931
|
+
|
910
932
|
/* Save the original root cause: */
|
911
933
|
/* TODO:LOG "primary Exception on '" + segmentFileName + "': " +
|
912
934
|
* err + "'; will retry: retry=" + retry + "; gen = " + gen */
|
@@ -931,7 +953,7 @@ method_two_loop_end:
|
|
931
953
|
RETURN_EARLY();
|
932
954
|
RETURN_EARLY();
|
933
955
|
return;
|
934
|
-
case IO_ERROR: case FILE_NOT_FOUND_ERROR:
|
956
|
+
case IO_ERROR: case FILE_NOT_FOUND_ERROR: case EOF_ERROR:
|
935
957
|
HANDLED();
|
936
958
|
/* TODO:LOG "secondary Exception on '" +
|
937
959
|
* prev_seg_file_name + "': " + err2 + "'; will retry"*/
|
@@ -1022,9 +1044,10 @@ void sis_read_i(Store *store, FindSegmentsFile *fsf)
|
|
1022
1044
|
int i;
|
1023
1045
|
bool success = false;
|
1024
1046
|
char seg_file_name[SEGMENT_NAME_MAX_LENGTH];
|
1025
|
-
InStream *is = NULL;
|
1026
|
-
SegmentInfos *sis = ALLOC_AND_ZERO(SegmentInfos);
|
1047
|
+
InStream *volatile is = NULL;
|
1048
|
+
SegmentInfos *volatile sis = ALLOC_AND_ZERO(SegmentInfos);
|
1027
1049
|
segfn_for_generation(seg_file_name, fsf->generation);
|
1050
|
+
fsf->p_return = NULL;
|
1028
1051
|
TRY
|
1029
1052
|
is = store->open_input(store, seg_file_name);
|
1030
1053
|
sis->store = store;
|
@@ -3727,10 +3750,6 @@ void ir_commit_i(IndexReader *ir)
|
|
3727
3750
|
char curr_seg_fn[MAX_FILE_PATH];
|
3728
3751
|
mutex_lock(&ir->store->mutex);
|
3729
3752
|
|
3730
|
-
/* Should not be necessary: no prior commit should have left
|
3731
|
-
* pending files, so just defensive: */
|
3732
|
-
if (ir->deleter) deleter_clear_pending_deletions(ir->deleter);
|
3733
|
-
|
3734
3753
|
sis_curr_seg_file_name(curr_seg_fn, ir->store);
|
3735
3754
|
|
3736
3755
|
ir->commit_i(ir);
|
@@ -4696,8 +4715,8 @@ IndexReader *mr_open(IndexReader **sub_readers, const int r_cnt)
|
|
4696
4715
|
static void ir_open_i(Store *store, FindSegmentsFile *fsf)
|
4697
4716
|
{
|
4698
4717
|
volatile bool success = false;
|
4699
|
-
IndexReader *ir = NULL;
|
4700
|
-
SegmentInfos *sis = NULL;
|
4718
|
+
IndexReader *volatile ir = NULL;
|
4719
|
+
SegmentInfos *volatile sis = NULL;
|
4701
4720
|
TRY
|
4702
4721
|
do {
|
4703
4722
|
FieldInfos *fis;
|
@@ -5820,7 +5839,6 @@ static void iw_commit_compound_file(IndexWriter *iw, SegmentInfo *si)
|
|
5820
5839
|
sprintf(cfs_name, "%s.cfs", si->name);
|
5821
5840
|
|
5822
5841
|
iw_create_compound_file(iw->store, iw->fis, si, cfs_name, iw->deleter);
|
5823
|
-
deleter_commit_pending_deletions(iw->deleter);
|
5824
5842
|
}
|
5825
5843
|
|
5826
5844
|
static void iw_merge_segments(IndexWriter *iw, const int min_seg,
|
@@ -5841,7 +5859,6 @@ static void iw_merge_segments(IndexWriter *iw, const int min_seg,
|
|
5841
5859
|
for (i = min_seg; i < max_seg; i++) {
|
5842
5860
|
si_delete_files(sis->segs[i], iw->fis, iw->deleter);
|
5843
5861
|
}
|
5844
|
-
deleter_commit_pending_deletions(iw->deleter);
|
5845
5862
|
|
5846
5863
|
sis_del_from_to(sis, min_seg, max_seg);
|
5847
5864
|
|
@@ -5851,6 +5868,7 @@ static void iw_merge_segments(IndexWriter *iw, const int min_seg,
|
|
5851
5868
|
}
|
5852
5869
|
|
5853
5870
|
sis_write(sis, iw->store, iw->deleter);
|
5871
|
+
deleter_commit_pending_deletions(iw->deleter);
|
5854
5872
|
|
5855
5873
|
mutex_unlock(&iw->store->mutex);
|
5856
5874
|
|
@@ -5910,6 +5928,7 @@ static void iw_flush_ram_segment(IndexWriter *iw)
|
|
5910
5928
|
}
|
5911
5929
|
/* commit the segments file and the fields file */
|
5912
5930
|
sis_write(iw->sis, iw->store, iw->deleter);
|
5931
|
+
deleter_commit_pending_deletions(iw->deleter);
|
5913
5932
|
|
5914
5933
|
mutex_unlock(&iw->store->mutex);
|
5915
5934
|
|
data/ext/r_analysis.c
CHANGED
@@ -40,6 +40,7 @@ static VALUE cTokenStream;
|
|
40
40
|
static ID id_next;
|
41
41
|
static ID id_reset;
|
42
42
|
static ID id_clone;
|
43
|
+
static ID id_text;
|
43
44
|
|
44
45
|
/* Analyzer Methods */
|
45
46
|
static ID id_token_stream;
|
@@ -450,7 +451,9 @@ frt_ts_set_text(VALUE self, VALUE rtext)
|
|
450
451
|
Data_Get_Struct(self, TokenStream, ts);
|
451
452
|
StringValue(rtext);
|
452
453
|
ts->reset(ts, rs2s(rtext));
|
453
|
-
|
454
|
+
|
455
|
+
/* prevent garbage collection */
|
456
|
+
rb_ivar_set(self, id_text, rtext);
|
454
457
|
|
455
458
|
return rtext;
|
456
459
|
}
|
@@ -538,7 +541,10 @@ typedef struct CWrappedTokenStream {
|
|
538
541
|
static void
|
539
542
|
cwrts_destroy_i(TokenStream *ts)
|
540
543
|
{
|
541
|
-
|
544
|
+
if (object_get(&ts->text) != Qnil) {
|
545
|
+
object_del(&ts->text);
|
546
|
+
}
|
547
|
+
rb_hash_delete(object_space, ((VALUE)ts)|1);
|
542
548
|
/*printf("rb_hash_size = %d\n", frt_rb_hash_size(object_space)); */
|
543
549
|
free(ts);
|
544
550
|
}
|
@@ -554,6 +560,7 @@ static TokenStream *
|
|
554
560
|
cwrts_reset(TokenStream *ts, char *text)
|
555
561
|
{
|
556
562
|
ts->t = ts->text = text;
|
563
|
+
|
557
564
|
rb_funcall(CWTS(ts)->rts, id_reset, 1, rb_str_new2(text));
|
558
565
|
return ts;
|
559
566
|
}
|
@@ -563,7 +570,7 @@ cwrts_clone_i(TokenStream *orig_ts)
|
|
563
570
|
{
|
564
571
|
TokenStream *new_ts = ts_clone_size(orig_ts, sizeof(CWrappedTokenStream));
|
565
572
|
VALUE rts = CWTS(new_ts)->rts = rb_funcall(CWTS(orig_ts)->rts, id_clone, 0);
|
566
|
-
rb_hash_aset(object_space, ((
|
573
|
+
rb_hash_aset(object_space, ((VALUE)new_ts)|1, rts);
|
567
574
|
return new_ts;
|
568
575
|
}
|
569
576
|
|
@@ -583,7 +590,7 @@ frt_get_cwrapped_rts(VALUE rts)
|
|
583
590
|
ts->clone_i = &cwrts_clone_i;
|
584
591
|
ts->destroy_i = &cwrts_destroy_i;
|
585
592
|
/* prevent from being garbage collected */
|
586
|
-
rb_hash_aset(object_space, ((
|
593
|
+
rb_hash_aset(object_space, ((VALUE)ts)|1, rts);
|
587
594
|
ts->ref_cnt = 1;
|
588
595
|
}
|
589
596
|
return ts;
|
@@ -621,7 +628,10 @@ typedef struct RegExpTokenStream {
|
|
621
628
|
static void
|
622
629
|
rets_destroy_i(TokenStream *ts)
|
623
630
|
{
|
624
|
-
|
631
|
+
if (object_get(&ts->text) != Qnil) {
|
632
|
+
object_del(&ts->text);
|
633
|
+
}
|
634
|
+
rb_hash_delete(object_space, ((VALUE)ts)|1);
|
625
635
|
/*printf("rb_hash_size = %d\n", frt_rb_hash_size(object_space)); */
|
626
636
|
free(ts);
|
627
637
|
}
|
@@ -658,7 +668,7 @@ frt_rets_set_text(VALUE self, VALUE rtext)
|
|
658
668
|
TokenStream *ts;
|
659
669
|
GET_TS(ts, self);
|
660
670
|
|
661
|
-
rb_hash_aset(object_space, ((
|
671
|
+
rb_hash_aset(object_space, ((VALUE)ts)|1, rtext);
|
662
672
|
StringValue(rtext);
|
663
673
|
RETS(ts)->rtext = rtext;
|
664
674
|
RETS(ts)->curr_ind = 0;
|
@@ -730,7 +740,7 @@ rets_new(VALUE rtext, VALUE regex, VALUE proc)
|
|
730
740
|
|
731
741
|
if (rtext != Qnil) {
|
732
742
|
rtext = StringValue(rtext);
|
733
|
-
rb_hash_aset(object_space, ((
|
743
|
+
rb_hash_aset(object_space, ((VALUE)ts)|1, rtext);
|
734
744
|
}
|
735
745
|
ts->reset = &rets_reset;
|
736
746
|
ts->next = &rets_next;
|
@@ -1121,7 +1131,7 @@ typedef struct CWrappedAnalyzer
|
|
1121
1131
|
static void
|
1122
1132
|
cwa_destroy_i(Analyzer *a)
|
1123
1133
|
{
|
1124
|
-
rb_hash_delete(object_space, ((
|
1134
|
+
rb_hash_delete(object_space, ((VALUE)a)|1);
|
1125
1135
|
/*printf("rb_hash_size = %d\n", frt_rb_hash_size(object_space)); */
|
1126
1136
|
free(a);
|
1127
1137
|
}
|
@@ -1149,7 +1159,7 @@ frt_get_cwrapped_analyzer(VALUE ranalyzer)
|
|
1149
1159
|
a->ref_cnt = 1;
|
1150
1160
|
((CWrappedAnalyzer *)a)->ranalyzer = ranalyzer;
|
1151
1161
|
/* prevent from being garbage collected */
|
1152
|
-
rb_hash_aset(object_space, ((
|
1162
|
+
rb_hash_aset(object_space, ((VALUE)a)|1, ranalyzer);
|
1153
1163
|
}
|
1154
1164
|
return a;
|
1155
1165
|
}
|
@@ -1509,11 +1519,11 @@ frt_re_analyzer_token_stream(VALUE self, VALUE rfield, VALUE rtext)
|
|
1509
1519
|
object_set(&ts->text, rtext);
|
1510
1520
|
if (ts->next == &rets_next) {
|
1511
1521
|
RETS(ts)->rtext = rtext;
|
1512
|
-
rb_hash_aset(object_space, ((
|
1522
|
+
rb_hash_aset(object_space, ((VALUE)ts)|1, rtext);
|
1513
1523
|
}
|
1514
1524
|
else {
|
1515
1525
|
RETS(((TokenFilter*)ts)->sub_ts)->rtext = rtext;
|
1516
|
-
rb_hash_aset(object_space, ((
|
1526
|
+
rb_hash_aset(object_space, ((VALUE)((TokenFilter*)ts)->sub_ts)|1, rtext);
|
1517
1527
|
}
|
1518
1528
|
return get_rb_token_stream(ts);
|
1519
1529
|
}
|
@@ -2363,6 +2373,7 @@ Init_Analysis(void)
|
|
2363
2373
|
id_next = rb_intern("next");
|
2364
2374
|
id_reset = rb_intern("text=");
|
2365
2375
|
id_clone = rb_intern("clone");
|
2376
|
+
id_text = rb_intern("@text");
|
2366
2377
|
|
2367
2378
|
/* Analyzer Methods */
|
2368
2379
|
id_token_stream = rb_intern("token_stream");
|
data/ext/search.c
CHANGED
@@ -1040,7 +1040,7 @@ static TopDocs *isea_search_w(Searcher *self,
|
|
1040
1040
|
sea_check_args(num_docs, first_doc);
|
1041
1041
|
|
1042
1042
|
scorer = weight->scorer(weight, ISEA(self)->ir);
|
1043
|
-
if (!scorer) {
|
1043
|
+
if (!scorer || 0 == ISEA(self)->ir->num_docs(ISEA(self)->ir)) {
|
1044
1044
|
return td_new(0, 0, NULL, 0.0);
|
1045
1045
|
}
|
1046
1046
|
|
data/lib/ferret_version.rb
CHANGED
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: ferret
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.11.2
|
7
|
-
date: 2007-
|
6
|
+
version: 0.11.3
|
7
|
+
date: 2007-03-03 00:00:00 +11:00
|
8
8
|
summary: Ruby indexing library.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -36,6 +36,7 @@ files:
|
|
36
36
|
- TODO
|
37
37
|
- MIT-LICENSE
|
38
38
|
- README
|
39
|
+
- ext/q_multi_term.c
|
39
40
|
- ext/r_qparser.c
|
40
41
|
- ext/r_utils.c
|
41
42
|
- ext/r_analysis.c
|
@@ -44,7 +45,6 @@ files:
|
|
44
45
|
- ext/r_index.c
|
45
46
|
- ext/ferret.h
|
46
47
|
- ext/r_store.c
|
47
|
-
- ext/q_multi_term.c
|
48
48
|
- ext/hashset.c
|
49
49
|
- ext/q_match_all.c
|
50
50
|
- ext/bitvector.c
|