ferret 0.11.2 → 0.11.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/analysis.c +0 -3
- data/ext/compound_io.c +3 -4
- data/ext/ferret.c +2 -1
- data/ext/index.c +41 -22
- data/ext/r_analysis.c +22 -11
- data/ext/search.c +1 -1
- data/lib/ferret_version.rb +1 -1
- metadata +3 -3
data/ext/analysis.c
CHANGED
@@ -925,9 +925,6 @@ static Token *std_next(TokenStream *ts)
|
|
925
925
|
}
|
926
926
|
ts->t = t + len;
|
927
927
|
token[len] = 0;
|
928
|
-
Token *k = tk_set(&(CTS(ts)->token), token, len, (int)(start - ts->text),
|
929
|
-
(int)(ts->t - ts->text), 1);
|
930
|
-
return k;
|
931
928
|
return tk_set(&(CTS(ts)->token), token, len, (int)(start - ts->text),
|
932
929
|
(int)(ts->t - ts->text), 1);
|
933
930
|
}
|
data/ext/compound_io.c
CHANGED
@@ -216,11 +216,10 @@ Store *open_cmpd_store(Store *store, const char *name)
|
|
216
216
|
char *fname;
|
217
217
|
FileEntry *entry = NULL;
|
218
218
|
Store *new_store = NULL;
|
219
|
-
CompoundStore *cmpd = NULL;
|
220
|
-
InStream *is = NULL;
|
219
|
+
CompoundStore *volatile cmpd = NULL;
|
220
|
+
InStream *volatile is = NULL;
|
221
221
|
|
222
222
|
TRY
|
223
|
-
new_store = store_new();
|
224
223
|
cmpd = ALLOC_AND_ZERO(CompoundStore);
|
225
224
|
|
226
225
|
cmpd->store = store;
|
@@ -245,7 +244,6 @@ Store *open_cmpd_store(Store *store, const char *name)
|
|
245
244
|
h_set(cmpd->entries, fname, entry);
|
246
245
|
}
|
247
246
|
XCATCHALL
|
248
|
-
store_destroy(new_store);
|
249
247
|
if (is) is_close(is);
|
250
248
|
if (cmpd->entries) h_destroy(cmpd->entries);
|
251
249
|
free(cmpd);
|
@@ -256,6 +254,7 @@ Store *open_cmpd_store(Store *store, const char *name)
|
|
256
254
|
entry->length = is_length(is) - entry->offset;
|
257
255
|
}
|
258
256
|
|
257
|
+
new_store = store_new();
|
259
258
|
new_store->dir.cmpd = cmpd;
|
260
259
|
new_store->touch = &cmpd_touch;
|
261
260
|
new_store->exists = &cmpd_exists;
|
data/ext/ferret.c
CHANGED
@@ -65,7 +65,8 @@ void
|
|
65
65
|
object_add2(void *key, VALUE obj, const char *file, int line)
|
66
66
|
{
|
67
67
|
if (h_get(object_map, key))
|
68
|
-
printf("failed adding %d. %s:%d\n",
|
68
|
+
printf("failed adding %x to %d; already contains %x. %s:%d\n",
|
69
|
+
(int)obj, (int)key, (int)h_get(object_map, key), file, line);
|
69
70
|
//printf("adding %d. now contains %d %s:%d\n", (int)key, ++hash_cnt, file, line);
|
70
71
|
h_set(object_map, key, (void *)obj);
|
71
72
|
}
|
data/ext/index.c
CHANGED
@@ -6,6 +6,7 @@
|
|
6
6
|
#include <string.h>
|
7
7
|
#include <limits.h>
|
8
8
|
#include <ctype.h>
|
9
|
+
#include <unistd.h>
|
9
10
|
|
10
11
|
#define GET_LOCK(lock, name, store, err_msg) do {\
|
11
12
|
lock = store->open_lock(store, name);\
|
@@ -850,15 +851,9 @@ void sis_find_segments_file(Store *store, FindSegmentsFile *fsf,
|
|
850
851
|
is_close(gen_is);
|
851
852
|
XENDTRY
|
852
853
|
/* TODO:LOG "fallback check: " + gen0 + "; " + gen1 */
|
853
|
-
if (gen0 == gen1) {
|
854
|
+
if (gen0 == gen1 && gen0 > gen) {
|
854
855
|
/* The file is consistent. */
|
855
|
-
|
856
|
-
/* TODO:LOG "fallback to '" +
|
857
|
-
* IndexFileNames.SEGMENTS_GEN + "' check: now
|
858
|
-
* try generation " + gen0 + " > " + gen */
|
859
|
-
gen = gen0;
|
860
|
-
}
|
861
|
-
goto method_two_loop_end;
|
856
|
+
gen = gen0;
|
862
857
|
}
|
863
858
|
break;
|
864
859
|
}
|
@@ -866,7 +861,6 @@ void sis_find_segments_file(Store *store, FindSegmentsFile *fsf,
|
|
866
861
|
micro_sleep(50000);
|
867
862
|
}
|
868
863
|
}
|
869
|
-
method_two_loop_end:
|
870
864
|
|
871
865
|
/* Method 3 (fallback if Methods 1 & 2 are not reliable): since both
|
872
866
|
* directory cache and file contents cache seem to be stale, just
|
@@ -888,9 +882,11 @@ method_two_loop_end:
|
|
888
882
|
/* OK, we've tried the same segments_N file twice in a row, so
|
889
883
|
* this must be a real error. We throw the original exception
|
890
884
|
* we got. */
|
891
|
-
RAISE(IO_ERROR,
|
885
|
+
RAISE(IO_ERROR,
|
886
|
+
"Error reading the segment infos. Store listing was\n");
|
892
887
|
}
|
893
888
|
else {
|
889
|
+
micro_sleep(50000);
|
894
890
|
retry = true;
|
895
891
|
}
|
896
892
|
}
|
@@ -905,8 +901,34 @@ method_two_loop_end:
|
|
905
901
|
run(store, fsf);
|
906
902
|
RETURN_EARLY();
|
907
903
|
return;
|
908
|
-
case IO_ERROR: case FILE_NOT_FOUND_ERROR:
|
904
|
+
case IO_ERROR: case FILE_NOT_FOUND_ERROR: case EOF_ERROR:
|
909
905
|
HANDLED();
|
906
|
+
/*
|
907
|
+
if (gen != sis_current_segment_generation(store)) {
|
908
|
+
fprintf(stderr, "%lld != %lld\n",
|
909
|
+
gen, sis_current_segment_generation(store));
|
910
|
+
fprintf(stderr, "%s\n", xcontext.msg);
|
911
|
+
}
|
912
|
+
else {
|
913
|
+
char *sl = store_to_s(store);
|
914
|
+
bool done = false;
|
915
|
+
fprintf(stderr, "%s\n>>>\n%s", xcontext.msg, sl);
|
916
|
+
free(sl);
|
917
|
+
while (!done) {
|
918
|
+
TRY
|
919
|
+
sis_put(sis_read(store), stderr);
|
920
|
+
done = true;
|
921
|
+
XCATCHALL
|
922
|
+
HANDLED();
|
923
|
+
XENDTRY
|
924
|
+
}
|
925
|
+
}
|
926
|
+
|
927
|
+
char *sl = store_to_s(store);
|
928
|
+
fprintf(stderr, "%s\n>>>\n%s", xcontext.msg, sl);
|
929
|
+
free(sl);
|
930
|
+
*/
|
931
|
+
|
910
932
|
/* Save the original root cause: */
|
911
933
|
/* TODO:LOG "primary Exception on '" + segmentFileName + "': " +
|
912
934
|
* err + "'; will retry: retry=" + retry + "; gen = " + gen */
|
@@ -931,7 +953,7 @@ method_two_loop_end:
|
|
931
953
|
RETURN_EARLY();
|
932
954
|
RETURN_EARLY();
|
933
955
|
return;
|
934
|
-
case IO_ERROR: case FILE_NOT_FOUND_ERROR:
|
956
|
+
case IO_ERROR: case FILE_NOT_FOUND_ERROR: case EOF_ERROR:
|
935
957
|
HANDLED();
|
936
958
|
/* TODO:LOG "secondary Exception on '" +
|
937
959
|
* prev_seg_file_name + "': " + err2 + "'; will retry"*/
|
@@ -1022,9 +1044,10 @@ void sis_read_i(Store *store, FindSegmentsFile *fsf)
|
|
1022
1044
|
int i;
|
1023
1045
|
bool success = false;
|
1024
1046
|
char seg_file_name[SEGMENT_NAME_MAX_LENGTH];
|
1025
|
-
InStream *is = NULL;
|
1026
|
-
SegmentInfos *sis = ALLOC_AND_ZERO(SegmentInfos);
|
1047
|
+
InStream *volatile is = NULL;
|
1048
|
+
SegmentInfos *volatile sis = ALLOC_AND_ZERO(SegmentInfos);
|
1027
1049
|
segfn_for_generation(seg_file_name, fsf->generation);
|
1050
|
+
fsf->p_return = NULL;
|
1028
1051
|
TRY
|
1029
1052
|
is = store->open_input(store, seg_file_name);
|
1030
1053
|
sis->store = store;
|
@@ -3727,10 +3750,6 @@ void ir_commit_i(IndexReader *ir)
|
|
3727
3750
|
char curr_seg_fn[MAX_FILE_PATH];
|
3728
3751
|
mutex_lock(&ir->store->mutex);
|
3729
3752
|
|
3730
|
-
/* Should not be necessary: no prior commit should have left
|
3731
|
-
* pending files, so just defensive: */
|
3732
|
-
if (ir->deleter) deleter_clear_pending_deletions(ir->deleter);
|
3733
|
-
|
3734
3753
|
sis_curr_seg_file_name(curr_seg_fn, ir->store);
|
3735
3754
|
|
3736
3755
|
ir->commit_i(ir);
|
@@ -4696,8 +4715,8 @@ IndexReader *mr_open(IndexReader **sub_readers, const int r_cnt)
|
|
4696
4715
|
static void ir_open_i(Store *store, FindSegmentsFile *fsf)
|
4697
4716
|
{
|
4698
4717
|
volatile bool success = false;
|
4699
|
-
IndexReader *ir = NULL;
|
4700
|
-
SegmentInfos *sis = NULL;
|
4718
|
+
IndexReader *volatile ir = NULL;
|
4719
|
+
SegmentInfos *volatile sis = NULL;
|
4701
4720
|
TRY
|
4702
4721
|
do {
|
4703
4722
|
FieldInfos *fis;
|
@@ -5820,7 +5839,6 @@ static void iw_commit_compound_file(IndexWriter *iw, SegmentInfo *si)
|
|
5820
5839
|
sprintf(cfs_name, "%s.cfs", si->name);
|
5821
5840
|
|
5822
5841
|
iw_create_compound_file(iw->store, iw->fis, si, cfs_name, iw->deleter);
|
5823
|
-
deleter_commit_pending_deletions(iw->deleter);
|
5824
5842
|
}
|
5825
5843
|
|
5826
5844
|
static void iw_merge_segments(IndexWriter *iw, const int min_seg,
|
@@ -5841,7 +5859,6 @@ static void iw_merge_segments(IndexWriter *iw, const int min_seg,
|
|
5841
5859
|
for (i = min_seg; i < max_seg; i++) {
|
5842
5860
|
si_delete_files(sis->segs[i], iw->fis, iw->deleter);
|
5843
5861
|
}
|
5844
|
-
deleter_commit_pending_deletions(iw->deleter);
|
5845
5862
|
|
5846
5863
|
sis_del_from_to(sis, min_seg, max_seg);
|
5847
5864
|
|
@@ -5851,6 +5868,7 @@ static void iw_merge_segments(IndexWriter *iw, const int min_seg,
|
|
5851
5868
|
}
|
5852
5869
|
|
5853
5870
|
sis_write(sis, iw->store, iw->deleter);
|
5871
|
+
deleter_commit_pending_deletions(iw->deleter);
|
5854
5872
|
|
5855
5873
|
mutex_unlock(&iw->store->mutex);
|
5856
5874
|
|
@@ -5910,6 +5928,7 @@ static void iw_flush_ram_segment(IndexWriter *iw)
|
|
5910
5928
|
}
|
5911
5929
|
/* commit the segments file and the fields file */
|
5912
5930
|
sis_write(iw->sis, iw->store, iw->deleter);
|
5931
|
+
deleter_commit_pending_deletions(iw->deleter);
|
5913
5932
|
|
5914
5933
|
mutex_unlock(&iw->store->mutex);
|
5915
5934
|
|
data/ext/r_analysis.c
CHANGED
@@ -40,6 +40,7 @@ static VALUE cTokenStream;
|
|
40
40
|
static ID id_next;
|
41
41
|
static ID id_reset;
|
42
42
|
static ID id_clone;
|
43
|
+
static ID id_text;
|
43
44
|
|
44
45
|
/* Analyzer Methods */
|
45
46
|
static ID id_token_stream;
|
@@ -450,7 +451,9 @@ frt_ts_set_text(VALUE self, VALUE rtext)
|
|
450
451
|
Data_Get_Struct(self, TokenStream, ts);
|
451
452
|
StringValue(rtext);
|
452
453
|
ts->reset(ts, rs2s(rtext));
|
453
|
-
|
454
|
+
|
455
|
+
/* prevent garbage collection */
|
456
|
+
rb_ivar_set(self, id_text, rtext);
|
454
457
|
|
455
458
|
return rtext;
|
456
459
|
}
|
@@ -538,7 +541,10 @@ typedef struct CWrappedTokenStream {
|
|
538
541
|
static void
|
539
542
|
cwrts_destroy_i(TokenStream *ts)
|
540
543
|
{
|
541
|
-
|
544
|
+
if (object_get(&ts->text) != Qnil) {
|
545
|
+
object_del(&ts->text);
|
546
|
+
}
|
547
|
+
rb_hash_delete(object_space, ((VALUE)ts)|1);
|
542
548
|
/*printf("rb_hash_size = %d\n", frt_rb_hash_size(object_space)); */
|
543
549
|
free(ts);
|
544
550
|
}
|
@@ -554,6 +560,7 @@ static TokenStream *
|
|
554
560
|
cwrts_reset(TokenStream *ts, char *text)
|
555
561
|
{
|
556
562
|
ts->t = ts->text = text;
|
563
|
+
Xj
|
557
564
|
rb_funcall(CWTS(ts)->rts, id_reset, 1, rb_str_new2(text));
|
558
565
|
return ts;
|
559
566
|
}
|
@@ -563,7 +570,7 @@ cwrts_clone_i(TokenStream *orig_ts)
|
|
563
570
|
{
|
564
571
|
TokenStream *new_ts = ts_clone_size(orig_ts, sizeof(CWrappedTokenStream));
|
565
572
|
VALUE rts = CWTS(new_ts)->rts = rb_funcall(CWTS(orig_ts)->rts, id_clone, 0);
|
566
|
-
rb_hash_aset(object_space, ((
|
573
|
+
rb_hash_aset(object_space, ((VALUE)new_ts)|1, rts);
|
567
574
|
return new_ts;
|
568
575
|
}
|
569
576
|
|
@@ -583,7 +590,7 @@ frt_get_cwrapped_rts(VALUE rts)
|
|
583
590
|
ts->clone_i = &cwrts_clone_i;
|
584
591
|
ts->destroy_i = &cwrts_destroy_i;
|
585
592
|
/* prevent from being garbage collected */
|
586
|
-
rb_hash_aset(object_space, ((
|
593
|
+
rb_hash_aset(object_space, ((VALUE)ts)|1, rts);
|
587
594
|
ts->ref_cnt = 1;
|
588
595
|
}
|
589
596
|
return ts;
|
@@ -621,7 +628,10 @@ typedef struct RegExpTokenStream {
|
|
621
628
|
static void
|
622
629
|
rets_destroy_i(TokenStream *ts)
|
623
630
|
{
|
624
|
-
|
631
|
+
if (object_get(&ts->text) != Qnil) {
|
632
|
+
object_del(&ts->text);
|
633
|
+
}
|
634
|
+
rb_hash_delete(object_space, ((VALUE)ts)|1);
|
625
635
|
/*printf("rb_hash_size = %d\n", frt_rb_hash_size(object_space)); */
|
626
636
|
free(ts);
|
627
637
|
}
|
@@ -658,7 +668,7 @@ frt_rets_set_text(VALUE self, VALUE rtext)
|
|
658
668
|
TokenStream *ts;
|
659
669
|
GET_TS(ts, self);
|
660
670
|
|
661
|
-
rb_hash_aset(object_space, ((
|
671
|
+
rb_hash_aset(object_space, ((VALUE)ts)|1, rtext);
|
662
672
|
StringValue(rtext);
|
663
673
|
RETS(ts)->rtext = rtext;
|
664
674
|
RETS(ts)->curr_ind = 0;
|
@@ -730,7 +740,7 @@ rets_new(VALUE rtext, VALUE regex, VALUE proc)
|
|
730
740
|
|
731
741
|
if (rtext != Qnil) {
|
732
742
|
rtext = StringValue(rtext);
|
733
|
-
rb_hash_aset(object_space, ((
|
743
|
+
rb_hash_aset(object_space, ((VALUE)ts)|1, rtext);
|
734
744
|
}
|
735
745
|
ts->reset = &rets_reset;
|
736
746
|
ts->next = &rets_next;
|
@@ -1121,7 +1131,7 @@ typedef struct CWrappedAnalyzer
|
|
1121
1131
|
static void
|
1122
1132
|
cwa_destroy_i(Analyzer *a)
|
1123
1133
|
{
|
1124
|
-
rb_hash_delete(object_space, ((
|
1134
|
+
rb_hash_delete(object_space, ((VALUE)a)|1);
|
1125
1135
|
/*printf("rb_hash_size = %d\n", frt_rb_hash_size(object_space)); */
|
1126
1136
|
free(a);
|
1127
1137
|
}
|
@@ -1149,7 +1159,7 @@ frt_get_cwrapped_analyzer(VALUE ranalyzer)
|
|
1149
1159
|
a->ref_cnt = 1;
|
1150
1160
|
((CWrappedAnalyzer *)a)->ranalyzer = ranalyzer;
|
1151
1161
|
/* prevent from being garbage collected */
|
1152
|
-
rb_hash_aset(object_space, ((
|
1162
|
+
rb_hash_aset(object_space, ((VALUE)a)|1, ranalyzer);
|
1153
1163
|
}
|
1154
1164
|
return a;
|
1155
1165
|
}
|
@@ -1509,11 +1519,11 @@ frt_re_analyzer_token_stream(VALUE self, VALUE rfield, VALUE rtext)
|
|
1509
1519
|
object_set(&ts->text, rtext);
|
1510
1520
|
if (ts->next == &rets_next) {
|
1511
1521
|
RETS(ts)->rtext = rtext;
|
1512
|
-
rb_hash_aset(object_space, ((
|
1522
|
+
rb_hash_aset(object_space, ((VALUE)ts)|1, rtext);
|
1513
1523
|
}
|
1514
1524
|
else {
|
1515
1525
|
RETS(((TokenFilter*)ts)->sub_ts)->rtext = rtext;
|
1516
|
-
rb_hash_aset(object_space, ((
|
1526
|
+
rb_hash_aset(object_space, ((VALUE)((TokenFilter*)ts)->sub_ts)|1, rtext);
|
1517
1527
|
}
|
1518
1528
|
return get_rb_token_stream(ts);
|
1519
1529
|
}
|
@@ -2363,6 +2373,7 @@ Init_Analysis(void)
|
|
2363
2373
|
id_next = rb_intern("next");
|
2364
2374
|
id_reset = rb_intern("text=");
|
2365
2375
|
id_clone = rb_intern("clone");
|
2376
|
+
id_text = rb_intern("@text");
|
2366
2377
|
|
2367
2378
|
/* Analyzer Methods */
|
2368
2379
|
id_token_stream = rb_intern("token_stream");
|
data/ext/search.c
CHANGED
@@ -1040,7 +1040,7 @@ static TopDocs *isea_search_w(Searcher *self,
|
|
1040
1040
|
sea_check_args(num_docs, first_doc);
|
1041
1041
|
|
1042
1042
|
scorer = weight->scorer(weight, ISEA(self)->ir);
|
1043
|
-
if (!scorer) {
|
1043
|
+
if (!scorer || 0 == ISEA(self)->ir->num_docs(ISEA(self)->ir)) {
|
1044
1044
|
return td_new(0, 0, NULL, 0.0);
|
1045
1045
|
}
|
1046
1046
|
|
data/lib/ferret_version.rb
CHANGED
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: ferret
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.11.2
|
7
|
-
date: 2007-
|
6
|
+
version: 0.11.3
|
7
|
+
date: 2007-03-03 00:00:00 +11:00
|
8
8
|
summary: Ruby indexing library.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -36,6 +36,7 @@ files:
|
|
36
36
|
- TODO
|
37
37
|
- MIT-LICENSE
|
38
38
|
- README
|
39
|
+
- ext/q_multi_term.c
|
39
40
|
- ext/r_qparser.c
|
40
41
|
- ext/r_utils.c
|
41
42
|
- ext/r_analysis.c
|
@@ -44,7 +45,6 @@ files:
|
|
44
45
|
- ext/r_index.c
|
45
46
|
- ext/ferret.h
|
46
47
|
- ext/r_store.c
|
47
|
-
- ext/q_multi_term.c
|
48
48
|
- ext/hashset.c
|
49
49
|
- ext/q_match_all.c
|
50
50
|
- ext/bitvector.c
|