isomorfeus-ferret 0.17.3 → 0.17.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33)
  1. checksums.yaml +4 -4
  2. data/ext/isomorfeus_ferret_ext/frb_index.c +48 -67
  3. data/ext/isomorfeus_ferret_ext/frb_search.c +47 -47
  4. data/ext/isomorfeus_ferret_ext/frt_document.h +3 -6
  5. data/ext/isomorfeus_ferret_ext/frt_field_index.c +1 -1
  6. data/ext/isomorfeus_ferret_ext/frt_filter.c +2 -2
  7. data/ext/isomorfeus_ferret_ext/frt_ind.c +2 -2
  8. data/ext/isomorfeus_ferret_ext/frt_ind.h +1 -1
  9. data/ext/isomorfeus_ferret_ext/frt_index.c +46 -62
  10. data/ext/isomorfeus_ferret_ext/frt_index.h +3 -3
  11. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +48 -48
  12. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +2 -2
  13. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +4 -4
  14. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +10 -10
  15. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +26 -26
  16. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +12 -12
  17. data/ext/isomorfeus_ferret_ext/frt_q_range.c +2 -2
  18. data/ext/isomorfeus_ferret_ext/frt_q_span.c +144 -145
  19. data/ext/isomorfeus_ferret_ext/frt_q_term.c +9 -9
  20. data/ext/isomorfeus_ferret_ext/frt_search.c +31 -31
  21. data/ext/isomorfeus_ferret_ext/frt_search.h +6 -6
  22. data/ext/isomorfeus_ferret_ext/frt_similarity.c +1 -1
  23. data/ext/isomorfeus_ferret_ext/frt_sort.c +20 -20
  24. data/ext/isomorfeus_ferret_ext/test.c +1 -1
  25. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +1 -2
  26. data/ext/isomorfeus_ferret_ext/test_filter.c +5 -6
  27. data/ext/isomorfeus_ferret_ext/test_index.c +30 -32
  28. data/ext/isomorfeus_ferret_ext/test_search.c +7 -7
  29. data/ext/isomorfeus_ferret_ext/test_sort.c +3 -3
  30. data/ext/isomorfeus_ferret_ext/test_threading.c +1 -1
  31. data/lib/isomorfeus/ferret/index/index.rb +7 -7
  32. data/lib/isomorfeus/ferret/version.rb +1 -1
  33. metadata +12 -6
@@ -25,8 +25,8 @@ static bool fqsc_next(FrtScorer *self) {
25
25
  FrtScorer *sub_sc = FQSc(self)->sub_scorer;
26
26
  FrtBitVector *bv = FQSc(self)->bv;
27
27
  while (sub_sc->next(sub_sc)) {
28
- self->doc = sub_sc->doc;
29
- if (frt_bv_get(bv, self->doc)) return true;
28
+ self->doc_num = sub_sc->doc_num;
29
+ if (frt_bv_get(bv, self->doc_num)) return true;
30
30
  }
31
31
  return false;
32
32
  }
@@ -36,8 +36,8 @@ static bool fqsc_skip_to(FrtScorer *self, int doc_num) {
36
36
  FrtBitVector *bv = FQSc(self)->bv;
37
37
  if (sub_sc->skip_to(sub_sc, doc_num)) {
38
38
  do {
39
- self->doc = sub_sc->doc;
40
- if (frt_bv_get(bv, self->doc)) {
39
+ self->doc_num = sub_sc->doc_num;
40
+ if (frt_bv_get(bv, self->doc_num)) {
41
41
  return true;
42
42
  }
43
43
  } while (sub_sc->next(sub_sc));
@@ -10,10 +10,10 @@
10
10
  #define MASc(scorer) ((MatchAllScorer *)(scorer))
11
11
 
12
12
  typedef struct MatchAllScorer {
13
- FrtScorer super;
14
- FrtIndexReader *ir;
15
- int max_doc;
16
- float score;
13
+ FrtScorer super;
14
+ FrtIndexReader *ir;
15
+ int max_doc_num;
16
+ float score;
17
17
  } MatchAllScorer;
18
18
 
19
19
  static float masc_score(FrtScorer *self) {
@@ -21,9 +21,9 @@ static float masc_score(FrtScorer *self) {
21
21
  }
22
22
 
23
23
  static bool masc_next(FrtScorer *self) {
24
- while (self->doc < (MASc(self)->max_doc - 1)) {
25
- self->doc++;
26
- if (!MASc(self)->ir->is_deleted(MASc(self)->ir, self->doc)) {
24
+ while (self->doc_num < (MASc(self)->max_doc_num - 1)) {
25
+ self->doc_num++;
26
+ if (!MASc(self)->ir->is_deleted(MASc(self)->ir, self->doc_num)) {
27
27
  return true;
28
28
  }
29
29
  }
@@ -31,7 +31,7 @@ static bool masc_next(FrtScorer *self) {
31
31
  }
32
32
 
33
33
  static bool masc_skip_to(FrtScorer *self, int doc_num) {
34
- self->doc = doc_num - 1;
34
+ self->doc_num = doc_num - 1;
35
35
  return masc_next(self);
36
36
  }
37
37
 
@@ -50,9 +50,9 @@ static FrtScorer *masc_new(FrtWeight *weight, FrtIndexReader *ir) {
50
50
  FrtScorer *self = frt_scorer_new(MatchAllScorer, weight->similarity);
51
51
  MASc(self)->ir = ir;
52
52
  FRT_REF(ir);
53
- MASc(self)->max_doc = ir->max_doc(ir);
53
+ MASc(self)->max_doc_num = ir->max_doc_num(ir);
54
54
  MASc(self)->score = weight->value;
55
- self->doc = -1;
55
+ self->doc_num = -1;
56
56
  self->score = &masc_score;
57
57
  self->next = &masc_next;
58
58
  self->skip_to = &masc_skip_to;
@@ -54,33 +54,33 @@ static BoostedTerm *boosted_term_new(const char *term, float boost) {
54
54
  #define TDE_READ_SIZE 16
55
55
 
56
56
  typedef struct TermDocEnumWrapper {
57
- const char *term;
57
+ const char *term;
58
58
  FrtTermDocEnum *tde;
59
- float boost;
60
- int doc;
61
- int freq;
62
- int docs[TDE_READ_SIZE];
63
- int freqs[TDE_READ_SIZE];
64
- int pointer;
65
- int pointer_max;
59
+ float boost;
60
+ int doc_num;
61
+ int freq;
62
+ int doc_nums[TDE_READ_SIZE];
63
+ int freqs[TDE_READ_SIZE];
64
+ int pointer;
65
+ int pointer_max;
66
66
  } TermDocEnumWrapper;
67
67
 
68
68
  static bool tdew_less_than(const TermDocEnumWrapper *tdew1, const TermDocEnumWrapper *tdew2) {
69
- return (tdew1->doc < tdew2->doc);
69
+ return (tdew1->doc_num < tdew2->doc_num);
70
70
  }
71
71
 
72
72
  static bool tdew_next(TermDocEnumWrapper *self) {
73
73
  self->pointer++;
74
74
  if (self->pointer >= self->pointer_max) {
75
75
  /* refill buffer */
76
- self->pointer_max = self->tde->read(self->tde, self->docs, self->freqs, TDE_READ_SIZE);
76
+ self->pointer_max = self->tde->read(self->tde, self->doc_nums, self->freqs, TDE_READ_SIZE);
77
77
  if (self->pointer_max != 0) {
78
78
  self->pointer = 0;
79
79
  } else {
80
80
  return false;
81
81
  }
82
82
  }
83
- self->doc = self->docs[self->pointer];
83
+ self->doc_num = self->doc_nums[self->pointer];
84
84
  self->freq = self->freqs[self->pointer];
85
85
  return true;
86
86
  }
@@ -89,8 +89,8 @@ static bool tdew_skip_to(TermDocEnumWrapper *self, int doc_num) {
89
89
  FrtTermDocEnum *tde = self->tde;
90
90
 
91
91
  while (++(self->pointer) < self->pointer_max) {
92
- if (self->docs[self->pointer] >= doc_num) {
93
- self->doc = self->docs[self->pointer];
92
+ if (self->doc_nums[self->pointer] >= doc_num) {
93
+ self->doc_num = self->doc_nums[self->pointer];
94
94
  self->freq = self->freqs[self->pointer];
95
95
  return true;
96
96
  }
@@ -100,7 +100,7 @@ static bool tdew_skip_to(TermDocEnumWrapper *self, int doc_num) {
100
100
  if (tde->skip_to(tde, doc_num)) {
101
101
  self->pointer_max = 1;
102
102
  self->pointer = 0;
103
- self->docs[0] = self->doc = tde->doc_num(tde);
103
+ self->doc_nums[0] = self->doc_num = tde->doc_num(tde);
104
104
  self->freqs[0] = self->freq = tde->freq(tde);
105
105
  return true;
106
106
  } else {
@@ -118,7 +118,7 @@ static TermDocEnumWrapper *tdew_new(const char *term, FrtTermDocEnum *tde, float
118
118
  self->term = term;
119
119
  self->tde = tde;
120
120
  self->boost = boost;
121
- self->doc = -1;
121
+ self->doc_num = -1;
122
122
  return self;
123
123
  }
124
124
 
@@ -144,11 +144,11 @@ typedef struct MultiTermScorer {
144
144
 
145
145
  static float multi_tsc_score(FrtScorer *self) {
146
146
  return MTSc(self)->total_score * MTSc(self)->weight_value
147
- * frt_sim_decode_norm(self->similarity, MTSc(self)->norms[self->doc]);
147
+ * frt_sim_decode_norm(self->similarity, MTSc(self)->norms[self->doc_num]);
148
148
  }
149
149
 
150
150
  static bool multi_tsc_next(FrtScorer *self) {
151
- int curr_doc;
151
+ int curr_doc_num;
152
152
  float total_score = 0.0f;
153
153
  TermDocEnumWrapper *tdew;
154
154
  MultiTermScorer *mtsc = MTSc(self);
@@ -170,7 +170,7 @@ static bool multi_tsc_next(FrtScorer *self) {
170
170
  return false;
171
171
  }
172
172
 
173
- self->doc = curr_doc = tdew->doc;
173
+ self->doc_num = curr_doc_num = tdew->doc_num;
174
174
  do {
175
175
  int freq = tdew->freq;
176
176
  if (freq < SCORE_CACHE_SIZE) {
@@ -186,7 +186,7 @@ static bool multi_tsc_next(FrtScorer *self) {
186
186
  }
187
187
 
188
188
  } while (((tdew = (TermDocEnumWrapper *)frt_pq_top(tdew_pq)) != NULL)
189
- && tdew->doc == curr_doc);
189
+ && tdew->doc_num == curr_doc_num);
190
190
  mtsc->total_score = total_score;
191
191
  return true;
192
192
  }
@@ -207,11 +207,11 @@ static bool multi_tsc_advance_to(FrtScorer *self, int target_doc_num) {
207
207
  MTSc(self)->tdew_pq = tdew_pq;
208
208
  }
209
209
  if (tdew_pq->size == 0) {
210
- self->doc = -1;
210
+ self->doc_num = -1;
211
211
  return false;
212
212
  }
213
213
  while ((tdew = (TermDocEnumWrapper *)frt_pq_top(tdew_pq)) != NULL
214
- && (target_doc_num > tdew->doc)) {
214
+ && (target_doc_num > tdew->doc_num)) {
215
215
  if (tdew_skip_to(tdew, target_doc_num)) {
216
216
  frt_pq_down(tdew_pq);
217
217
  } else {
@@ -231,11 +231,11 @@ static FrtExplanation *multi_tsc_explain(FrtScorer *self, int doc_num) {
231
231
  TermDocEnumWrapper *tdew;
232
232
 
233
233
  if (multi_tsc_advance_to(self, doc_num) &&
234
- (tdew = (TermDocEnumWrapper *)frt_pq_top(mtsc->tdew_pq))->doc == doc_num) {
234
+ (tdew = (TermDocEnumWrapper *)frt_pq_top(mtsc->tdew_pq))->doc_num == doc_num) {
235
235
 
236
236
  FrtPriorityQueue *tdew_pq = MTSc(self)->tdew_pq;
237
237
  FrtExplanation *expl = frt_expl_new(0.0f, "The sum of:");
238
- int curr_doc = self->doc = tdew->doc;
238
+ int curr_doc_num = self->doc_num = tdew->doc_num;
239
239
  float total_score = 0.0f;
240
240
 
241
241
  do {
@@ -256,7 +256,7 @@ static FrtExplanation *multi_tsc_explain(FrtScorer *self, int doc_num) {
256
256
  }
257
257
 
258
258
  } while (((tdew = (TermDocEnumWrapper *)frt_pq_top(tdew_pq)) != NULL)
259
- && tdew->doc == curr_doc);
259
+ && tdew->doc_num == curr_doc_num);
260
260
  expl->value = total_score;
261
261
  return expl;
262
262
  } else {
@@ -444,7 +444,7 @@ static FrtExplanation *multi_tw_explain(FrtWeight *self, FrtIndexReader *ir, int
444
444
  static FrtWeight *multi_tw_new(FrtQuery *query, FrtSearcher *searcher) {
445
445
  int i;
446
446
  int doc_freq = 0;
447
- FrtWeight *self = w_new(FrtWeight, query);
447
+ FrtWeight *self = w_new(FrtWeight, query);
448
448
  FrtPriorityQueue *bt_pq = MTQ(query)->boosted_terms;
449
449
 
450
450
  self->scorer = &multi_tw_scorer;
@@ -460,7 +460,7 @@ static FrtWeight *multi_tw_new(FrtQuery *query, FrtSearcher *searcher) {
460
460
  ((BoostedTerm *)bt_pq->heap[i])->term);
461
461
  }
462
462
  self->idf += frt_sim_idf(self->similarity, doc_freq,
463
- searcher->max_doc(searcher));
463
+ searcher->max_doc_num(searcher));
464
464
 
465
465
  return self;
466
466
  }
@@ -42,7 +42,7 @@ typedef struct PhPos {
42
42
  FrtTermDocEnum *tpe;
43
43
  int offset;
44
44
  int count;
45
- int doc;
45
+ int doc_num;
46
46
  int position;
47
47
  } PhPos;
48
48
 
@@ -53,10 +53,10 @@ static bool pp_next(PhPos *self) {
53
53
  if (!tpe->next(tpe)) {
54
54
  tpe->close(tpe); /* close stream */
55
55
  self->tpe = NULL;
56
- self->doc = INT_MAX; /* sentinel value */
56
+ self->doc_num = INT_MAX; /* sentinel value */
57
57
  return false;
58
58
  }
59
- self->doc = tpe->doc_num(tpe);
59
+ self->doc_num = tpe->doc_num(tpe);
60
60
  self->position = 0;
61
61
  return true;
62
62
  }
@@ -68,10 +68,10 @@ static bool pp_skip_to(PhPos *self, int doc_num) {
68
68
  if (!tpe->skip_to(tpe, doc_num)) {
69
69
  tpe->close(tpe); /* close stream */
70
70
  self->tpe = NULL;
71
- self->doc = INT_MAX; /* sentinel value */
71
+ self->doc_num = INT_MAX; /* sentinel value */
72
72
  return false;
73
73
  }
74
- self->doc = tpe->doc_num(tpe);
74
+ self->doc_num = tpe->doc_num(tpe);
75
75
  self->position = 0;
76
76
  return true;
77
77
  }
@@ -95,7 +95,7 @@ static bool pp_first_position(PhPos *self) {
95
95
 
96
96
  #define PP_pp(p) (*(PhPos **)p)
97
97
  static int pp_cmp(const void *const p1, const void *const p2) {
98
- int cmp = PP_pp(p1)->doc - PP_pp(p2)->doc;
98
+ int cmp = PP_pp(p1)->doc_num - PP_pp(p2)->doc_num;
99
99
  if (cmp == 0) {
100
100
  cmp = PP_pp(p1)->position - PP_pp(p2)->position;
101
101
  if (cmp == 0) {
@@ -128,7 +128,7 @@ static PhPos *pp_new(FrtTermDocEnum *tpe, int offset) {
128
128
  PhPos *self = FRT_ALLOC(PhPos);
129
129
 
130
130
  self->tpe = tpe;
131
- self->count = self->doc = self->position = -1;
131
+ self->count = self->doc_num = self->position = -1;
132
132
  self->offset = offset;
133
133
 
134
134
  return self;
@@ -179,9 +179,9 @@ static bool phsc_do_next(FrtScorer *self) {
179
179
  PhPos *last = phrase_positions[FRT_PREV_NUM(pp_first_idx, pp_cnt)];
180
180
  while (phsc->more) {
181
181
  /* find doc with all the terms */
182
- while (phsc->more && first->doc < last->doc) {
182
+ while (phsc->more && first->doc_num < last->doc_num) {
183
183
  /* skip first upto last */
184
- phsc->more = pp_skip_to(first, last->doc);
184
+ phsc->more = pp_skip_to(first, last->doc_num);
185
185
  last = first;
186
186
  pp_first_idx = FRT_NEXT_NUM(pp_first_idx, pp_cnt);
187
187
  first = phrase_positions[pp_first_idx];
@@ -200,7 +200,7 @@ static bool phsc_do_next(FrtScorer *self) {
200
200
  last = phrase_positions[FRT_PREV_NUM(pp_first_idx, pp_cnt)];
201
201
  phsc->more = pp_next(last); /* trigger further scanning */
202
202
  } else {
203
- self->doc = first->doc;
203
+ self->doc_num = first->doc_num;
204
204
  return true; /* found a match */
205
205
  }
206
206
 
@@ -215,7 +215,7 @@ static float phsc_score(FrtScorer *self) {
215
215
  /* normalize */
216
216
  return raw_score * frt_sim_decode_norm(
217
217
  self->similarity,
218
- phsc->norms[self->doc]);
218
+ phsc->norms[self->doc_num]);
219
219
  }
220
220
 
221
221
  static bool phsc_next(FrtScorer *self) {
@@ -253,7 +253,7 @@ static FrtExplanation *phsc_explain(FrtScorer *self, int doc_num) {
253
253
 
254
254
  phsc_skip_to(self, doc_num);
255
255
 
256
- phrase_freq = (self->doc == doc_num) ? phsc->freq : 0.0f;
256
+ phrase_freq = (self->doc_num == doc_num) ? phsc->freq : 0.0f;
257
257
  return frt_expl_new(frt_sim_tf(self->similarity, phrase_freq),
258
258
  "tf(phrase_freq=%f)", phrase_freq);
259
259
  }
@@ -186,7 +186,7 @@ static char *frt_rfilt_to_s(FrtFilter *filt) {
186
186
  }
187
187
 
188
188
  static FrtBitVector *frt_rfilt_get_bv_i(FrtFilter *filt, FrtIndexReader *ir) {
189
- FrtBitVector *bv = frt_bv_new_capa(ir->max_doc(ir));
189
+ FrtBitVector *bv = frt_bv_new_capa(ir->max_doc_num(ir));
190
190
  FrtRange *range = RF(filt)->range;
191
191
  FrtFieldInfo *fi = frt_fis_get_field(ir->fis, range->field);
192
192
  /* the field info exists we need to add docs to the bit vector, otherwise
@@ -318,7 +318,7 @@ static FrtBitVector *frt_trfilt_get_bv_i(FrtFilter *filt, FrtIndexReader *ir) {
318
318
  if ((!lt || (sscanf(lt, "%lg%n", &lnum, &len) && (int)strlen(lt) == len)) &&
319
319
  (!ut || (sscanf(ut, "%lg%n", &unum, &len) && (int)strlen(ut) == len)))
320
320
  {
321
- FrtBitVector *bv = frt_bv_new_capa(ir->max_doc(ir));
321
+ FrtBitVector *bv = frt_bv_new_capa(ir->max_doc_num(ir));
322
322
  FrtFieldInfo *fi = frt_fis_get_field(ir->fis, range->field);
323
323
  /* the field info exists we need to add docs to the bit vector,
324
324
  * otherwise we just return an empty bit vector */