isomorfeus-ferret 0.17.2 → 0.17.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (133) hide show
  1. checksums.yaml +4 -4
  2. data/ext/isomorfeus_ferret_ext/benchmark.c +9 -20
  3. data/ext/isomorfeus_ferret_ext/benchmarks_all.h +1 -2
  4. data/ext/isomorfeus_ferret_ext/bm_hash.c +1 -2
  5. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +4 -2
  6. data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +3 -2
  7. data/ext/isomorfeus_ferret_ext/frb_analysis.c +4 -5
  8. data/ext/isomorfeus_ferret_ext/frb_field_info.c +3 -4
  9. data/ext/isomorfeus_ferret_ext/frb_index.c +161 -187
  10. data/ext/isomorfeus_ferret_ext/frb_lazy_doc.c +14 -16
  11. data/ext/isomorfeus_ferret_ext/frb_search.c +77 -69
  12. data/ext/isomorfeus_ferret_ext/frb_store.c +27 -13
  13. data/ext/isomorfeus_ferret_ext/frb_utils.c +3 -6
  14. data/ext/isomorfeus_ferret_ext/frt_analysis.c +39 -46
  15. data/ext/isomorfeus_ferret_ext/frt_analysis.h +9 -9
  16. data/ext/isomorfeus_ferret_ext/frt_array.c +11 -22
  17. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +3 -6
  18. data/ext/isomorfeus_ferret_ext/frt_doc_field.c +87 -0
  19. data/ext/isomorfeus_ferret_ext/frt_doc_field.h +26 -0
  20. data/ext/isomorfeus_ferret_ext/frt_document.c +4 -97
  21. data/ext/isomorfeus_ferret_ext/frt_document.h +5 -33
  22. data/ext/isomorfeus_ferret_ext/frt_except.c +8 -6
  23. data/ext/isomorfeus_ferret_ext/frt_except.h +1 -2
  24. data/ext/isomorfeus_ferret_ext/frt_field_index.c +14 -33
  25. data/ext/isomorfeus_ferret_ext/frt_field_index.h +0 -6
  26. data/ext/isomorfeus_ferret_ext/frt_field_info.c +69 -0
  27. data/ext/isomorfeus_ferret_ext/frt_field_info.h +49 -0
  28. data/ext/isomorfeus_ferret_ext/frt_field_infos.c +196 -0
  29. data/ext/isomorfeus_ferret_ext/frt_field_infos.h +35 -0
  30. data/ext/isomorfeus_ferret_ext/frt_filter.c +2 -2
  31. data/ext/isomorfeus_ferret_ext/frt_global.c +10 -4
  32. data/ext/isomorfeus_ferret_ext/frt_global.h +11 -15
  33. data/ext/isomorfeus_ferret_ext/frt_hash.c +8 -8
  34. data/ext/isomorfeus_ferret_ext/frt_hash.h +1 -2
  35. data/ext/isomorfeus_ferret_ext/frt_hashset.c +20 -40
  36. data/ext/isomorfeus_ferret_ext/frt_hashset.h +1 -2
  37. data/ext/isomorfeus_ferret_ext/frt_helper.c +7 -15
  38. data/ext/isomorfeus_ferret_ext/frt_in_stream.c +35 -45
  39. data/ext/isomorfeus_ferret_ext/frt_in_stream.h +3 -2
  40. data/ext/isomorfeus_ferret_ext/frt_ind.c +21 -39
  41. data/ext/isomorfeus_ferret_ext/frt_ind.h +1 -1
  42. data/ext/isomorfeus_ferret_ext/frt_index.c +334 -848
  43. data/ext/isomorfeus_ferret_ext/frt_index.h +4 -105
  44. data/ext/isomorfeus_ferret_ext/frt_lang.c +5 -10
  45. data/ext/isomorfeus_ferret_ext/frt_lazy_doc.c +3 -3
  46. data/ext/isomorfeus_ferret_ext/frt_lazy_doc.h +1 -1
  47. data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.c +18 -25
  48. data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.h +5 -5
  49. data/ext/isomorfeus_ferret_ext/frt_mdbx_store.c +102 -70
  50. data/ext/isomorfeus_ferret_ext/frt_mempool.c +8 -16
  51. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +23 -46
  52. data/ext/isomorfeus_ferret_ext/frt_multimapper.h +4 -8
  53. data/ext/isomorfeus_ferret_ext/frt_out_stream.c +31 -43
  54. data/ext/isomorfeus_ferret_ext/frt_out_stream.h +2 -2
  55. data/ext/isomorfeus_ferret_ext/frt_posh.c +6 -819
  56. data/ext/isomorfeus_ferret_ext/frt_posh.h +0 -57
  57. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +11 -22
  58. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +1 -2
  59. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +131 -217
  60. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +2 -2
  61. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +4 -4
  62. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +18 -26
  63. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +27 -28
  64. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +49 -98
  65. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +64 -116
  66. data/ext/isomorfeus_ferret_ext/frt_q_range.c +8 -14
  67. data/ext/isomorfeus_ferret_ext/frt_q_span.c +251 -365
  68. data/ext/isomorfeus_ferret_ext/frt_q_term.c +9 -9
  69. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +1 -2
  70. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +134 -85
  71. data/ext/isomorfeus_ferret_ext/frt_search.c +109 -191
  72. data/ext/isomorfeus_ferret_ext/frt_search.h +6 -6
  73. data/ext/isomorfeus_ferret_ext/frt_similarity.c +12 -23
  74. data/ext/isomorfeus_ferret_ext/frt_similarity.h +1 -2
  75. data/ext/isomorfeus_ferret_ext/frt_sort.c +20 -20
  76. data/ext/isomorfeus_ferret_ext/frt_store.c +13 -25
  77. data/ext/isomorfeus_ferret_ext/frt_store.h +86 -52
  78. data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +8 -16
  79. data/ext/isomorfeus_ferret_ext/frt_win32.h +5 -10
  80. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +12 -11
  81. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +11 -13
  82. data/ext/isomorfeus_ferret_ext/lz4.c +422 -195
  83. data/ext/isomorfeus_ferret_ext/lz4.h +114 -46
  84. data/ext/isomorfeus_ferret_ext/lz4frame.c +421 -242
  85. data/ext/isomorfeus_ferret_ext/lz4frame.h +122 -53
  86. data/ext/isomorfeus_ferret_ext/lz4hc.c +127 -111
  87. data/ext/isomorfeus_ferret_ext/lz4hc.h +14 -14
  88. data/ext/isomorfeus_ferret_ext/lz4xxhash.h +1 -1
  89. data/ext/isomorfeus_ferret_ext/mdbx.c +3762 -2526
  90. data/ext/isomorfeus_ferret_ext/mdbx.h +115 -70
  91. data/ext/isomorfeus_ferret_ext/test.c +41 -88
  92. data/ext/isomorfeus_ferret_ext/test.h +3 -6
  93. data/ext/isomorfeus_ferret_ext/test_1710.c +11 -13
  94. data/ext/isomorfeus_ferret_ext/test_analysis.c +32 -64
  95. data/ext/isomorfeus_ferret_ext/test_array.c +6 -12
  96. data/ext/isomorfeus_ferret_ext/test_bitvector.c +12 -24
  97. data/ext/isomorfeus_ferret_ext/test_document.c +23 -33
  98. data/ext/isomorfeus_ferret_ext/test_except.c +10 -21
  99. data/ext/isomorfeus_ferret_ext/test_fields.c +62 -68
  100. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +16 -25
  101. data/ext/isomorfeus_ferret_ext/test_filter.c +22 -33
  102. data/ext/isomorfeus_ferret_ext/test_global.c +14 -29
  103. data/ext/isomorfeus_ferret_ext/test_hash.c +19 -38
  104. data/ext/isomorfeus_ferret_ext/test_hashset.c +8 -16
  105. data/ext/isomorfeus_ferret_ext/test_helper.c +4 -8
  106. data/ext/isomorfeus_ferret_ext/test_highlighter.c +16 -28
  107. data/ext/isomorfeus_ferret_ext/test_index.c +307 -519
  108. data/ext/isomorfeus_ferret_ext/test_lang.c +7 -14
  109. data/ext/isomorfeus_ferret_ext/test_mdbx_store.c +2 -5
  110. data/ext/isomorfeus_ferret_ext/test_mempool.c +5 -10
  111. data/ext/isomorfeus_ferret_ext/test_multimapper.c +3 -6
  112. data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +9 -18
  113. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +4 -6
  114. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +3 -4
  115. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +9 -15
  116. data/ext/isomorfeus_ferret_ext/test_q_parser.c +8 -16
  117. data/ext/isomorfeus_ferret_ext/test_q_span.c +19 -35
  118. data/ext/isomorfeus_ferret_ext/test_ram_store.c +14 -13
  119. data/ext/isomorfeus_ferret_ext/test_search.c +66 -115
  120. data/ext/isomorfeus_ferret_ext/test_segments.c +8 -13
  121. data/ext/isomorfeus_ferret_ext/test_similarity.c +2 -4
  122. data/ext/isomorfeus_ferret_ext/test_sort.c +17 -27
  123. data/ext/isomorfeus_ferret_ext/test_store.c +96 -115
  124. data/ext/isomorfeus_ferret_ext/test_term.c +9 -15
  125. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -14
  126. data/ext/isomorfeus_ferret_ext/test_test.c +4 -8
  127. data/ext/isomorfeus_ferret_ext/test_threading.c +15 -21
  128. data/ext/isomorfeus_ferret_ext/testhelper.c +11 -21
  129. data/ext/isomorfeus_ferret_ext/testhelper.h +1 -1
  130. data/ext/isomorfeus_ferret_ext/tests_all.h +1 -2
  131. data/lib/isomorfeus/ferret/index/index.rb +8 -8
  132. data/lib/isomorfeus/ferret/version.rb +1 -1
  133. metadata +32 -6
@@ -1,8 +1,6 @@
1
1
  #include "frt_index.h"
2
2
  #include "isomorfeus_ferret.h"
3
3
 
4
- extern VALUE rb_hash_update(int argc, VALUE *argv, VALUE self);
5
-
6
4
  extern VALUE sym_each;
7
5
  extern ID id_eql;
8
6
 
@@ -97,7 +95,7 @@ static VALUE frb_ld_df_load(VALUE self, VALUE rkey, FrtLazyDocField *lazy_df) {
97
95
  rLazyDoc *rld = DATA_PTR(self);
98
96
  VALUE rdata;
99
97
  if (lazy_df->size == 1) {
100
- char *data = frt_lazy_df_get_data(lazy_df, 0);
98
+ const char *data = frt_lazy_df_get_data(lazy_df, 0);
101
99
  rdata = rb_str_new(data, lazy_df->data[0].length);
102
100
  rb_enc_associate(rdata, lazy_df->data[0].encoding);
103
101
  } else {
@@ -105,7 +103,7 @@ static VALUE frb_ld_df_load(VALUE self, VALUE rkey, FrtLazyDocField *lazy_df) {
105
103
  VALUE rstr;
106
104
  rdata = rb_ary_new2(lazy_df->size);
107
105
  for (i = 0; i < lazy_df->size; i++) {
108
- char *data = frt_lazy_df_get_data(lazy_df, i);
106
+ const char *data = frt_lazy_df_get_data(lazy_df, i);
109
107
  rstr = rb_str_new(data, lazy_df->data[i].length);
110
108
  rb_enc_associate(rstr, lazy_df->data[i].encoding);
111
109
  rb_ary_store(rdata, i, rstr);
@@ -127,7 +125,7 @@ static VALUE frb_ld_load(VALUE self) {
127
125
  if (ld->loaded) return self;
128
126
  int i;
129
127
  FrtLazyDocField *lazy_df;
130
- for (i = 0; i < ld->size; i++) {
128
+ for (i = 0; i < ld->field_count; i++) {
131
129
  lazy_df = ld->fields[i];
132
130
  if (!(lazy_df->loaded)) frb_ld_df_load(self, ID2SYM(lazy_df->name), lazy_df);
133
131
  }
@@ -148,8 +146,8 @@ static VALUE frb_ld_fields(VALUE self) {
148
146
  VALUE rfields = rb_ivar_get(self, id_fields);
149
147
  if (rfields == Qnil) {
150
148
  int i;
151
- rfields = rb_ary_new2(ld->size);
152
- for (i = 0; i < ld->size; i++) {
149
+ rfields = rb_ary_new2(ld->field_count);
150
+ for (i = 0; i < ld->field_count; i++) {
153
151
  rb_ary_store(rfields, i, ID2SYM(ld->fields[i]->name));
154
152
  }
155
153
  rb_ivar_set(self, id_fields, rfields);
@@ -208,9 +206,9 @@ static VALUE frb_ld_equal(VALUE self, VALUE other) {
208
206
  rLazyDoc *other_rld;
209
207
  TypedData_Get_Struct(other, rLazyDoc, &frb_ld_t, other_rld);
210
208
  other_h = frb_ld_to_h(other);
211
- other_size = other_rld->doc->size;
209
+ other_size = other_rld->doc->field_count;
212
210
  }
213
- if (ld->size == other_size) {
211
+ if (ld->field_count == other_size) {
214
212
  VALUE self_h = frb_ld_to_h(self);
215
213
  return rb_funcall(self_h, id_equal, 1, other_h);
216
214
  }
@@ -278,7 +276,7 @@ static VALUE frb_ld_any(int argc, VALUE *argv, VALUE self) {
278
276
  FrtLazyDoc *ld = rld->doc;
279
277
  if (argc == 0) {
280
278
  if (!rb_block_given_p()) {
281
- return (ld->size > 0) ? Qtrue : Qfalse;
279
+ return (ld->field_count > 0) ? Qtrue : Qfalse;
282
280
  } else {
283
281
  if (!ld->loaded) frb_ld_load(self);
284
282
  VALUE res = Qnil;
@@ -375,7 +373,7 @@ static VALUE frb_ld_each_value(VALUE self) {
375
373
 
376
374
  static VALUE frb_ld_empty(VALUE self) {
377
375
  FrtLazyDoc *ld = ((rLazyDoc *)DATA_PTR(self))->doc;
378
- return (ld->size == 0) ? Qtrue : Qfalse;
376
+ return (ld->field_count == 0) ? Qtrue : Qfalse;
379
377
  }
380
378
 
381
379
  static VALUE frb_ld_eql(VALUE self, VALUE other) {
@@ -389,9 +387,9 @@ static VALUE frb_ld_eql(VALUE self, VALUE other) {
389
387
  } else {
390
388
  TypedData_Get_Struct(other, rLazyDoc, &frb_ld_t, other_rld);
391
389
  other_h = frb_ld_to_h(other);
392
- other_size = other_rld->doc->size;
390
+ other_size = other_rld->doc->field_count;
393
391
  }
394
- if (ld->size == other_size) {
392
+ if (ld->field_count == other_size) {
395
393
  VALUE self_h = frb_ld_to_h(self);
396
394
  return rb_funcall(self_h, id_eql, 1, other_h);
397
395
  }
@@ -475,7 +473,7 @@ static VALUE frb_ld_has_value(VALUE self, VALUE value) {
475
473
  if (!ld->loaded) frb_ld_load(self);
476
474
  int i;
477
475
  VALUE hvalue;
478
- for (i=0; i<ld->size; i++) {
476
+ for (i=0; i<ld->field_count; i++) {
479
477
  hvalue = (VALUE)frt_h_get(rld->hash, (void *)ID2SYM(ld->fields[i]->name));
480
478
  hvalue = rb_funcall(hvalue, id_equal, 1, value);
481
479
  if (hvalue == Qtrue) return Qtrue;
@@ -499,7 +497,7 @@ static VALUE frb_ld_key(VALUE self, VALUE value) {
499
497
  if (!ld->loaded) frb_ld_load(self);
500
498
  int i;
501
499
  VALUE hvalue;
502
- for (i=0; i<ld->size; i++) {
500
+ for (i=0; i<ld->field_count; i++) {
503
501
  hvalue = (VALUE)frt_h_get(rld->hash, (void *)ID2SYM(ld->fields[i]->name));
504
502
  hvalue = rb_funcall(hvalue, id_equal, 1, value);
505
503
  if (hvalue == Qtrue) return ID2SYM(ld->fields[i]->name);
@@ -509,7 +507,7 @@ static VALUE frb_ld_key(VALUE self, VALUE value) {
509
507
 
510
508
  static VALUE frb_ld_length(VALUE self) {
511
509
  FrtLazyDoc *ld = ((rLazyDoc *)DATA_PTR(self))->doc;
512
- return INT2FIX(ld->size);
510
+ return INT2FIX(ld->field_count);
513
511
  }
514
512
 
515
513
  static VALUE frb_ld_merge(int argc, VALUE *argv, VALUE self) {
@@ -6,7 +6,8 @@
6
6
 
7
7
  // #undef close
8
8
 
9
- VALUE mSearch;
9
+ static VALUE mSearch;
10
+ static VALUE mSpans;
10
11
 
11
12
  static VALUE cHit;
12
13
  static VALUE cTopDocs;
@@ -92,7 +93,7 @@ static VALUE sym_integer;
92
93
  static VALUE sym_float;
93
94
  static VALUE sym_string;
94
95
  static VALUE sym_auto;
95
- static VALUE sym_doc_id;
96
+ static VALUE sym_doc_num;
96
97
  static VALUE sym_score;
97
98
  static VALUE sym_byte;
98
99
 
@@ -145,7 +146,7 @@ extern VALUE frb_get_lazy_doc(FrtLazyDoc *lazy_doc);
145
146
  ****************************************************************************/
146
147
 
147
148
  static VALUE frb_get_hit(FrtHit *hit) {
148
- return rb_struct_new(cHit, INT2FIX(hit->doc), rb_float_new((double)hit->score), NULL);
149
+ return rb_struct_new(cHit, INT2FIX(hit->doc_num), rb_float_new((double)hit->score), NULL);
149
150
  }
150
151
 
151
152
  /****************************************************************************
@@ -197,10 +198,10 @@ static VALUE frb_td_to_s(int argc, VALUE *argv, VALUE self) {
197
198
 
198
199
  for (i = 0; i < len; i++) {
199
200
  VALUE rhit = RARRAY_PTR(rhits)[i];
200
- int doc_id = FIX2INT(rb_funcall(rhit, id_doc, 0));
201
+ int doc_num = FIX2INT(rb_funcall(rhit, id_doc, 0));
201
202
  const char *value = "";
202
203
  size_t value_len = 0;
203
- FrtLazyDoc *lzd = sea->get_lazy_doc(sea, doc_id);
204
+ FrtLazyDoc *lzd = sea->get_lazy_doc(sea, doc_num);
204
205
  FrtLazyDocField *lzdf = frt_lazy_doc_get(lzd, field);
205
206
  if (NULL != lzdf) {
206
207
  value = frt_lazy_df_get_data(lzdf, 0);
@@ -211,7 +212,7 @@ static VALUE frb_td_to_s(int argc, VALUE *argv, VALUE self) {
211
212
  FRT_REALLOC_N(str, char, capa);
212
213
  }
213
214
 
214
- sprintf(str + p, "\t%d \"%s\": %0.5f\n", doc_id, value,
215
+ sprintf(str + p, "\t%d \"%s\": %0.5f\n", doc_num, value,
215
216
  NUM2DBL(rb_funcall(rhit, id_score, 0)));
216
217
  p += strlen(str + p);
217
218
  frt_lazy_doc_close(lzd);
@@ -229,7 +230,7 @@ static char *frb_lzd_load_to_json(FrtLazyDoc *lzd, char **str, char *s, int *sle
229
230
  int len = diff, l;
230
231
  FrtLazyDocField *f;
231
232
 
232
- for (i = 0; i < lzd->size; i++) {
233
+ for (i = 0; i < lzd->field_count; i++) {
233
234
  f = lzd->fields[i];
234
235
  /* 3 times length of field to make space for quoted quotes ('"') and
235
236
  * 4 times field elements to make space for '"' around fields and ','
@@ -244,7 +245,7 @@ static char *frb_lzd_load_to_json(FrtLazyDoc *lzd, char **str, char *s, int *sle
244
245
  s = *str + diff;
245
246
  }
246
247
 
247
- for (i = 0; i < lzd->size; i++) {
248
+ for (i = 0; i < lzd->field_count; i++) {
248
249
  const char *field_name;
249
250
  f = lzd->fields[i];
250
251
  field_name = rb_id2name(f->name);
@@ -278,7 +279,7 @@ static VALUE frb_td_to_json(VALUE self) {
278
279
  FrtLazyDoc *lzd;
279
280
  FrtSearcher *sea = (FrtSearcher *)DATA_PTR(rb_funcall(self, id_searcher, 0));
280
281
  const int num_hits = RARRAY_LEN(rhits);
281
- int doc_id;
282
+ int doc_num;
282
283
  int len = 32768;
283
284
  char *str = FRT_ALLOC_N(char, len);
284
285
  char *s = str;
@@ -289,8 +290,8 @@ static VALUE frb_td_to_json(VALUE self) {
289
290
  if (i) *(s++) = ',';
290
291
  *(s++) = '{';
291
292
  rhit = RARRAY_PTR(rhits)[i];
292
- doc_id = FIX2INT(rb_funcall(rhit, id_doc, 0));
293
- lzd = sea->get_lazy_doc(sea, doc_id);
293
+ doc_num = FIX2INT(rb_funcall(rhit, id_doc, 0));
294
+ lzd = sea->get_lazy_doc(sea, doc_num);
294
295
  s = frb_lzd_load_to_json(lzd, &str, s, &len);
295
296
  frt_lazy_doc_close(lzd);
296
297
  *(s++) = '}';
@@ -1292,8 +1293,7 @@ static VALUE frb_phq_init(int argc, VALUE *argv, VALUE self) {
1292
1293
  * # doesn't match => "big house"
1293
1294
  */
1294
1295
  static VALUE
1295
- frb_phq_add(int argc, VALUE *argv, VALUE self)
1296
- {
1296
+ frb_phq_add(int argc, VALUE *argv, VALUE self) {
1297
1297
  VALUE rterm, rpos_inc;
1298
1298
  int pos_inc = 1;
1299
1299
  FrtQuery *q = (FrtQuery *)DATA_PTR(self);
@@ -1688,8 +1688,7 @@ extern float frt_qp_default_fuzzy_min_sim;
1688
1688
  * Set the default value for +:min_similarity+
1689
1689
  */
1690
1690
  static VALUE
1691
- frb_fq_set_dms(VALUE self, VALUE val)
1692
- {
1691
+ frb_fq_set_dms(VALUE self, VALUE val) {
1693
1692
  double min_sim = NUM2DBL(val);
1694
1693
  if (min_sim >= 1.0) {
1695
1694
  rb_raise(rb_eArgError,
@@ -1710,8 +1709,7 @@ frb_fq_set_dms(VALUE self, VALUE val)
1710
1709
  * Get the default value for +:prefix_length+
1711
1710
  */
1712
1711
  static VALUE
1713
- frb_fq_get_dpl(VALUE self)
1714
- {
1712
+ frb_fq_get_dpl(VALUE self) {
1715
1713
  return rb_cvar_get(cFuzzyQuery, id_default_prefix_length);
1716
1714
  }
1717
1715
 
@@ -1723,8 +1721,7 @@ extern int frt_qp_default_fuzzy_pre_len;
1723
1721
  * Set the default value for +:prefix_length+
1724
1722
  */
1725
1723
  static VALUE
1726
- frb_fq_set_dpl(VALUE self, VALUE val)
1727
- {
1724
+ frb_fq_set_dpl(VALUE self, VALUE val) {
1728
1725
  int pre_len = FIX2INT(val);
1729
1726
  if (pre_len < 0) {
1730
1727
  rb_raise(rb_eArgError,
@@ -2695,7 +2692,7 @@ static int get_sort_type(VALUE rtype) {
2695
2692
  return FRT_SORT_TYPE_STRING;
2696
2693
  } else if (rtype == sym_score) {
2697
2694
  return FRT_SORT_TYPE_SCORE;
2698
- } else if (rtype == sym_doc_id) {
2695
+ } else if (rtype == sym_doc_num) {
2699
2696
  return FRT_SORT_TYPE_DOC;
2700
2697
  } else if (rtype == sym_float) {
2701
2698
  return FRT_SORT_TYPE_FLOAT;
@@ -2703,7 +2700,7 @@ static int get_sort_type(VALUE rtype) {
2703
2700
  return FRT_SORT_TYPE_AUTO;
2704
2701
  } else {
2705
2702
  rb_raise(rb_eArgError, ":%s is an unknown sort-type. Please choose "
2706
- "from [:integer, :float, :string, :auto, :score, :doc_id]",
2703
+ "from [:integer, :float, :string, :auto, :score, :doc_num]",
2707
2704
  rb_id2name(SYM2ID(rtype)));
2708
2705
  }
2709
2706
  return FRT_SORT_TYPE_DOC;
@@ -2720,7 +2717,7 @@ static int get_sort_type(VALUE rtype) {
2720
2717
  *
2721
2718
  * :type:: Default: +:auto+. Specifies how a field should be sorted.
2722
2719
  * Choose from one of; +:auto+, +:integer+, +:float+,
2723
- * +:string+, +:byte+, +:doc_id+ or +:score+. +:auto+ will
2720
+ * +:string+, +:byte+, +:doc_num+ or +:score+. +:auto+ will
2724
2721
  * check the datatype of the field by trying to parse it into
2725
2722
  * either a number or a float before settling on a string
2726
2723
  * sort. String sort is locale dependent and works for
@@ -2789,7 +2786,7 @@ static VALUE frb_sf_get_name(VALUE self) {
2789
2786
  * sort_field.type -> symbol
2790
2787
  *
2791
2788
  * Return the type of sort. Should be one of; +:auto+, +:integer+, +:float+,
2792
- * +:string+, +:byte+, +:doc_id+ or +:score+.
2789
+ * +:string+, +:byte+, +:doc_num+ or +:score+.
2793
2790
  */
2794
2791
  static VALUE frb_sf_get_type(VALUE self) {
2795
2792
  GET_SF();
@@ -2799,7 +2796,7 @@ static VALUE frb_sf_get_type(VALUE self) {
2799
2796
  case FRT_SORT_TYPE_FLOAT: return sym_float;
2800
2797
  case FRT_SORT_TYPE_STRING: return sym_string;
2801
2798
  case FRT_SORT_TYPE_AUTO: return sym_auto;
2802
- case FRT_SORT_TYPE_DOC: return sym_doc_id;
2799
+ case FRT_SORT_TYPE_DOC: return sym_doc_num;
2803
2800
  case FRT_SORT_TYPE_SCORE: return sym_score;
2804
2801
  }
2805
2802
  return Qnil;
@@ -2903,7 +2900,7 @@ static void frb_parse_sort_str(FrtSort *sort, char *xsort_str) {
2903
2900
 
2904
2901
  if (strcmp("SCORE", s) == 0) {
2905
2902
  sf = frt_sort_field_score_new(reverse);
2906
- } else if (strcmp("DOC_ID", s) == 0) {
2903
+ } else if (strcmp("DOC_NUM", s) == 0) {
2907
2904
  sf = frt_sort_field_doc_new(reverse);
2908
2905
  } else {
2909
2906
  sf = frt_sort_field_auto_new(rb_intern(s), reverse);
@@ -2942,7 +2939,7 @@ static void frb_sort_add(FrtSort *sort, VALUE rsf, bool reverse) {
2942
2939
  #define GET_SORT() FrtSort *sort = (FrtSort *)DATA_PTR(self)
2943
2940
  /*
2944
2941
  * call-seq:
2945
- * Sort.new(sort_fields = [SortField::SCORE, SortField::DOC_ID], reverse = false) -> Sort
2942
+ * Sort.new(sort_fields = [SortField::SCORE, SortField::DOC_NUM], reverse = false) -> Sort
2946
2943
  *
2947
2944
  * Create a new Sort object. If +reverse+ is true, all sort_fields will be
2948
2945
  * reversed so if any of them are already reversed the will be turned back
@@ -3064,16 +3061,32 @@ static VALUE frb_sea_doc_freq(VALUE self, VALUE rfield, VALUE rterm) {
3064
3061
 
3065
3062
  /*
3066
3063
  * call-seq:
3067
- * searcher.get_document(doc_id) -> LazyDoc
3068
- * searcher[doc_id] -> LazyDoc
3064
+ * searcher.get_document(doc_num) -> LazyDoc
3065
+ * searcher[doc_num] -> LazyDoc
3069
3066
  *
3070
3067
  * Retrieve a document from the index. See LazyDoc for more details on the
3071
3068
  * document returned. Documents are referenced internally by document ids
3072
3069
  * which are returned by the Searchers search methods.
3073
3070
  */
3074
- static VALUE frb_sea_doc(VALUE self, VALUE rdoc_id) {
3071
+ static VALUE frb_sea_doc(VALUE self, VALUE rdoc_num) {
3072
+ int ex_code = 0;
3073
+ const char *msg = NULL;
3075
3074
  GET_SEA();
3076
- return frb_get_lazy_doc(sea->get_lazy_doc(sea, FIX2INT(rdoc_id)));
3075
+ VALUE ld = Qnil;
3076
+
3077
+ FRT_TRY
3078
+ ld = frb_get_lazy_doc(sea->get_lazy_doc(sea, FIX2INT(rdoc_num)));
3079
+ FRT_XCATCHALL
3080
+ ex_code = xcontext.excode;
3081
+ msg = xcontext.msg;
3082
+ FRT_HANDLED();
3083
+ FRT_XENDTRY
3084
+
3085
+ if (ex_code && msg) {
3086
+ frb_raise(ex_code, msg);
3087
+ }
3088
+
3089
+ return ld;
3077
3090
  }
3078
3091
 
3079
3092
  /*
@@ -3085,13 +3098,13 @@ static VALUE frb_sea_doc(VALUE self, VALUE rdoc_id) {
3085
3098
  * there are no deletions, this number also refers to the number of documents
3086
3099
  * in the index.
3087
3100
  */
3088
- static VALUE frb_sea_max_doc(VALUE self) {
3101
+ static VALUE frb_sea_max_doc_num(VALUE self) {
3089
3102
  GET_SEA();
3090
- return INT2FIX(sea->max_doc(sea));
3103
+ return INT2FIX(sea->max_doc_num(sea));
3091
3104
  }
3092
3105
 
3093
- static float call_filter_proc(int doc_id, float score, FrtSearcher *sea, void *arg) {
3094
- VALUE val = rb_funcall((VALUE)arg, id_call, 3, INT2FIX(doc_id), rb_float_new((double)score), sea->rsea);
3106
+ static float call_filter_proc(int doc_num, float score, FrtSearcher *sea, void *arg) {
3107
+ VALUE val = rb_funcall((VALUE)arg, id_call, 3, INT2FIX(doc_num), rb_float_new((double)score), sea->rsea);
3095
3108
  switch (TYPE(val)) {
3096
3109
  case T_NIL:
3097
3110
  case T_FALSE:
@@ -3192,8 +3205,7 @@ static FrtTopDocs *frb_sea_search_internal(FrtQuery *query, VALUE roptions, FrtS
3192
3205
  post_filter_holder.filter_func = &call_filter_proc;
3193
3206
  post_filter_holder.arg = (void *)rval;
3194
3207
  post_filter = &post_filter_holder;
3195
- }
3196
- else {
3208
+ } else {
3197
3209
  post_filter = DATA_PTR(rval);
3198
3210
  }
3199
3211
  }
@@ -3249,7 +3261,7 @@ static FrtTopDocs *frb_sea_search_internal(FrtQuery *query, VALUE roptions, FrtS
3249
3261
  * to specify a fields type to sort it correctly. For more
3250
3262
  * on this, see the documentation for SortField
3251
3263
  * :filter:: a Filter object to filter the search results with
3252
- * :filter_proc:: a filter Proc is a Proc which takes the doc_id, the score
3264
+ * :filter_proc:: a filter Proc is a Proc which takes the doc_num, the score
3253
3265
  * and the Searcher object as its parameters and returns
3254
3266
  * either a Boolean value specifying whether the result
3255
3267
  * should be included in the result set, or a Float between 0
@@ -3268,13 +3280,13 @@ static VALUE frb_sea_search(int argc, VALUE *argv, VALUE self) {
3268
3280
 
3269
3281
  /*
3270
3282
  * call-seq:
3271
- * searcher.search_each(query, options = {}) {|doc_id, score| do_something}
3283
+ * searcher.search_each(query, options = {}) {|doc_num, score| do_something}
3272
3284
  * -> total_hits
3273
3285
  *
3274
3286
  * Run a query through the Searcher on the index. A TopDocs object is
3275
3287
  * returned with the relevant results. The +query+ is a Query object. The
3276
3288
  * Searcher#search_each method yields the internal document id (used to
3277
- * reference documents in the Searcher object like this; +searcher[doc_id]+)
3289
+ * reference documents in the Searcher object like this; +searcher[doc_num]+)
3278
3290
  * and the search score for that document. It is possible for the score to be
3279
3291
  * greater than 1.0 for some queries and taking boosts into account. This
3280
3292
  * method will also normalize scores to the range 0.0..1.0 when the max-score
@@ -3302,7 +3314,7 @@ static VALUE frb_sea_search(int argc, VALUE *argv, VALUE self) {
3302
3314
  * to specify a fields type to sort it correctly. For more
3303
3315
  * on this, see the documentation for SortField
3304
3316
  * :filter:: a Filter object to filter the search results with
3305
- * :filter_proc:: a filter Proc is a Proc which takes the doc_id, the score
3317
+ * :filter_proc:: a filter Proc is a Proc which takes the doc_num, the score
3306
3318
  * and the Searcher object as its parameters and returns a
3307
3319
  * Boolean value specifying whether the result should be
3308
3320
  * included in the result set.
@@ -3323,7 +3335,7 @@ static VALUE frb_sea_search_each(int argc, VALUE *argv, VALUE self) {
3323
3335
 
3324
3336
  /* yield normalized scores */
3325
3337
  for (i = 0; i < td->size; i++) {
3326
- rb_yield_values(2, INT2FIX(td->hits[i]->doc), rb_float_new((double)(td->hits[i]->score/max_score)));
3338
+ rb_yield_values(2, INT2FIX(td->hits[i]->doc_num), rb_float_new((double)(td->hits[i]->score/max_score)));
3327
3339
  }
3328
3340
 
3329
3341
  rtotal_hits = INT2FIX(td->total_hits);
@@ -3396,11 +3408,9 @@ static VALUE frb_sea_scan(int argc, VALUE *argv, VALUE self) {
3396
3408
  if (limit <= 0) {
3397
3409
  rb_raise(rb_eArgError, ":limit must be > 0");
3398
3410
  }
3399
- }
3400
- else if (rval == sym_all) {
3411
+ } else if (rval == sym_all) {
3401
3412
  limit = INT_MAX;
3402
- }
3403
- else {
3413
+ } else {
3404
3414
  rb_raise(rb_eArgError, "%s is not a sensible :limit value "
3405
3415
  "Please use a positive integer or :all",
3406
3416
  rs2s(rb_obj_as_string(rval)));
@@ -3420,14 +3430,14 @@ static VALUE frb_sea_scan(int argc, VALUE *argv, VALUE self) {
3420
3430
 
3421
3431
  /*
3422
3432
  * call-seq:
3423
- * searcher.explain(query, doc_id) -> Explanation
3433
+ * searcher.explain(query, doc_num) -> Explanation
3424
3434
  *
3425
3435
  * Create an explanation object to explain the score returned for a
3426
- * particular document at +doc_id+ in the index for the query +query+.
3436
+ * particular document at +doc_num+ in the index for the query +query+.
3427
3437
  *
3428
3438
  * Usually used like this;
3429
3439
  *
3430
- * puts searcher.explain(query, doc_id).to_s
3440
+ * puts searcher.explain(query, doc_num).to_s
3431
3441
  */
3432
3442
 
3433
3443
  static size_t frb_explanation_size(const void *p) {
@@ -3458,17 +3468,17 @@ static VALUE frb_expl_alloc(VALUE rclass) {
3458
3468
  return TypedData_Wrap_Struct(rclass, &frb_explanation_t, e);
3459
3469
  }
3460
3470
 
3461
- static VALUE frb_sea_explain(VALUE self, VALUE rquery, VALUE rdoc_id) {
3471
+ static VALUE frb_sea_explain(VALUE self, VALUE rquery, VALUE rdoc_num) {
3462
3472
  GET_SEA();
3463
3473
  FrtQuery *query = DATA_PTR(rquery);
3464
3474
  FrtExplanation *expl;
3465
- expl = sea->explain(sea, query, FIX2INT(rdoc_id));
3475
+ expl = sea->explain(sea, query, FIX2INT(rdoc_num));
3466
3476
  return TypedData_Wrap_Struct(cExplanation, &frb_explanation_t, expl);
3467
3477
  }
3468
3478
 
3469
3479
  /*
3470
3480
  * call-seq:
3471
- * searcher.highlight(query, doc_id, field, options = {}) -> Array
3481
+ * searcher.highlight(query, doc_num, field, options = {}) -> Array
3472
3482
  *
3473
3483
  * Returns an array of strings with the matches highlighted.
3474
3484
  *
@@ -3490,7 +3500,7 @@ static VALUE frb_sea_explain(VALUE self, VALUE rquery, VALUE rdoc_id) {
3490
3500
  */
3491
3501
  static VALUE frb_sea_highlight(int argc, VALUE *argv, VALUE self) {
3492
3502
  GET_SEA();
3493
- VALUE rquery, rdoc_id, rfield, roptions, v;
3503
+ VALUE rquery, rdoc_num, rfield, roptions, v;
3494
3504
  int excerpt_length = 150;
3495
3505
  int num_excerpts = 2;
3496
3506
  const char *pre_tag = "<b>";
@@ -3498,7 +3508,7 @@ static VALUE frb_sea_highlight(int argc, VALUE *argv, VALUE self) {
3498
3508
  const char *ellipsis = "...";
3499
3509
  char **excerpts;
3500
3510
 
3501
- rb_scan_args(argc, argv, "31", &rquery, &rdoc_id, &rfield, &roptions);
3511
+ rb_scan_args(argc, argv, "31", &rquery, &rdoc_num, &rfield, &roptions);
3502
3512
  FrtQuery *query = DATA_PTR(rquery);
3503
3513
  if (argc > 3) {
3504
3514
  if (TYPE(roptions) != T_HASH) {
@@ -3511,8 +3521,7 @@ static VALUE frb_sea_highlight(int argc, VALUE *argv, VALUE self) {
3511
3521
  if (v == sym_all) {
3512
3522
  num_excerpts = 1;
3513
3523
  excerpt_length = INT_MAX/2;
3514
- }
3515
- else {
3524
+ } else {
3516
3525
  excerpt_length = FIX2INT(v);
3517
3526
  }
3518
3527
  }
@@ -3529,7 +3538,7 @@ static VALUE frb_sea_highlight(int argc, VALUE *argv, VALUE self) {
3529
3538
 
3530
3539
  if ((excerpts = frt_searcher_highlight(sea,
3531
3540
  query,
3532
- FIX2INT(rdoc_id),
3541
+ FIX2INT(rdoc_num),
3533
3542
  frb_field(rfield),
3534
3543
  excerpt_length,
3535
3544
  num_excerpts,
@@ -3605,6 +3614,7 @@ static VALUE frb_sea_init(VALUE self, VALUE obj) {
3605
3614
  if (TYPE(obj) == T_STRING) {
3606
3615
  frb_create_dir(obj);
3607
3616
  store = frt_open_mdbx_store(rs2s(obj));
3617
+ store->create_folder(store, segm_idx_name);
3608
3618
  ir = frt_ir_open(NULL, store);
3609
3619
  ir->rir = TypedData_Wrap_Struct(cIndexReader, &frb_index_reader_t, ir);
3610
3620
  } else {
@@ -3859,7 +3869,7 @@ static void Init_TopDocs(void) {
3859
3869
  *
3860
3870
  * == Example
3861
3871
  *
3862
- * puts searcher.explain(query, doc_id).to_s
3872
+ * puts searcher.explain(query, doc_num).to_s
3863
3873
  */
3864
3874
  static void Init_Explanation(void) {
3865
3875
  cExplanation = rb_define_class_under(mSearch, "Explanation", rb_cObject);
@@ -4738,7 +4748,7 @@ static void Init_Filter(void) {
4738
4748
  * * :float
4739
4749
  * * :string
4740
4750
  * * :byte
4741
- * * :doc_id
4751
+ * * :doc_num
4742
4752
  * * :score
4743
4753
  *
4744
4754
  * The type of the SortField is set by passing it as a parameter to the
@@ -4774,7 +4784,7 @@ static void Init_SortField(void) {
4774
4784
  sym_float = ID2SYM(rb_intern("float"));
4775
4785
  sym_string = ID2SYM(rb_intern("string"));
4776
4786
  sym_auto = ID2SYM(rb_intern("auto"));
4777
- sym_doc_id = ID2SYM(rb_intern("doc_id"));
4787
+ sym_doc_num = ID2SYM(rb_intern("doc_num"));
4778
4788
  sym_score = ID2SYM(rb_intern("score"));
4779
4789
  sym_byte = ID2SYM(rb_intern("byte"));
4780
4790
 
@@ -4794,12 +4804,12 @@ static void Init_SortField(void) {
4794
4804
  rb_define_const(cSortField, "SCORE_REV", TypedData_Wrap_Struct(cSortField, &frb_sort_field_t, FRT_SORT_FIELD_SCORE_REV));
4795
4805
  FRT_SORT_FIELD_SCORE_REV->rfield = rb_const_get(cSortField, rb_intern("SCORE_REV"));
4796
4806
 
4797
- rb_define_const(cSortField, "DOC_ID", TypedData_Wrap_Struct(cSortField, &frb_sort_field_t, FRT_SORT_FIELD_DOC));
4798
- oSORT_FIELD_DOC = rb_const_get(cSortField, rb_intern("DOC_ID"));
4807
+ rb_define_const(cSortField, "DOC_NUM", TypedData_Wrap_Struct(cSortField, &frb_sort_field_t, FRT_SORT_FIELD_DOC));
4808
+ oSORT_FIELD_DOC = rb_const_get(cSortField, rb_intern("DOC_NUM"));
4799
4809
  FRT_SORT_FIELD_DOC->rfield = oSORT_FIELD_DOC;
4800
4810
 
4801
- rb_define_const(cSortField, "DOC_ID_REV", TypedData_Wrap_Struct(cSortField, &frb_sort_field_t, FRT_SORT_FIELD_DOC_REV));
4802
- FRT_SORT_FIELD_DOC_REV->rfield = rb_const_get(cSortField, rb_intern("DOC_ID_REV"));
4811
+ rb_define_const(cSortField, "DOC_NUM_REV", TypedData_Wrap_Struct(cSortField, &frb_sort_field_t, FRT_SORT_FIELD_DOC_REV));
4812
+ FRT_SORT_FIELD_DOC_REV->rfield = rb_const_get(cSortField, rb_intern("DOC_NUM_REV"));
4803
4813
  }
4804
4814
 
4805
4815
  /*
@@ -4861,8 +4871,8 @@ static void Init_Sort(void) {
4861
4871
  *
4862
4872
  * searcher.search_each(TermQuery.new(:content, "ferret")
4863
4873
  * :filter => RangeFilter.new(:date, :< => "2006"),
4864
- * :sort => "date DESC, title") do |doc_id, score|
4865
- * puts "#{searcher[doc_id][title] scored #{score}"
4874
+ * :sort => "date DESC, title") do |doc_num, score|
4875
+ * puts "#{searcher[doc_num][title] scored #{score}"
4866
4876
  * end
4867
4877
  */
4868
4878
  static void Init_Searcher(void) {
@@ -4892,7 +4902,7 @@ static void Init_Searcher(void) {
4892
4902
  rb_define_method(cSearcher, "doc_freq", frb_sea_doc_freq, 2);
4893
4903
  rb_define_method(cSearcher, "get_document", frb_sea_doc, 1);
4894
4904
  rb_define_method(cSearcher, "[]", frb_sea_doc, 1);
4895
- rb_define_method(cSearcher, "max_doc", frb_sea_max_doc, 0);
4905
+ rb_define_method(cSearcher, "max_doc_num", frb_sea_max_doc_num, 0);
4896
4906
  rb_define_method(cSearcher, "search", frb_sea_search, -1);
4897
4907
  rb_define_method(cSearcher, "search_each", frb_sea_search_each, -1);
4898
4908
  rb_define_method(cSearcher, "scan", frb_sea_scan, -1);
@@ -4934,9 +4944,7 @@ static void Init_MultiSearcher(void) {
4934
4944
  *
4935
4945
  * Happy Ferreting!!
4936
4946
  */
4937
- void
4938
- Init_Search(void)
4939
- {
4947
+ void Init_Search(void) {
4940
4948
  mSearch = rb_define_module_under(mFerret, "Search");
4941
4949
 
4942
4950
  fsym_id = rb_intern("id");