isomorfeus-ferret 0.17.2 → 0.17.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. checksums.yaml +4 -4
  2. data/ext/isomorfeus_ferret_ext/benchmark.c +9 -20
  3. data/ext/isomorfeus_ferret_ext/benchmarks_all.h +1 -2
  4. data/ext/isomorfeus_ferret_ext/bm_hash.c +1 -2
  5. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +4 -2
  6. data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +3 -2
  7. data/ext/isomorfeus_ferret_ext/frb_analysis.c +4 -5
  8. data/ext/isomorfeus_ferret_ext/frb_field_info.c +3 -4
  9. data/ext/isomorfeus_ferret_ext/frb_index.c +118 -125
  10. data/ext/isomorfeus_ferret_ext/frb_lazy_doc.c +14 -16
  11. data/ext/isomorfeus_ferret_ext/frb_search.c +31 -23
  12. data/ext/isomorfeus_ferret_ext/frb_store.c +27 -13
  13. data/ext/isomorfeus_ferret_ext/frb_utils.c +3 -6
  14. data/ext/isomorfeus_ferret_ext/frt_analysis.c +39 -46
  15. data/ext/isomorfeus_ferret_ext/frt_analysis.h +9 -9
  16. data/ext/isomorfeus_ferret_ext/frt_array.c +11 -22
  17. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +3 -6
  18. data/ext/isomorfeus_ferret_ext/frt_doc_field.c +87 -0
  19. data/ext/isomorfeus_ferret_ext/frt_doc_field.h +26 -0
  20. data/ext/isomorfeus_ferret_ext/frt_document.c +4 -97
  21. data/ext/isomorfeus_ferret_ext/frt_document.h +2 -27
  22. data/ext/isomorfeus_ferret_ext/frt_except.c +8 -6
  23. data/ext/isomorfeus_ferret_ext/frt_except.h +1 -2
  24. data/ext/isomorfeus_ferret_ext/frt_field_index.c +13 -32
  25. data/ext/isomorfeus_ferret_ext/frt_field_index.h +0 -6
  26. data/ext/isomorfeus_ferret_ext/frt_field_info.c +69 -0
  27. data/ext/isomorfeus_ferret_ext/frt_field_info.h +49 -0
  28. data/ext/isomorfeus_ferret_ext/frt_field_infos.c +196 -0
  29. data/ext/isomorfeus_ferret_ext/frt_field_infos.h +35 -0
  30. data/ext/isomorfeus_ferret_ext/frt_global.c +10 -4
  31. data/ext/isomorfeus_ferret_ext/frt_global.h +11 -15
  32. data/ext/isomorfeus_ferret_ext/frt_hash.c +8 -8
  33. data/ext/isomorfeus_ferret_ext/frt_hash.h +1 -2
  34. data/ext/isomorfeus_ferret_ext/frt_hashset.c +20 -40
  35. data/ext/isomorfeus_ferret_ext/frt_hashset.h +1 -2
  36. data/ext/isomorfeus_ferret_ext/frt_helper.c +7 -15
  37. data/ext/isomorfeus_ferret_ext/frt_in_stream.c +35 -45
  38. data/ext/isomorfeus_ferret_ext/frt_in_stream.h +3 -2
  39. data/ext/isomorfeus_ferret_ext/frt_ind.c +20 -38
  40. data/ext/isomorfeus_ferret_ext/frt_index.c +292 -790
  41. data/ext/isomorfeus_ferret_ext/frt_index.h +1 -102
  42. data/ext/isomorfeus_ferret_ext/frt_lang.c +5 -10
  43. data/ext/isomorfeus_ferret_ext/frt_lazy_doc.c +3 -3
  44. data/ext/isomorfeus_ferret_ext/frt_lazy_doc.h +1 -1
  45. data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.c +18 -25
  46. data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.h +5 -5
  47. data/ext/isomorfeus_ferret_ext/frt_mdbx_store.c +102 -70
  48. data/ext/isomorfeus_ferret_ext/frt_mempool.c +8 -16
  49. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +23 -46
  50. data/ext/isomorfeus_ferret_ext/frt_multimapper.h +4 -8
  51. data/ext/isomorfeus_ferret_ext/frt_out_stream.c +31 -43
  52. data/ext/isomorfeus_ferret_ext/frt_out_stream.h +2 -2
  53. data/ext/isomorfeus_ferret_ext/frt_posh.c +6 -819
  54. data/ext/isomorfeus_ferret_ext/frt_posh.h +0 -57
  55. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +11 -22
  56. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +1 -2
  57. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +85 -171
  58. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +8 -16
  59. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +1 -2
  60. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +49 -98
  61. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +52 -104
  62. data/ext/isomorfeus_ferret_ext/frt_q_range.c +6 -12
  63. data/ext/isomorfeus_ferret_ext/frt_q_span.c +113 -226
  64. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +1 -2
  65. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +134 -85
  66. data/ext/isomorfeus_ferret_ext/frt_search.c +82 -164
  67. data/ext/isomorfeus_ferret_ext/frt_similarity.c +11 -22
  68. data/ext/isomorfeus_ferret_ext/frt_similarity.h +1 -2
  69. data/ext/isomorfeus_ferret_ext/frt_store.c +13 -25
  70. data/ext/isomorfeus_ferret_ext/frt_store.h +86 -52
  71. data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +8 -16
  72. data/ext/isomorfeus_ferret_ext/frt_win32.h +5 -10
  73. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +12 -11
  74. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +11 -13
  75. data/ext/isomorfeus_ferret_ext/lz4.c +422 -195
  76. data/ext/isomorfeus_ferret_ext/lz4.h +114 -46
  77. data/ext/isomorfeus_ferret_ext/lz4frame.c +421 -242
  78. data/ext/isomorfeus_ferret_ext/lz4frame.h +122 -53
  79. data/ext/isomorfeus_ferret_ext/lz4hc.c +127 -111
  80. data/ext/isomorfeus_ferret_ext/lz4hc.h +14 -14
  81. data/ext/isomorfeus_ferret_ext/lz4xxhash.h +1 -1
  82. data/ext/isomorfeus_ferret_ext/mdbx.c +3762 -2526
  83. data/ext/isomorfeus_ferret_ext/mdbx.h +115 -70
  84. data/ext/isomorfeus_ferret_ext/test.c +40 -87
  85. data/ext/isomorfeus_ferret_ext/test.h +3 -6
  86. data/ext/isomorfeus_ferret_ext/test_1710.c +11 -13
  87. data/ext/isomorfeus_ferret_ext/test_analysis.c +32 -64
  88. data/ext/isomorfeus_ferret_ext/test_array.c +6 -12
  89. data/ext/isomorfeus_ferret_ext/test_bitvector.c +12 -24
  90. data/ext/isomorfeus_ferret_ext/test_document.c +23 -33
  91. data/ext/isomorfeus_ferret_ext/test_except.c +10 -21
  92. data/ext/isomorfeus_ferret_ext/test_fields.c +62 -68
  93. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +15 -23
  94. data/ext/isomorfeus_ferret_ext/test_filter.c +17 -27
  95. data/ext/isomorfeus_ferret_ext/test_global.c +14 -29
  96. data/ext/isomorfeus_ferret_ext/test_hash.c +19 -38
  97. data/ext/isomorfeus_ferret_ext/test_hashset.c +8 -16
  98. data/ext/isomorfeus_ferret_ext/test_helper.c +4 -8
  99. data/ext/isomorfeus_ferret_ext/test_highlighter.c +16 -28
  100. data/ext/isomorfeus_ferret_ext/test_index.c +277 -487
  101. data/ext/isomorfeus_ferret_ext/test_lang.c +7 -14
  102. data/ext/isomorfeus_ferret_ext/test_mdbx_store.c +2 -5
  103. data/ext/isomorfeus_ferret_ext/test_mempool.c +5 -10
  104. data/ext/isomorfeus_ferret_ext/test_multimapper.c +3 -6
  105. data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +9 -18
  106. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +4 -6
  107. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +3 -4
  108. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +9 -15
  109. data/ext/isomorfeus_ferret_ext/test_q_parser.c +8 -16
  110. data/ext/isomorfeus_ferret_ext/test_q_span.c +19 -35
  111. data/ext/isomorfeus_ferret_ext/test_ram_store.c +14 -13
  112. data/ext/isomorfeus_ferret_ext/test_search.c +60 -109
  113. data/ext/isomorfeus_ferret_ext/test_segments.c +8 -13
  114. data/ext/isomorfeus_ferret_ext/test_similarity.c +2 -4
  115. data/ext/isomorfeus_ferret_ext/test_sort.c +14 -24
  116. data/ext/isomorfeus_ferret_ext/test_store.c +96 -115
  117. data/ext/isomorfeus_ferret_ext/test_term.c +9 -15
  118. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -14
  119. data/ext/isomorfeus_ferret_ext/test_test.c +4 -8
  120. data/ext/isomorfeus_ferret_ext/test_threading.c +14 -20
  121. data/ext/isomorfeus_ferret_ext/testhelper.c +11 -21
  122. data/ext/isomorfeus_ferret_ext/testhelper.h +1 -1
  123. data/ext/isomorfeus_ferret_ext/tests_all.h +1 -2
  124. data/lib/isomorfeus/ferret/index/index.rb +1 -1
  125. data/lib/isomorfeus/ferret/version.rb +1 -1
  126. metadata +24 -4
@@ -1,8 +1,6 @@
1
1
  #include "frt_index.h"
2
2
  #include "isomorfeus_ferret.h"
3
3
 
4
- extern VALUE rb_hash_update(int argc, VALUE *argv, VALUE self);
5
-
6
4
  extern VALUE sym_each;
7
5
  extern ID id_eql;
8
6
 
@@ -97,7 +95,7 @@ static VALUE frb_ld_df_load(VALUE self, VALUE rkey, FrtLazyDocField *lazy_df) {
97
95
  rLazyDoc *rld = DATA_PTR(self);
98
96
  VALUE rdata;
99
97
  if (lazy_df->size == 1) {
100
- char *data = frt_lazy_df_get_data(lazy_df, 0);
98
+ const char *data = frt_lazy_df_get_data(lazy_df, 0);
101
99
  rdata = rb_str_new(data, lazy_df->data[0].length);
102
100
  rb_enc_associate(rdata, lazy_df->data[0].encoding);
103
101
  } else {
@@ -105,7 +103,7 @@ static VALUE frb_ld_df_load(VALUE self, VALUE rkey, FrtLazyDocField *lazy_df) {
105
103
  VALUE rstr;
106
104
  rdata = rb_ary_new2(lazy_df->size);
107
105
  for (i = 0; i < lazy_df->size; i++) {
108
- char *data = frt_lazy_df_get_data(lazy_df, i);
106
+ const char *data = frt_lazy_df_get_data(lazy_df, i);
109
107
  rstr = rb_str_new(data, lazy_df->data[i].length);
110
108
  rb_enc_associate(rstr, lazy_df->data[i].encoding);
111
109
  rb_ary_store(rdata, i, rstr);
@@ -127,7 +125,7 @@ static VALUE frb_ld_load(VALUE self) {
127
125
  if (ld->loaded) return self;
128
126
  int i;
129
127
  FrtLazyDocField *lazy_df;
130
- for (i = 0; i < ld->size; i++) {
128
+ for (i = 0; i < ld->field_count; i++) {
131
129
  lazy_df = ld->fields[i];
132
130
  if (!(lazy_df->loaded)) frb_ld_df_load(self, ID2SYM(lazy_df->name), lazy_df);
133
131
  }
@@ -148,8 +146,8 @@ static VALUE frb_ld_fields(VALUE self) {
148
146
  VALUE rfields = rb_ivar_get(self, id_fields);
149
147
  if (rfields == Qnil) {
150
148
  int i;
151
- rfields = rb_ary_new2(ld->size);
152
- for (i = 0; i < ld->size; i++) {
149
+ rfields = rb_ary_new2(ld->field_count);
150
+ for (i = 0; i < ld->field_count; i++) {
153
151
  rb_ary_store(rfields, i, ID2SYM(ld->fields[i]->name));
154
152
  }
155
153
  rb_ivar_set(self, id_fields, rfields);
@@ -208,9 +206,9 @@ static VALUE frb_ld_equal(VALUE self, VALUE other) {
208
206
  rLazyDoc *other_rld;
209
207
  TypedData_Get_Struct(other, rLazyDoc, &frb_ld_t, other_rld);
210
208
  other_h = frb_ld_to_h(other);
211
- other_size = other_rld->doc->size;
209
+ other_size = other_rld->doc->field_count;
212
210
  }
213
- if (ld->size == other_size) {
211
+ if (ld->field_count == other_size) {
214
212
  VALUE self_h = frb_ld_to_h(self);
215
213
  return rb_funcall(self_h, id_equal, 1, other_h);
216
214
  }
@@ -278,7 +276,7 @@ static VALUE frb_ld_any(int argc, VALUE *argv, VALUE self) {
278
276
  FrtLazyDoc *ld = rld->doc;
279
277
  if (argc == 0) {
280
278
  if (!rb_block_given_p()) {
281
- return (ld->size > 0) ? Qtrue : Qfalse;
279
+ return (ld->field_count > 0) ? Qtrue : Qfalse;
282
280
  } else {
283
281
  if (!ld->loaded) frb_ld_load(self);
284
282
  VALUE res = Qnil;
@@ -375,7 +373,7 @@ static VALUE frb_ld_each_value(VALUE self) {
375
373
 
376
374
  static VALUE frb_ld_empty(VALUE self) {
377
375
  FrtLazyDoc *ld = ((rLazyDoc *)DATA_PTR(self))->doc;
378
- return (ld->size == 0) ? Qtrue : Qfalse;
376
+ return (ld->field_count == 0) ? Qtrue : Qfalse;
379
377
  }
380
378
 
381
379
  static VALUE frb_ld_eql(VALUE self, VALUE other) {
@@ -389,9 +387,9 @@ static VALUE frb_ld_eql(VALUE self, VALUE other) {
389
387
  } else {
390
388
  TypedData_Get_Struct(other, rLazyDoc, &frb_ld_t, other_rld);
391
389
  other_h = frb_ld_to_h(other);
392
- other_size = other_rld->doc->size;
390
+ other_size = other_rld->doc->field_count;
393
391
  }
394
- if (ld->size == other_size) {
392
+ if (ld->field_count == other_size) {
395
393
  VALUE self_h = frb_ld_to_h(self);
396
394
  return rb_funcall(self_h, id_eql, 1, other_h);
397
395
  }
@@ -475,7 +473,7 @@ static VALUE frb_ld_has_value(VALUE self, VALUE value) {
475
473
  if (!ld->loaded) frb_ld_load(self);
476
474
  int i;
477
475
  VALUE hvalue;
478
- for (i=0; i<ld->size; i++) {
476
+ for (i=0; i<ld->field_count; i++) {
479
477
  hvalue = (VALUE)frt_h_get(rld->hash, (void *)ID2SYM(ld->fields[i]->name));
480
478
  hvalue = rb_funcall(hvalue, id_equal, 1, value);
481
479
  if (hvalue == Qtrue) return Qtrue;
@@ -499,7 +497,7 @@ static VALUE frb_ld_key(VALUE self, VALUE value) {
499
497
  if (!ld->loaded) frb_ld_load(self);
500
498
  int i;
501
499
  VALUE hvalue;
502
- for (i=0; i<ld->size; i++) {
500
+ for (i=0; i<ld->field_count; i++) {
503
501
  hvalue = (VALUE)frt_h_get(rld->hash, (void *)ID2SYM(ld->fields[i]->name));
504
502
  hvalue = rb_funcall(hvalue, id_equal, 1, value);
505
503
  if (hvalue == Qtrue) return ID2SYM(ld->fields[i]->name);
@@ -509,7 +507,7 @@ static VALUE frb_ld_key(VALUE self, VALUE value) {
509
507
 
510
508
  static VALUE frb_ld_length(VALUE self) {
511
509
  FrtLazyDoc *ld = ((rLazyDoc *)DATA_PTR(self))->doc;
512
- return INT2FIX(ld->size);
510
+ return INT2FIX(ld->field_count);
513
511
  }
514
512
 
515
513
  static VALUE frb_ld_merge(int argc, VALUE *argv, VALUE self) {
@@ -6,7 +6,8 @@
6
6
 
7
7
  // #undef close
8
8
 
9
- VALUE mSearch;
9
+ static VALUE mSearch;
10
+ static VALUE mSpans;
10
11
 
11
12
  static VALUE cHit;
12
13
  static VALUE cTopDocs;
@@ -229,7 +230,7 @@ static char *frb_lzd_load_to_json(FrtLazyDoc *lzd, char **str, char *s, int *sle
229
230
  int len = diff, l;
230
231
  FrtLazyDocField *f;
231
232
 
232
- for (i = 0; i < lzd->size; i++) {
233
+ for (i = 0; i < lzd->field_count; i++) {
233
234
  f = lzd->fields[i];
234
235
  /* 3 times length of field to make space for quoted quotes ('"') and
235
236
  * 4 times field elements to make space for '"' around fields and ','
@@ -244,7 +245,7 @@ static char *frb_lzd_load_to_json(FrtLazyDoc *lzd, char **str, char *s, int *sle
244
245
  s = *str + diff;
245
246
  }
246
247
 
247
- for (i = 0; i < lzd->size; i++) {
248
+ for (i = 0; i < lzd->field_count; i++) {
248
249
  const char *field_name;
249
250
  f = lzd->fields[i];
250
251
  field_name = rb_id2name(f->name);
@@ -1292,8 +1293,7 @@ static VALUE frb_phq_init(int argc, VALUE *argv, VALUE self) {
1292
1293
  * # doesn't match => "big house"
1293
1294
  */
1294
1295
  static VALUE
1295
- frb_phq_add(int argc, VALUE *argv, VALUE self)
1296
- {
1296
+ frb_phq_add(int argc, VALUE *argv, VALUE self) {
1297
1297
  VALUE rterm, rpos_inc;
1298
1298
  int pos_inc = 1;
1299
1299
  FrtQuery *q = (FrtQuery *)DATA_PTR(self);
@@ -1688,8 +1688,7 @@ extern float frt_qp_default_fuzzy_min_sim;
1688
1688
  * Set the default value for +:min_similarity+
1689
1689
  */
1690
1690
  static VALUE
1691
- frb_fq_set_dms(VALUE self, VALUE val)
1692
- {
1691
+ frb_fq_set_dms(VALUE self, VALUE val) {
1693
1692
  double min_sim = NUM2DBL(val);
1694
1693
  if (min_sim >= 1.0) {
1695
1694
  rb_raise(rb_eArgError,
@@ -1710,8 +1709,7 @@ frb_fq_set_dms(VALUE self, VALUE val)
1710
1709
  * Get the default value for +:prefix_length+
1711
1710
  */
1712
1711
  static VALUE
1713
- frb_fq_get_dpl(VALUE self)
1714
- {
1712
+ frb_fq_get_dpl(VALUE self) {
1715
1713
  return rb_cvar_get(cFuzzyQuery, id_default_prefix_length);
1716
1714
  }
1717
1715
 
@@ -1723,8 +1721,7 @@ extern int frt_qp_default_fuzzy_pre_len;
1723
1721
  * Set the default value for +:prefix_length+
1724
1722
  */
1725
1723
  static VALUE
1726
- frb_fq_set_dpl(VALUE self, VALUE val)
1727
- {
1724
+ frb_fq_set_dpl(VALUE self, VALUE val) {
1728
1725
  int pre_len = FIX2INT(val);
1729
1726
  if (pre_len < 0) {
1730
1727
  rb_raise(rb_eArgError,
@@ -3072,8 +3069,24 @@ static VALUE frb_sea_doc_freq(VALUE self, VALUE rfield, VALUE rterm) {
3072
3069
  * which are returned by the Searchers search methods.
3073
3070
  */
3074
3071
  static VALUE frb_sea_doc(VALUE self, VALUE rdoc_id) {
3072
+ int ex_code = 0;
3073
+ const char *msg = NULL;
3075
3074
  GET_SEA();
3076
- return frb_get_lazy_doc(sea->get_lazy_doc(sea, FIX2INT(rdoc_id)));
3075
+ VALUE ld = Qnil;
3076
+
3077
+ FRT_TRY
3078
+ ld = frb_get_lazy_doc(sea->get_lazy_doc(sea, FIX2INT(rdoc_id)));
3079
+ FRT_XCATCHALL
3080
+ ex_code = xcontext.excode;
3081
+ msg = xcontext.msg;
3082
+ FRT_HANDLED();
3083
+ FRT_XENDTRY
3084
+
3085
+ if (ex_code && msg) {
3086
+ frb_raise(ex_code, msg);
3087
+ }
3088
+
3089
+ return ld;
3077
3090
  }
3078
3091
 
3079
3092
  /*
@@ -3192,8 +3205,7 @@ static FrtTopDocs *frb_sea_search_internal(FrtQuery *query, VALUE roptions, FrtS
3192
3205
  post_filter_holder.filter_func = &call_filter_proc;
3193
3206
  post_filter_holder.arg = (void *)rval;
3194
3207
  post_filter = &post_filter_holder;
3195
- }
3196
- else {
3208
+ } else {
3197
3209
  post_filter = DATA_PTR(rval);
3198
3210
  }
3199
3211
  }
@@ -3396,11 +3408,9 @@ static VALUE frb_sea_scan(int argc, VALUE *argv, VALUE self) {
3396
3408
  if (limit <= 0) {
3397
3409
  rb_raise(rb_eArgError, ":limit must be > 0");
3398
3410
  }
3399
- }
3400
- else if (rval == sym_all) {
3411
+ } else if (rval == sym_all) {
3401
3412
  limit = INT_MAX;
3402
- }
3403
- else {
3413
+ } else {
3404
3414
  rb_raise(rb_eArgError, "%s is not a sensible :limit value "
3405
3415
  "Please use a positive integer or :all",
3406
3416
  rs2s(rb_obj_as_string(rval)));
@@ -3511,8 +3521,7 @@ static VALUE frb_sea_highlight(int argc, VALUE *argv, VALUE self) {
3511
3521
  if (v == sym_all) {
3512
3522
  num_excerpts = 1;
3513
3523
  excerpt_length = INT_MAX/2;
3514
- }
3515
- else {
3524
+ } else {
3516
3525
  excerpt_length = FIX2INT(v);
3517
3526
  }
3518
3527
  }
@@ -3605,6 +3614,7 @@ static VALUE frb_sea_init(VALUE self, VALUE obj) {
3605
3614
  if (TYPE(obj) == T_STRING) {
3606
3615
  frb_create_dir(obj);
3607
3616
  store = frt_open_mdbx_store(rs2s(obj));
3617
+ store->create_folder(store, segm_idx_name);
3608
3618
  ir = frt_ir_open(NULL, store);
3609
3619
  ir->rir = TypedData_Wrap_Struct(cIndexReader, &frb_index_reader_t, ir);
3610
3620
  } else {
@@ -4934,9 +4944,7 @@ static void Init_MultiSearcher(void) {
4934
4944
  *
4935
4945
  * Happy Ferreting!!
4936
4946
  */
4937
- void
4938
- Init_Search(void)
4939
- {
4947
+ void Init_Search(void) {
4940
4948
  mSearch = rb_define_module_under(mFerret, "Search");
4941
4949
 
4942
4950
  fsym_id = rb_intern("id");
@@ -8,8 +8,10 @@ static ID id_ref_cnt;
8
8
  VALUE cLock;
9
9
  VALUE cLockError;
10
10
  VALUE cDirectory;
11
- VALUE cRAMDirectory;
12
11
  VALUE cMDBXDirectory;
12
+ VALUE cRAMDirectory;
13
+ VALUE cObjectStore;
14
+ VALUE mStore;
13
15
 
14
16
  /****************************************************************************
15
17
  * Lock Methods
@@ -307,7 +309,7 @@ static VALUE frb_dir_close(VALUE self) {
307
309
 
308
310
  /*
309
311
  * call-seq:
310
- * dir.exists?(file_name) -> nil
312
+ * dir.exist?(file_name) -> nil
311
313
  *
312
314
  * Return true if a file with the name +file_name+ exists in the directory.
313
315
  */
@@ -319,7 +321,7 @@ static VALUE frb_dir_exists(VALUE self, VALUE rfname) {
319
321
  bool res;
320
322
 
321
323
  FRT_TRY
322
- res = store->exists(store, rs2s(rfname));
324
+ res = store->exists(store, segm_idx_name, rs2s(rfname));
323
325
  FRT_XCATCHALL
324
326
  ex_code = xcontext.excode;
325
327
  msg = xcontext.msg;
@@ -346,7 +348,7 @@ static VALUE frb_dir_touch(VALUE self, VALUE rfname) {
346
348
  StringValue(rfname);
347
349
 
348
350
  FRT_TRY
349
- store->touch(store, rs2s(rfname));
351
+ store->touch(store, segm_idx_name, rs2s(rfname));
350
352
  FRT_XCATCHALL
351
353
  ex_code = xcontext.excode;
352
354
  msg = xcontext.msg;
@@ -373,7 +375,7 @@ static VALUE frb_dir_delete(VALUE self, VALUE rfname) {
373
375
  StringValue(rfname);
374
376
  bool res;
375
377
  FRT_TRY
376
- res = (store->remove(store, rs2s(rfname)) == 0);
378
+ res = (store->remove(store, segm_idx_name, rs2s(rfname)) == 0);
377
379
  FRT_XCATCHALL
378
380
  ex_code = xcontext.excode;
379
381
  msg = xcontext.msg;
@@ -399,7 +401,7 @@ static VALUE frb_dir_file_count(VALUE self) {
399
401
  FrtStore *store = DATA_PTR(self);
400
402
  int cnt = 0;
401
403
  FRT_TRY
402
- cnt = INT2FIX(store->count(store));
404
+ cnt = INT2FIX(store->count(store, segm_idx_name));
403
405
  FRT_XCATCHALL
404
406
  ex_code = xcontext.excode;
405
407
  msg = xcontext.msg;
@@ -425,7 +427,7 @@ static VALUE frb_dir_refresh(VALUE self) {
425
427
  FrtStore *store = DATA_PTR(self);
426
428
 
427
429
  FRT_TRY
428
- store->clear_all(store);
430
+ store->clear_all(store, segm_idx_name);
429
431
  FRT_XCATCHALL
430
432
  ex_code = xcontext.excode;
431
433
  msg = xcontext.msg;
@@ -454,7 +456,7 @@ static VALUE frb_dir_rename(VALUE self, VALUE rfrom, VALUE rto) {
454
456
  StringValue(rfrom);
455
457
  StringValue(rto);
456
458
  FRT_TRY
457
- store->rename(store, rs2s(rfrom), rs2s(rto));
459
+ store->rename(store, segm_idx_name, rs2s(rfrom), rs2s(rto));
458
460
  FRT_XCATCHALL
459
461
  ex_code = xcontext.excode;
460
462
  msg = xcontext.msg;
@@ -486,7 +488,7 @@ static VALUE frb_dir_make_lock(VALUE self, VALUE rlock_name) {
486
488
  FrtStore *store = DATA_PTR(self);
487
489
  StringValue(rlock_name);
488
490
  FRT_TRY
489
- lock = frt_open_lock(store, rs2s(rlock_name));
491
+ lock = frt_open_lock(store, segm_idx_name, rs2s(rlock_name));
490
492
  FRT_XCATCHALL
491
493
  ex_code = xcontext.excode;
492
494
  msg = xcontext.msg;
@@ -529,10 +531,13 @@ static VALUE frb_ramdir_init(int argc, VALUE *argv, VALUE self) {
529
531
  case 1: {
530
532
  FrtStore *ostore;
531
533
  TypedData_Get_Struct(rdir, FrtStore, &frb_store_t, ostore);
532
- frt_open_ram_store_and_copy(store, ostore, false);
534
+ frt_open_ram_store_and_copy(store, ostore, segm_idx_name, false);
533
535
  break;
534
536
  }
535
- default: frt_open_ram_store(store);
537
+ default: {
538
+ frt_open_ram_store(store);
539
+ store->create_folder(store, segm_idx_name);
540
+ }
536
541
  }
537
542
  store->rstore = self;
538
543
  rb_ivar_set(self, id_ref_cnt, INT2FIX(0));
@@ -583,7 +588,8 @@ static VALUE frb_mdbxdir_new(int argc, VALUE *argv, VALUE klass) {
583
588
 
584
589
  FRT_TRY
585
590
  store = frt_open_mdbx_store(rs2s(rpath));
586
- if (create) store->clear_all(store);
591
+ store->create_folder(store, segm_idx_name);
592
+ if (create) store->clear_all(store, segm_idx_name);
587
593
  self = store->rstore;
588
594
  if (self == Qnil || DATA_PTR(self) == NULL) {
589
595
  self = TypedData_Wrap_Struct(klass, &frb_store_t, store);
@@ -634,7 +640,6 @@ void Init_Directory(void) {
634
640
  cDirectory = rb_define_class_under(mStore, "Directory", rb_cObject);
635
641
  rb_define_const(cDirectory, "LOCK_PREFIX", rb_str_new2(FRT_LOCK_PREFIX));
636
642
  rb_define_method(cDirectory, "close", frb_dir_close, 0);
637
- rb_define_method(cDirectory, "exists?", frb_dir_exists, 1);
638
643
  rb_define_method(cDirectory, "exist?", frb_dir_exists, 1);
639
644
  rb_define_method(cDirectory, "touch", frb_dir_touch, 1);
640
645
  rb_define_method(cDirectory, "delete", frb_dir_delete, 1);
@@ -707,6 +712,14 @@ void Init_MDBXDirectory(void) {
707
712
  rb_define_singleton_method(cMDBXDirectory, "new", frb_mdbxdir_new, -1);
708
713
  }
709
714
 
715
+ void Init_ObjectStore(void) {
716
+ cObjectStore = rb_define_class_under(mStore, "ObjectStore", rb_cObject);
717
+ // rb_define_alloc_func(cObjectStore, frb_obst_alloc);
718
+ // rb_define_method(cObjectStore, "initialize", frb_obst_init, -1);
719
+ // rb_define_method(cObjectStore, "fetch", frb_obst_fetch, -1);
720
+ // rb_define_method(cObjectStore, "key?", frb_obst_key?, -1);
721
+ // rb_define_method(cObjectStore, "store", frb_obst_store, -1);
722
+ }
710
723
  /*
711
724
  * Document-module: Ferret::Store
712
725
  *
@@ -724,4 +737,5 @@ void Init_Store(void) {
724
737
  Init_Lock();
725
738
  Init_RAMDirectory();
726
739
  Init_MDBXDirectory();
740
+ Init_ObjectStore();
727
741
  }
@@ -599,8 +599,7 @@ static int frb_mulmap_add_mappings_i(VALUE key, VALUE value, VALUE arg) {
599
599
  for (i = RARRAY_LEN(key) - 1; i >= 0; i--) {
600
600
  frb_mulmap_add_mapping_i(mulmap, RARRAY_PTR(key)[i], to);
601
601
  }
602
- }
603
- else {
602
+ } else {
604
603
  frb_mulmap_add_mapping_i(mulmap, key, to);
605
604
  }
606
605
  }
@@ -904,8 +903,7 @@ static VALUE frb_pq_insert(VALUE self, VALUE elem) {
904
903
  GET_PQ(pq, self);
905
904
  if (pq->size < pq->capa) {
906
905
  frb_pq_push(pq, elem);
907
- }
908
- else if (pq->size > 0 && frb_pq_lt(pq->proc, pq->heap[1], elem)) {
906
+ } else if (pq->size > 0 && frb_pq_lt(pq->proc, pq->heap[1], elem)) {
909
907
  pq->heap[1] = elem;
910
908
  frb_pq_down(pq);
911
909
  }
@@ -957,8 +955,7 @@ static VALUE frb_pq_pop(VALUE self) {
957
955
  pq->size--;
958
956
  frb_pq_down(pq); /* adjust heap */
959
957
  return result;
960
- }
961
- else {
958
+ } else {
962
959
  return Qnil;
963
960
  }
964
961
  }
@@ -52,7 +52,7 @@ static bool cp_enc_istok(OnigCodePoint cp, rb_encoding *enc) {
52
52
  return false;
53
53
  }
54
54
 
55
- static inline int get_cp(char *start, char *end, int *cp_len, rb_encoding *enc) {
55
+ static inline int get_cp(const char *start, const char *end, int *cp_len, rb_encoding *enc) {
56
56
  if (start >= end) {
57
57
  *cp_len = 0;
58
58
  return 0;
@@ -64,7 +64,7 @@ static inline int get_cp(char *start, char *end, int *cp_len, rb_encoding *enc)
64
64
  /*** FrtToken ****************************************************************/
65
65
  /*****************************************************************************/
66
66
 
67
- FrtToken *frt_tk_set(FrtToken *tk, char *text, int tlen, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding) {
67
+ FrtToken *frt_tk_set(FrtToken *tk, const char *text, int tlen, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding) {
68
68
  if (tlen >= FRT_MAX_WORD_SIZE) {
69
69
  char *head_last = rb_enc_left_char_head(text, text + FRT_MAX_WORD_SIZE - 1, text + tlen, encoding);
70
70
  tlen = head_last - text;
@@ -89,11 +89,11 @@ FrtToken *frt_tk_set(FrtToken *tk, char *text, int tlen, frt_off_t start, frt_of
89
89
  return tk;
90
90
  }
91
91
 
92
- static FrtToken *frt_tk_set_ts(FrtToken *tk, char *start, char *end, char *text, int pos_inc, rb_encoding *encoding) {
92
+ FrtToken *frt_tk_set_ts(FrtToken *tk, const char *start, const char *end, const char *text, int pos_inc, rb_encoding *encoding) {
93
93
  return frt_tk_set(tk, start, (int)(end - start), (off_t)(start - text), (off_t)(end - text), pos_inc, encoding);
94
94
  }
95
95
 
96
- FrtToken *frt_tk_set_no_len(FrtToken *tk, char *text, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding) {
96
+ FrtToken *frt_tk_set_no_len(FrtToken *tk, const char *text, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding) {
97
97
  return frt_tk_set(tk, text, (int)strlen(text), start, end, pos_inc, encoding);
98
98
  }
99
99
 
@@ -138,7 +138,7 @@ void frt_ts_deref(FrtTokenStream *ts) {
138
138
  ts->destroy_i(ts);
139
139
  }
140
140
 
141
- FrtTokenStream *frt_ts_reset(FrtTokenStream *ts, char *text, rb_encoding *encoding) {
141
+ FrtTokenStream *frt_ts_reset(FrtTokenStream *ts, const char *text, rb_encoding *encoding) {
142
142
  ts->t = ts->text = text;
143
143
  ts->length = strlen(text);
144
144
  ts->encoding = encoding;
@@ -224,14 +224,13 @@ FrtTokenStream *frt_non_tokenizer_new(void) {
224
224
  /*** FrtWhiteSpaceTokenizer **************************************************/
225
225
  /*****************************************************************************/
226
226
 
227
- static FrtToken *wst_next(FrtTokenStream *ts)
228
- {
227
+ static FrtToken *wst_next(FrtTokenStream *ts) {
229
228
  int cp_len = 0;
230
229
  OnigCodePoint cp;
231
230
  rb_encoding *enc = ts->encoding;
232
- char *end = ts->text + ts->length;
233
- char *start;
234
- char *t = ts->t;
231
+ const char *end = ts->text + ts->length;
232
+ const char *start;
233
+ const char *t = ts->t;
235
234
 
236
235
  cp = get_cp(t, end, &cp_len, enc);
237
236
  if (cp < 1)
@@ -278,9 +277,9 @@ static FrtToken *lt_next(FrtTokenStream *ts) {
278
277
  int cp_len = 0;
279
278
  OnigCodePoint cp;
280
279
  rb_encoding *enc = ts->encoding;
281
- char *end = ts->text + ts->length;
282
- char *start;
283
- char *t = ts->t;
280
+ const char *end = ts->text + ts->length;
281
+ const char *start;
282
+ const char *t = ts->t;
284
283
 
285
284
  cp = get_cp(t, end, &cp_len, enc);
286
285
  if (cp < 1)
@@ -324,9 +323,9 @@ FrtTokenStream *frt_letter_tokenizer_new(void) {
324
323
  /*****************************************************************************/
325
324
 
326
325
  static int std_get_alnum(FrtTokenStream *ts, char *token, OnigCodePoint cp, int *cp_len_p, OnigCodePoint *cp_out_p, rb_encoding *enc) {
327
- char *end = ts->text + ts->length;
328
- char *t = ts->t;
329
- char *tt = ts->t;
326
+ const char *end = ts->text + ts->length;
327
+ const char *t = ts->t;
328
+ const char *tt = ts->t;
330
329
  int cp_len = *cp_len_p;
331
330
 
332
331
  while (cp > 0 && rb_enc_isalnum(cp, enc)) {
@@ -349,10 +348,9 @@ static int std_get_alnum(FrtTokenStream *ts, char *token, OnigCodePoint cp, int
349
348
  * (alnum) = [a-zA-Z0-9]
350
349
  * (punc) = [_\/.,-]
351
350
  */
352
- static int std_get_number(FrtTokenStream *ts, char *start, char *end, OnigCodePoint cp, int cp_len_a, rb_encoding *enc) {
353
-
351
+ static int std_get_number(FrtTokenStream *ts, const char *start, const char *end, OnigCodePoint cp, int cp_len_a, rb_encoding *enc) {
354
352
  OnigCodePoint cp_1 = 0;
355
- char *t = start;
353
+ const char *t = start;
356
354
  int cp_len = cp_len_a;
357
355
  int cp_1_len = 0;
358
356
  int last_seen_digit = 2;
@@ -385,10 +383,10 @@ static int std_get_number(FrtTokenStream *ts, char *start, char *end, OnigCodePo
385
383
  }
386
384
  }
387
385
 
388
- static int std_get_apostrophe(FrtTokenStream *ts, char *input, OnigCodePoint cp, int *cp_len_p, rb_encoding *enc) {
386
+ static int std_get_apostrophe(FrtTokenStream *ts, const char *input, OnigCodePoint cp, int *cp_len_p, rb_encoding *enc) {
389
387
  int cp_len = *cp_len_p;
390
- char *end = ts->text + ts->length;
391
- char *t = input;
388
+ const char *end = ts->text + ts->length;
389
+ const char *t = input;
392
390
 
393
391
  while (cp_len > 0 && (rb_enc_isalpha(cp, enc) || cp == cp_apostrophe)) {
394
392
  t += cp_len;
@@ -397,14 +395,14 @@ static int std_get_apostrophe(FrtTokenStream *ts, char *input, OnigCodePoint cp,
397
395
  return (int)(t - input);
398
396
  }
399
397
 
400
- static char *std_get_url(FrtTokenStream *ts, char *start, char *end, char *token, int *len, int bufred) {
398
+ static const char *std_get_url(FrtTokenStream *ts, const char *start, const char *end, char *token, int *len, int bufred) {
401
399
  rb_encoding *enc = ts->encoding;
402
400
  OnigCodePoint cp;
403
401
  OnigCodePoint prev_cp = 0;
404
402
  int cp_len = 0;
405
403
  int prev_cp_len = 0;
406
- char *t = start;
407
- char *tt = start;
404
+ const char *t = start;
405
+ const char *tt = start;
408
406
 
409
407
  cp = get_cp(t, end, &cp_len, enc);
410
408
  while (cp > 0 && cp_enc_isurlc(cp, enc)) {
@@ -432,9 +430,9 @@ static char *std_get_url(FrtTokenStream *ts, char *start, char *end, char *token
432
430
  }
433
431
 
434
432
  /* Company names can contain '@' and '&' like AT&T and Excite@Home. */
435
- static int std_get_company_name(FrtTokenStream *ts, char *start, char* end) {
433
+ static int std_get_company_name(FrtTokenStream *ts, const char *start, const char* end) {
436
434
  rb_encoding *enc = ts->encoding;
437
- char * t = start;
435
+ const char * t = start;
438
436
  OnigCodePoint cp;
439
437
  int cp_len = 0;
440
438
 
@@ -452,8 +450,8 @@ static int std_advance_to_start(FrtTokenStream *ts, int *cp_len_p, OnigCodePoint
452
450
  int cp_next = 0;
453
451
  int cp_len_next = 0;
454
452
  OnigCodePoint cp;
455
- char *end = ts->text + ts->length;
456
- char *t = ts->t;
453
+ const char *end = ts->text + ts->length;
454
+ const char *t = ts->t;
457
455
 
458
456
  cp = get_cp(t, end, &cp_len, enc);
459
457
  while (cp > 0 && !rb_enc_isalnum(cp, enc)) {
@@ -472,11 +470,9 @@ static int std_advance_to_start(FrtTokenStream *ts, int *cp_len_p, OnigCodePoint
472
470
  }
473
471
 
474
472
  static FrtToken *std_next(FrtTokenStream *ts) {
475
- char *s;
476
- char *t;
477
- char *start = NULL;
478
- char *end;
479
- char *num_end = NULL;
473
+ const char *s, *t, *end;
474
+ const char *start = NULL;
475
+ const char *num_end = NULL;
480
476
  char token[FRT_MAX_WORD_SIZE + 1];
481
477
  OnigCodePoint cp = 0;
482
478
  OnigCodePoint cp_1 = 0;
@@ -528,13 +524,11 @@ static FrtToken *std_next(FrtTokenStream *ts) {
528
524
  t -= 2;
529
525
  frt_tk_set_ts(&(ts->token), start, t, ts->text, 1, enc);
530
526
  ts->token.end += 2;
531
- }
532
- else if (t[-1] == '\'') {
527
+ } else if (t[-1] == '\'') {
533
528
  t -= 1;
534
529
  frt_tk_set_ts(&(ts->token), start, t, ts->text, 1, enc);
535
530
  ts->token.end += 1;
536
- }
537
- else {
531
+ } else {
538
532
  frt_tk_set_ts(&(ts->token), start, t, ts->text, 1, enc);
539
533
  }
540
534
  return &(ts->token);
@@ -606,8 +600,7 @@ static FrtToken *std_next(FrtTokenStream *ts) {
606
600
  if (cp == cp_at) {
607
601
  if (seen_at_symbol) {
608
602
  break; /* we can only have one @ symbol */
609
- }
610
- else {
603
+ } else {
611
604
  seen_at_symbol = true;
612
605
  }
613
606
  }
@@ -693,7 +686,7 @@ static FrtTokenStream *filter_clone_i(FrtTokenStream *ts) {
693
686
  return frt_filter_clone_size(ts, sizeof(FrtTokenFilter));
694
687
  }
695
688
 
696
- static FrtTokenStream *filter_reset(FrtTokenStream *ts, char *text, rb_encoding *encoding) {
689
+ static FrtTokenStream *filter_reset(FrtTokenStream *ts, const char *text, rb_encoding *encoding) {
697
690
  TkFilt(ts)->sub_ts->reset(TkFilt(ts)->sub_ts, text, encoding);
698
691
  return ts;
699
692
  }
@@ -837,7 +830,7 @@ static FrtToken *mf_next(FrtTokenStream *ts) {
837
830
  return tk;
838
831
  }
839
832
 
840
- static FrtTokenStream *mf_reset(FrtTokenStream *ts, char *text, rb_encoding *encoding) {
833
+ static FrtTokenStream *mf_reset(FrtTokenStream *ts, const char *text, rb_encoding *encoding) {
841
834
  FrtMultiMapper *mm = MFilt(ts)->mapper;
842
835
  if (mm->d_size == 0)
843
836
  frt_mulmap_compile(MFilt(ts)->mapper);
@@ -1100,7 +1093,7 @@ static void frt_a_standard_destroy_i(FrtAnalyzer *a) {
1100
1093
  free(a);
1101
1094
  }
1102
1095
 
1103
- static FrtTokenStream *a_standard_get_ts(FrtAnalyzer *a, ID field, char *text, rb_encoding *encoding) {
1096
+ static FrtTokenStream *a_standard_get_ts(FrtAnalyzer *a, ID field, const char *text, rb_encoding *encoding) {
1104
1097
  FrtTokenStream *ts;
1105
1098
  (void)field;
1106
1099
  ts = frt_ts_clone(a->current_ts);
@@ -1112,7 +1105,7 @@ FrtAnalyzer *frt_analyzer_alloc(void) {
1112
1105
  }
1113
1106
 
1114
1107
  void frt_analyzer_init(FrtAnalyzer *a, FrtTokenStream *ts, void (*destroy_i)(FrtAnalyzer *a),
1115
- FrtTokenStream *(*get_ts)(FrtAnalyzer *a, ID field, char *text, rb_encoding *encoding)) {
1108
+ FrtTokenStream *(*get_ts)(FrtAnalyzer *a, ID field, const char *text, rb_encoding *encoding)) {
1116
1109
  a->current_ts = ts;
1117
1110
  a->destroy_i = (destroy_i ? destroy_i : &frt_a_standard_destroy_i);
1118
1111
  a->get_ts = (get_ts ? get_ts : &a_standard_get_ts);
@@ -1121,7 +1114,7 @@ void frt_analyzer_init(FrtAnalyzer *a, FrtTokenStream *ts, void (*destroy_i)(Frt
1121
1114
  }
1122
1115
 
1123
1116
  FrtAnalyzer *frt_analyzer_new(FrtTokenStream *ts, void (*destroy_i)(FrtAnalyzer *a),
1124
- FrtTokenStream *(*get_ts)(FrtAnalyzer *a, ID field, char *text, rb_encoding *encoding)) {
1117
+ FrtTokenStream *(*get_ts)(FrtAnalyzer *a, ID field, const char *text, rb_encoding *encoding)) {
1125
1118
  FrtAnalyzer *a = frt_analyzer_alloc();
1126
1119
  frt_analyzer_init(a, ts, destroy_i, get_ts);
1127
1120
  return a;
@@ -1214,7 +1207,7 @@ static void pfa_destroy_i(FrtAnalyzer *self) {
1214
1207
  free(self);
1215
1208
  }
1216
1209
 
1217
- static FrtTokenStream *pfa_get_ts(FrtAnalyzer *self, ID field, char *text, rb_encoding *encoding) {
1210
+ static FrtTokenStream *pfa_get_ts(FrtAnalyzer *self, ID field, const char *text, rb_encoding *encoding) {
1218
1211
  FrtAnalyzer *a = (FrtAnalyzer *)frt_h_get(PFA(self)->dict, (void *)field);
1219
1212
  if (a == NULL)
1220
1213
  a = PFA(self)->default_a;