isomorfeus-ferret 0.12.7 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (164) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +101 -19
  3. data/README.md +54 -1
  4. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
  5. data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
  6. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
  7. data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
  8. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +1 -1
  9. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +1 -1
  10. data/ext/isomorfeus_ferret_ext/bzip_blocksort.c +1094 -0
  11. data/ext/isomorfeus_ferret_ext/bzip_huffman.c +205 -0
  12. data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
  13. data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
  14. data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
  15. data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
  16. data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
  17. data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
  18. data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
  19. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
  20. data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
  21. data/ext/isomorfeus_ferret_ext/frb_index.c +492 -474
  22. data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
  23. data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
  24. data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
  25. data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
  26. data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
  27. data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
  28. data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
  29. data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
  30. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
  31. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
  32. data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
  33. data/ext/isomorfeus_ferret_ext/frt_document.h +10 -10
  34. data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
  35. data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
  36. data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
  37. data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
  38. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
  39. data/ext/isomorfeus_ferret_ext/frt_global.c +105 -63
  40. data/ext/isomorfeus_ferret_ext/frt_global.h +7 -3
  41. data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
  42. data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
  43. data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
  44. data/ext/isomorfeus_ferret_ext/frt_index.c +580 -399
  45. data/ext/isomorfeus_ferret_ext/frt_index.h +272 -291
  46. data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
  47. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
  48. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +67 -91
  49. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
  50. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
  51. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
  52. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
  53. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
  54. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
  55. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +20 -16
  56. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
  57. data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
  58. data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
  59. data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
  60. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
  61. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +45 -84
  62. data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
  63. data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
  64. data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
  65. data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
  66. data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
  67. data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
  68. data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
  69. data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
  70. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +21 -109
  71. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
  72. data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
  73. data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
  74. data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
  75. data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
  76. data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
  77. data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
  78. data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
  79. data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
  80. data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
  81. data/ext/isomorfeus_ferret_ext/test.c +1 -2
  82. data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
  83. data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
  84. data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
  85. data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
  86. data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
  87. data/ext/isomorfeus_ferret_ext/test_fields.c +59 -60
  88. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
  89. data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
  90. data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
  91. data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
  92. data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
  93. data/ext/isomorfeus_ferret_ext/test_index.c +372 -365
  94. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
  95. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
  96. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
  97. data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
  98. data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
  99. data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
  100. data/ext/isomorfeus_ferret_ext/test_search.c +60 -62
  101. data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
  102. data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
  103. data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
  104. data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
  105. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
  106. data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
  107. data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
  108. data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
  109. data/lib/isomorfeus/ferret/version.rb +1 -1
  110. metadata +27 -57
  111. data/ext/isomorfeus_ferret_ext/email.rl +0 -21
  112. data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
  113. data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
  114. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
  115. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
  116. data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
  117. data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
  118. data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
  119. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
  120. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
  121. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
  122. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
  128. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
  129. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
  130. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
  131. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
  132. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
  133. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
  134. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
  135. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
  136. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
  137. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
  138. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
  139. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
  140. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
  141. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
  142. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
  143. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
  144. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
  145. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
  146. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
  147. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
  148. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
  149. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
  150. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
  151. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
  152. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
  153. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
  154. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
  155. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
  156. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
  157. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
  158. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
  159. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
  160. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
  161. data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
  162. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
  163. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
  164. data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -1,6 +1,8 @@
1
1
  #include "frt_index.h"
2
2
  #include "isomorfeus_ferret.h"
3
- #include <ruby/st.h>
3
+ #include <ruby.h>
4
+
5
+ #undef close
4
6
 
5
7
  VALUE mIndex;
6
8
 
@@ -41,8 +43,10 @@ static VALUE sym_store;
41
43
  static VALUE sym_index;
42
44
  static VALUE sym_term_vector;
43
45
 
44
- static VALUE sym_compress;
45
- static VALUE sym_compressed;
46
+ static VALUE sym_brotli;
47
+ static VALUE sym_bz2;
48
+ static VALUE sym_lz4;
49
+ static VALUE sym_compression;
46
50
 
47
51
  static VALUE sym_untokenized;
48
52
  static VALUE sym_omit_norms;
@@ -52,7 +56,7 @@ static VALUE sym_with_positions;
52
56
  static VALUE sym_with_offsets;
53
57
  static VALUE sym_with_positions_offsets;
54
58
 
55
- static FrtSymbol fsym_content;
59
+ static ID fsym_content;
56
60
 
57
61
  static ID id_term;
58
62
  static ID id_fields;
@@ -70,20 +74,11 @@ extern VALUE frb_get_analyzer(FrtAnalyzer *a);
70
74
  *
71
75
  ****************************************************************************/
72
76
 
73
- static void
74
- frb_fi_free(void *p)
75
- {
76
- object_del(p);
77
+ static void frb_fi_free(void *p) {
77
78
  frt_fi_deref((FrtFieldInfo *)p);
78
79
  }
79
80
 
80
- static void
81
- frb_fi_get_params(VALUE roptions,
82
- FrtStoreValue *store,
83
- FrtIndexValue *index,
84
- FrtTermVectorValue *term_vector,
85
- float *boost)
86
- {
81
+ static void frb_fi_get_params(VALUE roptions, FrtStoreValue *store, FrtCompressionType *compression, FrtIndexValue *index, FrtTermVectorValue *term_vector, float *boost) {
87
82
  VALUE v;
88
83
  Check_Type(roptions, T_HASH);
89
84
  v = rb_hash_aref(roptions, sym_boost);
@@ -98,13 +93,27 @@ frb_fi_get_params(VALUE roptions,
98
93
  *store = FRT_STORE_NO;
99
94
  } else if (v == sym_yes || v == sym_true || v == Qtrue) {
100
95
  *store = FRT_STORE_YES;
101
- } else if (v == sym_compress || v == sym_compressed) {
102
- *store = FRT_STORE_COMPRESS;
103
96
  } else if (v == Qnil) {
104
97
  /* leave as default */
105
98
  } else {
106
- rb_raise(rb_eArgError, ":%s isn't a valid argument for :store."
107
- " Please choose from [:yes, :no, :compressed]",
99
+ rb_raise(rb_eArgError, ":%s isn't a valid argument for :store. Please choose from [:yes, :no]",
100
+ rb_id2name(SYM2ID(v)));
101
+ }
102
+
103
+ v = rb_hash_aref(roptions, sym_compression);
104
+ if (Qnil != v) Check_Type(v, T_SYMBOL);
105
+ if (v == sym_no || v == sym_false || v == Qfalse) {
106
+ *compression = FRT_COMPRESSION_NONE;
107
+ } else if (v == sym_yes || v == sym_true || v == Qtrue || v == sym_brotli) {
108
+ *compression = FRT_COMPRESSION_BROTLI;
109
+ } else if (v == sym_bz2) {
110
+ *compression = FRT_COMPRESSION_BZ2;
111
+ } else if (v == sym_lz4) {
112
+ *compression = FRT_COMPRESSION_LZ4;
113
+ } else if (v == Qnil) {
114
+ /* leave as default */
115
+ } else {
116
+ rb_raise(rb_eArgError, ":%s isn't a valid argument for :compression. Please choose from [:yes, :no, :brotli, :bz2, :lz4]",
108
117
  rb_id2name(SYM2ID(v)));
109
118
  }
110
119
 
@@ -123,10 +132,8 @@ frb_fi_get_params(VALUE roptions,
123
132
  } else if (v == Qnil) {
124
133
  /* leave as default */
125
134
  } else {
126
- rb_raise(rb_eArgError, ":%s isn't a valid argument for :index."
127
- " Please choose from [:no, :yes, :untokenized, "
128
- ":omit_norms, :untokenized_omit_norms]",
129
- rb_id2name(SYM2ID(v)));
135
+ rb_raise(rb_eArgError, ":%s isn't a valid argument for :index. Please choose from [:no, :yes, :untokenized, "
136
+ ":omit_norms, :untokenized_omit_norms]", rb_id2name(SYM2ID(v)));
130
137
  }
131
138
 
132
139
  v = rb_hash_aref(roptions, sym_term_vector);
@@ -144,28 +151,38 @@ frb_fi_get_params(VALUE roptions,
144
151
  } else if (v == Qnil) {
145
152
  /* leave as default */
146
153
  } else {
147
- rb_raise(rb_eArgError, ":%s isn't a valid argument for "
148
- ":term_vector. Please choose from [:no, :yes, "
149
- ":with_positions, :with_offsets, "
150
- ":with_positions_offsets]",
151
- rb_id2name(SYM2ID(v)));
154
+ rb_raise(rb_eArgError, ":%s isn't a valid argument for :term_vector. Please choose from [:no, :yes, "
155
+ ":with_positions, :with_offsets, :with_positions_offsets]", rb_id2name(SYM2ID(v)));
152
156
  }
153
157
  }
154
158
 
155
- static VALUE
156
- frb_get_field_info(FrtFieldInfo *fi)
157
- {
159
+ static size_t frb_fi_size(const void *p) {
160
+ return sizeof(FrtFieldInfo);
161
+ (void)p;
162
+ }
158
163
 
159
- VALUE rfi = Qnil;
164
+ const rb_data_type_t frb_field_info_t = {
165
+ .wrap_struct_name = "FrbFieldInfo",
166
+ .function = {
167
+ .dmark = NULL,
168
+ .dfree = frb_fi_free,
169
+ .dsize = frb_fi_size,
170
+ .dcompact = NULL,
171
+ .reserved = {0},
172
+ },
173
+ .parent = NULL,
174
+ .data = NULL,
175
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY
176
+ };
177
+
178
+ static VALUE frb_get_field_info(FrtFieldInfo *fi) {
160
179
  if (fi) {
161
- rfi = object_get(fi);
162
- if (rfi == Qnil) {
163
- rfi = Data_Wrap_Struct(cFieldInfo, NULL, &frb_fi_free, fi);
180
+ if (fi->rfi == 0 || fi->rfi == Qnil) {
181
+ fi->rfi = TypedData_Wrap_Struct(cFieldInfo, &frb_field_info_t, fi);
164
182
  FRT_REF(fi);
165
- object_add(fi, rfi);
166
183
  }
167
184
  }
168
- return rfi;
185
+ return fi->rfi;
169
186
  }
170
187
 
171
188
  /*
@@ -173,28 +190,32 @@ frb_get_field_info(FrtFieldInfo *fi)
173
190
  * FieldInfo.new(name, options = {}) -> field_info
174
191
  *
175
192
  * Create a new FieldInfo object with the name +name+ and the properties
176
- * specified in +options+. The available options are [:store, :index,
177
- * :term_vector, :boost]. See the description of FieldInfo for more
193
+ * specified in +options+. The available options are [:store, :compression,
194
+ * :index, :term_vector, :boost]. See the description of FieldInfo for more
178
195
  * information on these properties.
179
196
  */
180
- static VALUE
181
- frb_fi_init(int argc, VALUE *argv, VALUE self)
182
- {
197
+ static VALUE frb_fi_alloc(VALUE rclass) {
198
+ FrtFieldInfo *fi = frt_fi_alloc();
199
+ return TypedData_Wrap_Struct(rclass, &frb_field_info_t, fi);
200
+ }
201
+
202
+ static VALUE frb_fi_init(int argc, VALUE *argv, VALUE self) {
183
203
  VALUE roptions, rname;
184
204
  FrtFieldInfo *fi;
205
+ TypedData_Get_Struct(self, FrtFieldInfo, &frb_field_info_t, fi);
185
206
  FrtStoreValue store = FRT_STORE_YES;
207
+ FrtCompressionType compression = FRT_COMPRESSION_NONE;
186
208
  FrtIndexValue index = FRT_INDEX_YES;
187
209
  FrtTermVectorValue term_vector = FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS;
188
210
  float boost = 1.0f;
189
211
 
190
212
  rb_scan_args(argc, argv, "11", &rname, &roptions);
191
213
  if (argc > 1) {
192
- frb_fi_get_params(roptions, &store, &index, &term_vector, &boost);
214
+ frb_fi_get_params(roptions, &store, &compression, &index, &term_vector, &boost);
193
215
  }
194
- fi = frt_fi_new(frb_field(rname), store, index, term_vector);
216
+ fi = frt_fi_init(fi, frb_field(rname), store, compression, index, term_vector);
195
217
  fi->boost = boost;
196
- Frt_Wrap_Struct(self, NULL, &frb_fi_free, fi);
197
- object_add(fi, self);
218
+ fi->rfi = self;
198
219
  return self;
199
220
  }
200
221
 
@@ -204,9 +225,7 @@ frb_fi_init(int argc, VALUE *argv, VALUE self)
204
225
  *
205
226
  * Return the name of the field
206
227
  */
207
- static VALUE
208
- frb_fi_name(VALUE self)
209
- {
228
+ static VALUE frb_fi_name(VALUE self) {
210
229
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
211
230
  return rb_str_new_cstr(rb_id2name(fi->name));
212
231
  }
@@ -217,9 +236,7 @@ frb_fi_name(VALUE self)
217
236
  *
218
237
  * Return true if the field is stored in the index.
219
238
  */
220
- static VALUE
221
- frb_fi_is_stored(VALUE self)
222
- {
239
+ static VALUE frb_fi_is_stored(VALUE self) {
223
240
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
224
241
  return fi_is_stored(fi) ? Qtrue : Qfalse;
225
242
  }
@@ -230,9 +247,7 @@ frb_fi_is_stored(VALUE self)
230
247
  *
231
248
  * Return true if the field is stored in the index in compressed format.
232
249
  */
233
- static VALUE
234
- frb_fi_is_compressed(VALUE self)
235
- {
250
+ static VALUE frb_fi_is_compressed(VALUE self) {
236
251
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
237
252
  return fi_is_compressed(fi) ? Qtrue : Qfalse;
238
253
  }
@@ -243,9 +258,7 @@ frb_fi_is_compressed(VALUE self)
243
258
  *
244
259
  * Return true if the field is indexed, i.e., searchable in the index.
245
260
  */
246
- static VALUE
247
- frb_fi_is_indexed(VALUE self)
248
- {
261
+ static VALUE frb_fi_is_indexed(VALUE self) {
249
262
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
250
263
  return fi_is_indexed(fi) ? Qtrue : Qfalse;
251
264
  }
@@ -261,9 +274,7 @@ frb_fi_is_indexed(VALUE self)
261
274
  *
262
275
  * A field can only be tokenized if it is indexed.
263
276
  */
264
- static VALUE
265
- frb_fi_is_tokenized(VALUE self)
266
- {
277
+ static VALUE frb_fi_is_tokenized(VALUE self) {
267
278
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
268
279
  return fi_is_tokenized(fi) ? Qtrue : Qfalse;
269
280
  }
@@ -279,9 +290,7 @@ frb_fi_is_tokenized(VALUE self)
279
290
  * boost and it will use less memory, especially for indexes which have a
280
291
  * large number of documents.
281
292
  */
282
- static VALUE
283
- frb_fi_omit_norms(VALUE self)
284
- {
293
+ static VALUE frb_fi_omit_norms(VALUE self) {
285
294
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
286
295
  return fi_omit_norms(fi) ? Qtrue : Qfalse;
287
296
  }
@@ -292,9 +301,7 @@ frb_fi_omit_norms(VALUE self)
292
301
  *
293
302
  * Return true if the term-vectors are stored for this field.
294
303
  */
295
- static VALUE
296
- frb_fi_store_term_vector(VALUE self)
297
- {
304
+ static VALUE frb_fi_store_term_vector(VALUE self) {
298
305
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
299
306
  return fi_store_term_vector(fi) ? Qtrue : Qfalse;
300
307
  }
@@ -305,9 +312,7 @@ frb_fi_store_term_vector(VALUE self)
305
312
  *
306
313
  * Return true if positions are stored with the term-vectors for this field.
307
314
  */
308
- static VALUE
309
- frb_fi_store_positions(VALUE self)
310
- {
315
+ static VALUE frb_fi_store_positions(VALUE self) {
311
316
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
312
317
  return fi_store_positions(fi) ? Qtrue : Qfalse;
313
318
  }
@@ -318,9 +323,7 @@ frb_fi_store_positions(VALUE self)
318
323
  *
319
324
  * Return true if offsets are stored with the term-vectors for this field.
320
325
  */
321
- static VALUE
322
- frb_fi_store_offsets(VALUE self)
323
- {
326
+ static VALUE frb_fi_store_offsets(VALUE self) {
324
327
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
325
328
  return fi_store_offsets(fi) ? Qtrue : Qfalse;
326
329
  }
@@ -333,9 +336,7 @@ frb_fi_store_offsets(VALUE self)
333
336
  *
334
337
  * fi.indexed? and not fi.omit_norms?
335
338
  */
336
- static VALUE
337
- frb_fi_has_norms(VALUE self)
338
- {
339
+ static VALUE frb_fi_has_norms(VALUE self) {
339
340
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
340
341
  return fi_has_norms(fi) ? Qtrue : Qfalse;
341
342
  }
@@ -346,9 +347,7 @@ frb_fi_has_norms(VALUE self)
346
347
  *
347
348
  * Return the default boost for this field
348
349
  */
349
- static VALUE
350
- frb_fi_boost(VALUE self)
351
- {
350
+ static VALUE frb_fi_boost(VALUE self) {
352
351
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
353
352
  return rb_float_new((double)fi->boost);
354
353
  }
@@ -359,9 +358,7 @@ frb_fi_boost(VALUE self)
359
358
  *
360
359
  * Return a string representation of the FieldInfo object.
361
360
  */
362
- static VALUE
363
- frb_fi_to_s(VALUE self)
364
- {
361
+ static VALUE frb_fi_to_s(VALUE self) {
365
362
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
366
363
  char *fi_s = frt_fi_to_s(fi);
367
364
  VALUE rfi_s = rb_str_new2(fi_s);
@@ -375,39 +372,47 @@ frb_fi_to_s(VALUE self)
375
372
  *
376
373
  ****************************************************************************/
377
374
 
378
- static void
379
- frb_fis_free(void *p)
380
- {
381
- object_del(p);
375
+ static void frb_fis_free(void *p) {
382
376
  frt_fis_deref((FrtFieldInfos *)p);
383
377
  }
384
378
 
385
- static void
386
- frb_fis_mark(void *p)
387
- {
379
+ static void frb_fis_mark(void *p) {
388
380
  int i;
389
381
  FrtFieldInfos *fis = (FrtFieldInfos *)p;
390
382
 
391
383
  for (i = 0; i < fis->size; i++) {
392
- frb_gc_mark(fis->fields[i]);
384
+ if (fis->fields[i]->rfi)
385
+ rb_gc_mark(fis->fields[i]->rfi);
393
386
  }
394
387
  }
395
388
 
396
- static VALUE
397
- frb_get_field_infos(FrtFieldInfos *fis)
398
- {
389
+ static size_t frb_field_infos_t_size(const void *p) {
390
+ return sizeof(FrtFieldInfos);
391
+ (void)p;
392
+ }
393
+
394
+ const rb_data_type_t frb_field_infos_t = {
395
+ .wrap_struct_name = "FrbFieldInfos",
396
+ .function = {
397
+ .dmark = frb_fis_mark,
398
+ .dfree = frb_fis_free,
399
+ .dsize = frb_field_infos_t_size,
400
+ .dcompact = NULL,
401
+ .reserved = {0},
402
+ },
403
+ .parent = NULL,
404
+ .data = NULL,
405
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY
406
+ };
399
407
 
400
- VALUE rfis = Qnil;
408
+ static VALUE frb_get_field_infos(FrtFieldInfos *fis) {
401
409
  if (fis) {
402
- rfis = object_get(fis);
403
- if (rfis == Qnil) {
404
- rfis = Data_Wrap_Struct(cFieldInfos, &frb_fis_mark, &frb_fis_free,
405
- fis);
410
+ if (fis->rfis == 0 || fis->rfis == Qnil) {
411
+ fis->rfis = TypedData_Wrap_Struct(cFieldInfos, &frb_field_infos_t, fis);
406
412
  FRT_REF(fis);
407
- object_add(fis, rfis);
408
413
  }
409
414
  }
410
- return rfis;
415
+ return fis->rfis;
411
416
  }
412
417
 
413
418
  /*
@@ -418,23 +423,28 @@ frb_get_field_infos(FrtFieldInfos *fis)
418
423
  * specified in the +default+ hash parameter. See FieldInfo for available
419
424
  * property values.
420
425
  */
421
- static VALUE
422
- frb_fis_init(int argc, VALUE *argv, VALUE self)
423
- {
426
+
427
+ static VALUE frb_fis_alloc(VALUE rclass) {
428
+ FrtFieldInfos *fis = frt_fis_alloc();
429
+ return TypedData_Wrap_Struct(rclass, &frb_field_infos_t, fis);
430
+ }
431
+
432
+ static VALUE frb_fis_init(int argc, VALUE *argv, VALUE self) {
424
433
  VALUE roptions;
425
434
  FrtFieldInfos *fis;
435
+ TypedData_Get_Struct(self, FrtFieldInfos, &frb_field_infos_t, fis);
426
436
  FrtStoreValue store = FRT_STORE_YES;
437
+ FrtCompressionType compression = FRT_COMPRESSION_NONE;
427
438
  FrtIndexValue index = FRT_INDEX_YES;
428
439
  FrtTermVectorValue term_vector = FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS;
429
440
  float boost;
430
441
 
431
442
  rb_scan_args(argc, argv, "01", &roptions);
432
443
  if (argc > 0) {
433
- frb_fi_get_params(roptions, &store, &index, &term_vector, &boost);
444
+ frb_fi_get_params(roptions, &store, &compression, &index, &term_vector, &boost);
434
445
  }
435
- fis = frt_fis_new(store, index, term_vector);
436
- Frt_Wrap_Struct(self, &frb_fis_mark, &frb_fis_free, fis);
437
- object_add(fis, self);
446
+ fis = frt_fis_init(fis, store, compression, index, term_vector);
447
+ fis->rfis = self;
438
448
  return self;
439
449
  }
440
450
 
@@ -445,9 +455,7 @@ frb_fis_init(int argc, VALUE *argv, VALUE self)
445
455
  * Return an array of the FieldInfo objects contained by this FieldInfos
446
456
  * object.
447
457
  */
448
- static VALUE
449
- frb_fis_to_a(VALUE self)
450
- {
458
+ static VALUE frb_fis_to_a(VALUE self) {
451
459
  FrtFieldInfos *fis = (FrtFieldInfos *)DATA_PTR(self);
452
460
  VALUE rary = rb_ary_new();
453
461
  int i;
@@ -470,9 +478,7 @@ frb_fis_to_a(VALUE self)
470
478
  * fi = fis[:name]
471
479
  * fi = fis[2]
472
480
  */
473
- static VALUE
474
- frb_fis_get(VALUE self, VALUE ridx)
475
- {
481
+ static VALUE frb_fis_get(VALUE self, VALUE ridx) {
476
482
  FrtFieldInfos *fis = (FrtFieldInfos *)DATA_PTR(self);
477
483
  VALUE rfi = Qnil;
478
484
  switch (TYPE(ridx)) {
@@ -511,9 +517,7 @@ frb_fis_get(VALUE self, VALUE ridx)
511
517
  * Add a FieldInfo object. Use the FieldInfos#add_field method where
512
518
  * possible.
513
519
  */
514
- static VALUE
515
- frb_fis_add(VALUE self, VALUE rfi)
516
- {
520
+ static VALUE frb_fis_add(VALUE self, VALUE rfi) {
517
521
  FrtFieldInfos *fis = (FrtFieldInfos *)DATA_PTR(self);
518
522
  FrtFieldInfo *fi = (FrtFieldInfo *)frb_rb_data_ptr(rfi);
519
523
  frt_fis_add_field(fis, fi);
@@ -534,6 +538,7 @@ frb_fis_add_field(int argc, VALUE *argv, VALUE self)
534
538
  FrtFieldInfos *fis = (FrtFieldInfos *)DATA_PTR(self);
535
539
  FrtFieldInfo *fi;
536
540
  FrtStoreValue store = fis->store;
541
+ FrtCompressionType compression = fis->compression;
537
542
  FrtIndexValue index = fis->index;
538
543
  FrtTermVectorValue term_vector = fis->term_vector;
539
544
  float boost = 1.0f;
@@ -541,9 +546,9 @@ frb_fis_add_field(int argc, VALUE *argv, VALUE self)
541
546
 
542
547
  rb_scan_args(argc, argv, "11", &rname, &roptions);
543
548
  if (argc > 1) {
544
- frb_fi_get_params(roptions, &store, &index, &term_vector, &boost);
549
+ frb_fi_get_params(roptions, &store, &compression, &index, &term_vector, &boost);
545
550
  }
546
- fi = frt_fi_new(frb_field(rname), store, index, term_vector);
551
+ fi = frt_fi_new(frb_field(rname), store, compression, index, term_vector);
547
552
  fi->boost = boost;
548
553
  frt_fis_add_field(fis, fi);
549
554
  return self;
@@ -670,28 +675,46 @@ frb_fis_get_tk_fields(VALUE self)
670
675
  *
671
676
  ****************************************************************************/
672
677
 
673
- static void
674
- frb_te_free(void *p)
675
- {
678
+ static void frb_te_free(void *p) {
676
679
  FrtTermEnum *te = (FrtTermEnum *)p;
677
680
  te->close(te);
678
681
  }
679
682
 
680
- static VALUE
681
- frb_te_get_set_term(VALUE self, const char *term)
682
- {
683
+ static size_t frb_te_size(const void *p) {
684
+ return sizeof(FrtTermEnum);
685
+ (void)p;
686
+ }
687
+
688
+ const rb_data_type_t frb_term_enum_t = {
689
+ .wrap_struct_name = "FrbTermEnum",
690
+ .function = {
691
+ .dmark = NULL,
692
+ .dfree = frb_te_free,
693
+ .dsize = frb_te_size,
694
+ .dcompact = NULL,
695
+ .reserved = {0},
696
+ },
697
+ .parent = NULL,
698
+ .data = NULL,
699
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY
700
+ };
701
+
702
+ static VALUE frb_te_alloc(VALUE rclass) {
703
+ FrtTermEnum *te = FRT_ALLOC_AND_ZERO(FrtTermEnum);
704
+ return TypedData_Wrap_Struct(rclass, &frb_term_enum_t, te);
705
+ }
706
+
707
+ static VALUE frb_te_get_set_term(VALUE self, const char *term) {
683
708
  FrtTermEnum *te = (FrtTermEnum *)DATA_PTR(self);
684
709
  VALUE str = term ? rb_str_new(term, te->curr_term_len) : Qnil;
685
710
  rb_ivar_set(self, id_term, str);
686
711
  return str;
687
712
  }
688
713
 
689
- static VALUE
690
- frb_get_te(VALUE rir, FrtTermEnum *te)
691
- {
714
+ static VALUE frb_get_te(VALUE rir, FrtTermEnum *te) {
692
715
  VALUE self = Qnil;
693
716
  if (te != NULL) {
694
- self = Data_Wrap_Struct(cTermEnum, NULL, &frb_te_free, te);
717
+ self = TypedData_Wrap_Struct(cTermEnum, &frb_term_enum_t, te);
695
718
  frb_te_get_set_term(self, te->curr_term);
696
719
  rb_ivar_set(self, id_fld_num_map, rb_ivar_get(rir, id_fld_num_map));
697
720
  }
@@ -704,9 +727,7 @@ frb_get_te(VALUE rir, FrtTermEnum *te)
704
727
  *
705
728
  * Returns the next term in the enumeration or nil otherwise.
706
729
  */
707
- static VALUE
708
- frb_te_next(VALUE self)
709
- {
730
+ static VALUE frb_te_next(VALUE self) {
710
731
  FrtTermEnum *te = (FrtTermEnum *)DATA_PTR(self);
711
732
  return frb_te_get_set_term(self, te->next(te));
712
733
  }
@@ -718,9 +739,7 @@ frb_te_next(VALUE self)
718
739
  * Returns the current term pointed to by the enum. This method should only
719
740
  * be called after a successful call to TermEnum#next.
720
741
  */
721
- static VALUE
722
- frb_te_term(VALUE self)
723
- {
742
+ static VALUE frb_te_term(VALUE self) {
724
743
  return rb_ivar_get(self, id_term);
725
744
  }
726
745
 
@@ -732,9 +751,7 @@ frb_te_term(VALUE self)
732
751
  * That is the number of documents that this term appears in. The method
733
752
  * should only be called after a successful call to TermEnum#next.
734
753
  */
735
- static VALUE
736
- frb_te_doc_freq(VALUE self)
737
- {
754
+ static VALUE frb_te_doc_freq(VALUE self) {
738
755
  FrtTermEnum *te = (FrtTermEnum *)DATA_PTR(self);
739
756
  return INT2FIX(te->curr_ti.doc_freq);
740
757
  }
@@ -750,9 +767,7 @@ frb_te_doc_freq(VALUE self)
750
767
  *
751
768
  * Returns the first term greater than or equal to +target+
752
769
  */
753
- static VALUE
754
- frb_te_skip_to(VALUE self, VALUE rterm)
755
- {
770
+ static VALUE frb_te_skip_to(VALUE self, VALUE rterm) {
756
771
  FrtTermEnum *te = (FrtTermEnum *)DATA_PTR(self);
757
772
  return frb_te_get_set_term(self, te->skip_to(te, rs2s(rterm)));
758
773
  }
@@ -764,9 +779,7 @@ frb_te_skip_to(VALUE self, VALUE rterm)
764
779
  * Iterates through all the terms in the field, yielding the term and the
765
780
  * document frequency.
766
781
  */
767
- static VALUE
768
- frb_te_each(VALUE self)
769
- {
782
+ static VALUE frb_te_each(VALUE self) {
770
783
  FrtTermEnum *te = (FrtTermEnum *)DATA_PTR(self);
771
784
  char *term;
772
785
  int term_cnt = 0;
@@ -798,9 +811,7 @@ frb_te_each(VALUE self)
798
811
  * do_something()
799
812
  * end
800
813
  */
801
- static VALUE
802
- frb_te_set_field(VALUE self, VALUE rfield)
803
- {
814
+ static VALUE frb_te_set_field(VALUE self, VALUE rfield) {
804
815
  FrtTermEnum *te = (FrtTermEnum *)DATA_PTR(self);
805
816
  int field_num = 0;
806
817
  VALUE rfnum_map = rb_ivar_get(self, id_fld_num_map);
@@ -840,9 +851,7 @@ frb_te_set_field(VALUE self, VALUE rfield)
840
851
  * # ["cantaloupe",12]
841
852
  * # ]
842
853
  */
843
- static VALUE
844
- frb_te_to_json(int argc, VALUE *argv, VALUE self)
845
- {
854
+ static VALUE frb_te_to_json(int argc, VALUE *argv, VALUE self) {
846
855
  FrtTermEnum *te = (FrtTermEnum *)DATA_PTR(self);
847
856
  VALUE rjson;
848
857
  char *json, *jp;
@@ -867,8 +876,7 @@ frb_te_to_json(int argc, VALUE *argv, VALUE self)
867
876
  *(jp++) = ']';
868
877
  *(jp++) = ',';
869
878
  }
870
- }
871
- else {
879
+ } else {
872
880
  while (NULL != (term = te->next(te))) {
873
881
  /* enough room for term after converting " to '"' and frequency
874
882
  * plus some extra for good measure */
@@ -904,17 +912,37 @@ frb_te_to_json(int argc, VALUE *argv, VALUE self)
904
912
  *
905
913
  ****************************************************************************/
906
914
 
907
- static void
908
- frb_tde_free(void *p)
909
- {
915
+ static void frb_tde_free(void *p) {
910
916
  FrtTermDocEnum *tde = (FrtTermDocEnum *)p;
911
917
  tde->close(tde);
912
918
  }
913
919
 
914
- static VALUE
915
- frb_get_tde(VALUE rir, FrtTermDocEnum *tde)
916
- {
917
- VALUE self = Data_Wrap_Struct(cTermDocEnum, NULL, &frb_tde_free, tde);
920
+ static size_t frb_tde_size(const void *p) {
921
+ return sizeof(FrtTermDocEnum);
922
+ (void)p;
923
+ }
924
+
925
+ const rb_data_type_t frb_term_doc_enum_t = {
926
+ .wrap_struct_name = "FrbTermDocEnum",
927
+ .function = {
928
+ .dmark = NULL,
929
+ .dfree = frb_tde_free,
930
+ .dsize = frb_tde_size,
931
+ .dcompact = NULL,
932
+ .reserved = {0},
933
+ },
934
+ .parent = NULL,
935
+ .data = NULL,
936
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY
937
+ };
938
+
939
+ static VALUE frb_tde_alloc(VALUE rclass) {
940
+ FrtTermDocEnum *tde = FRT_ALLOC_AND_ZERO(FrtTermDocEnum);
941
+ return TypedData_Wrap_Struct(rclass, &frb_term_doc_enum_t, tde);
942
+ }
943
+
944
+ static VALUE frb_get_tde(VALUE rir, FrtTermDocEnum *tde) {
945
+ VALUE self = TypedData_Wrap_Struct(cTermDocEnum, &frb_term_doc_enum_t, tde);
918
946
  rb_ivar_set(self, id_fld_num_map, rb_ivar_get(rir, id_fld_num_map));
919
947
  return self;
920
948
  }
@@ -927,9 +955,7 @@ frb_get_tde(VALUE rir, FrtTermDocEnum *tde)
927
955
  * you can call next or each to skip through the documents and positions of
928
956
  * this particular term.
929
957
  */
930
- static VALUE
931
- frb_tde_seek(VALUE self, VALUE rfield, VALUE rterm)
932
- {
958
+ static VALUE frb_tde_seek(VALUE self, VALUE rfield, VALUE rterm) {
933
959
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
934
960
  char *term;
935
961
  VALUE rfnum_map = rb_ivar_get(self, id_fld_num_map);
@@ -939,8 +965,7 @@ frb_tde_seek(VALUE self, VALUE rfield, VALUE rterm)
939
965
  if (rfnum != Qnil) {
940
966
  field_num = FIX2INT(rfnum);
941
967
  } else {
942
- rb_raise(rb_eArgError, "field %s doesn't exist in the index",
943
- rb_id2name(frb_field(rfield)));
968
+ rb_raise(rb_eArgError, "field %s doesn't exist in the index", rb_id2name(frb_field(rfield)));
944
969
  }
945
970
  tde->seek(tde, field_num, term);
946
971
  return self;
@@ -958,9 +983,7 @@ frb_tde_seek(VALUE self, VALUE rfield, VALUE rterm)
958
983
  * However the +seek_term_enum+ method saves an index lookup so should offer
959
984
  * a large performance improvement.
960
985
  */
961
- static VALUE
962
- frb_tde_seek_te(VALUE self, VALUE rterm_enum)
963
- {
986
+ static VALUE frb_tde_seek_te(VALUE self, VALUE rterm_enum) {
964
987
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
965
988
  FrtTermEnum *te = (FrtTermEnum *)frb_rb_data_ptr(rterm_enum);
966
989
  tde->seek_te(tde, te);
@@ -973,9 +996,7 @@ frb_tde_seek_te(VALUE self, VALUE rterm_enum)
973
996
  *
974
997
  * Returns the current document number pointed to by the +term_doc_enum+.
975
998
  */
976
- static VALUE
977
- frb_tde_doc(VALUE self)
978
- {
999
+ static VALUE frb_tde_doc(VALUE self) {
979
1000
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
980
1001
  return INT2FIX(tde->doc_num(tde));
981
1002
  }
@@ -987,9 +1008,7 @@ frb_tde_doc(VALUE self)
987
1008
  * Returns the frequency of the current document pointed to by the
988
1009
  * +term_doc_enum+.
989
1010
  */
990
- static VALUE
991
- frb_tde_freq(VALUE self)
992
- {
1011
+ static VALUE frb_tde_freq(VALUE self) {
993
1012
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
994
1013
  return INT2FIX(tde->freq(tde));
995
1014
  }
@@ -1001,9 +1020,7 @@ frb_tde_freq(VALUE self)
1001
1020
  * Move forward to the next document in the enumeration. Returns +true+ if
1002
1021
  * there is another document or +false+ otherwise.
1003
1022
  */
1004
- static VALUE
1005
- frb_tde_next(VALUE self)
1006
- {
1023
+ static VALUE frb_tde_next(VALUE self) {
1007
1024
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
1008
1025
  return tde->next(tde) ? Qtrue : Qfalse;
1009
1026
  }
@@ -1015,9 +1032,7 @@ frb_tde_next(VALUE self)
1015
1032
  * Move forward to the next document in the enumeration. Returns +true+ if
1016
1033
  * there is another document or +false+ otherwise.
1017
1034
  */
1018
- static VALUE
1019
- frb_tde_next_position(VALUE self)
1020
- {
1035
+ static VALUE frb_tde_next_position(VALUE self) {
1021
1036
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
1022
1037
  int pos;
1023
1038
  if (tde->next_position == NULL) {
@@ -1039,9 +1054,7 @@ frb_tde_next_position(VALUE self)
1039
1054
  * NOTE: this method can only be called once after each seek. If you need to
1040
1055
  * call +#each+ again then you should call +#seek+ again too.
1041
1056
  */
1042
- static VALUE
1043
- frb_tde_each(VALUE self)
1044
- {
1057
+ static VALUE frb_tde_each(VALUE self) {
1045
1058
  int doc_cnt = 0;
1046
1059
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
1047
1060
  VALUE vals = rb_ary_new2(2);
@@ -1083,9 +1096,7 @@ frb_tde_each(VALUE self)
1083
1096
  * # [30,3]
1084
1097
  * # ]
1085
1098
  */
1086
- static VALUE
1087
- frb_tde_to_json(int argc, VALUE *argv, VALUE self)
1088
- {
1099
+ static VALUE frb_tde_to_json(int argc, VALUE *argv, VALUE self) {
1089
1100
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
1090
1101
  VALUE rjson;
1091
1102
  char *json, *jp;
@@ -1157,9 +1168,7 @@ frb_tde_to_json(int argc, VALUE *argv, VALUE self)
1157
1168
  * puts " #{positions.join(', ')}"
1158
1169
  * end
1159
1170
  */
1160
- static VALUE
1161
- frb_tde_each_position(VALUE self)
1162
- {
1171
+ static VALUE frb_tde_each_position(VALUE self) {
1163
1172
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
1164
1173
  int pos;
1165
1174
  if (tde->next_position == NULL) {
@@ -1180,9 +1189,7 @@ frb_tde_each_position(VALUE self)
1180
1189
  * Skip to the required document number +target+ and return true if there is
1181
1190
  * a document >= +target+.
1182
1191
  */
1183
- static VALUE
1184
- frb_tde_skip_to(VALUE self, VALUE rtarget)
1185
- {
1192
+ static VALUE frb_tde_skip_to(VALUE self, VALUE rtarget) {
1186
1193
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
1187
1194
  return tde->skip_to(tde, FIX2INT(rtarget)) ? Qtrue : Qfalse;
1188
1195
  }
@@ -1193,9 +1200,7 @@ frb_tde_skip_to(VALUE self, VALUE rtarget)
1193
1200
  *
1194
1201
  ****************************************************************************/
1195
1202
 
1196
- static VALUE
1197
- frb_get_tv_offsets(FrtOffset *offset)
1198
- {
1203
+ static VALUE frb_get_tv_offsets(FrtOffset *offset) {
1199
1204
  return rb_struct_new(cTVOffsets,
1200
1205
  ULL2NUM((frt_u64)offset->start),
1201
1206
  ULL2NUM((frt_u64)offset->end),
@@ -1208,9 +1213,7 @@ frb_get_tv_offsets(FrtOffset *offset)
1208
1213
  *
1209
1214
  ****************************************************************************/
1210
1215
 
1211
- static VALUE
1212
- frb_get_tv_term(FrtTVTerm *tv_term)
1213
- {
1216
+ static VALUE frb_get_tv_term(FrtTVTerm *tv_term) {
1214
1217
  int i;
1215
1218
  const int freq = tv_term->freq;
1216
1219
  VALUE rtext;
@@ -1232,9 +1235,7 @@ frb_get_tv_term(FrtTVTerm *tv_term)
1232
1235
  *
1233
1236
  ****************************************************************************/
1234
1237
 
1235
- static VALUE
1236
- frb_get_tv(FrtTermVector *tv)
1237
- {
1238
+ static VALUE frb_get_tv(FrtTermVector *tv) {
1238
1239
  int i;
1239
1240
  FrtTVTerm *terms = tv->terms;
1240
1241
  const int t_cnt = tv->term_cnt;
@@ -1265,19 +1266,18 @@ frb_get_tv(FrtTermVector *tv)
1265
1266
  *
1266
1267
  ****************************************************************************/
1267
1268
 
1268
- void
1269
- frb_iw_free(void *p)
1270
- {
1269
+ void frb_iw_free(void *p) {
1271
1270
  frt_iw_close((FrtIndexWriter *)p);
1272
1271
  }
1273
1272
 
1274
- void
1275
- frb_iw_mark(void *p)
1276
- {
1273
+ void frb_iw_mark(void *p) {
1277
1274
  FrtIndexWriter *iw = (FrtIndexWriter *)p;
1278
- frb_gc_mark(iw->analyzer);
1279
- frb_gc_mark(iw->store);
1280
- frb_gc_mark(iw->fis);
1275
+ if (iw->analyzer->ranalyzer)
1276
+ rb_gc_mark(iw->analyzer->ranalyzer);
1277
+ if (iw->store->rstore)
1278
+ rb_gc_mark(iw->store->rstore);
1279
+ if (iw->fis->rfis)
1280
+ rb_gc_mark(iw->fis->rfis);
1281
1281
  }
1282
1282
 
1283
1283
  /*
@@ -1288,11 +1288,11 @@ frb_iw_mark(void *p)
1288
1288
  * exclusively by the index writer. The garbage collector will do this
1289
1289
  * automatically if not called explicitly.
1290
1290
  */
1291
- static VALUE
1292
- frb_iw_close(VALUE self)
1293
- {
1291
+ static VALUE frb_iw_close(VALUE self) {
1294
1292
  FrtIndexWriter *iw = (FrtIndexWriter *)DATA_PTR(self);
1295
- Frt_Unwrap_Struct(self);
1293
+ ((struct RData *)(self))->data = NULL;
1294
+ ((struct RData *)(self))->dmark = NULL;
1295
+ ((struct RData *)(self))->dfree = NULL;
1296
1296
  frt_iw_close(iw);
1297
1297
  return Qnil;
1298
1298
  }
@@ -1321,9 +1321,31 @@ frb_iw_close(VALUE self)
1321
1321
  *
1322
1322
  * See FrtIndexWriter for more options.
1323
1323
  */
1324
- static VALUE
1325
- frb_iw_init(int argc, VALUE *argv, VALUE self)
1326
- {
1324
+ static size_t frb_index_writer_t_size(const void *p) {
1325
+ return sizeof(FrtIndexWriter);
1326
+ (void)p;
1327
+ }
1328
+
1329
+ const rb_data_type_t frb_index_writer_t = {
1330
+ .wrap_struct_name = "FrbIndexWriter",
1331
+ .function = {
1332
+ .dmark = frb_iw_mark,
1333
+ .dfree = frb_iw_free,
1334
+ .dsize = frb_index_writer_t_size,
1335
+ .dcompact = NULL,
1336
+ .reserved = {0},
1337
+ },
1338
+ .parent = NULL,
1339
+ .data = NULL,
1340
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY
1341
+ };
1342
+
1343
+ static VALUE frb_iw_alloc(VALUE rclass) {
1344
+ FrtIndexWriter *iw = frt_iw_alloc();
1345
+ return TypedData_Wrap_Struct(rclass, &frb_index_writer_t, iw);
1346
+ }
1347
+
1348
+ static VALUE frb_iw_init(int argc, VALUE *argv, VALUE self) {
1327
1349
  VALUE roptions, rval;
1328
1350
  bool create = false;
1329
1351
  bool create_if_missing = true;
@@ -1341,7 +1363,7 @@ frb_iw_init(int argc, VALUE *argv, VALUE self)
1341
1363
  Check_Type(roptions, T_HASH);
1342
1364
 
1343
1365
  if ((rval = rb_hash_aref(roptions, sym_dir)) != Qnil) {
1344
- Check_Type(rval, T_DATA);
1366
+ // Check_Type(rval, T_DATA);
1345
1367
  store = DATA_PTR(rval);
1346
1368
  } else if ((rval = rb_hash_aref(roptions, sym_path)) != Qnil) {
1347
1369
  StringValue(rval);
@@ -1349,17 +1371,9 @@ frb_iw_init(int argc, VALUE *argv, VALUE self)
1349
1371
  store = frt_open_fs_store(rs2s(rval));
1350
1372
  FRT_DEREF(store);
1351
1373
  }
1352
-
1353
- /* Let ruby's garbage collector handle the closing of the store
1354
- if (!close_dir) {
1355
- close_dir = RTEST(rb_hash_aref(roptions, sym_close_dir));
1356
- }
1357
- */
1358
1374
  /* use_compound_file defaults to true */
1359
1375
  config.use_compound_file =
1360
- (rb_hash_aref(roptions, sym_use_compound_file) == Qfalse)
1361
- ? false
1362
- : true;
1376
+ (rb_hash_aref(roptions, sym_use_compound_file) == Qfalse) ? false : true;
1363
1377
 
1364
1378
  if ((rval = rb_hash_aref(roptions, sym_analyzer)) != Qnil) {
1365
1379
  analyzer = frb_get_cwrapped_analyzer(rval);
@@ -1379,7 +1393,7 @@ frb_iw_init(int argc, VALUE *argv, VALUE self)
1379
1393
  SET_INT_ATTR(max_field_length);
1380
1394
  }
1381
1395
  if (NULL == store) {
1382
- store = frt_open_ram_store();
1396
+ store = frt_open_ram_store(NULL);
1383
1397
  FRT_DEREF(store);
1384
1398
  }
1385
1399
  if (!create && create_if_missing && !store->exists(store, "segments")) {
@@ -1388,26 +1402,29 @@ frb_iw_init(int argc, VALUE *argv, VALUE self)
1388
1402
  if (create) {
1389
1403
  FrtFieldInfos *fis;
1390
1404
  if ((rval = rb_hash_aref(roptions, sym_field_infos)) != Qnil) {
1391
- Data_Get_Struct(rval, FrtFieldInfos, fis);
1405
+ TypedData_Get_Struct(rval, FrtFieldInfos, &frb_field_infos_t, fis);
1392
1406
  frt_index_create(store, fis);
1393
1407
  } else {
1394
- fis = frt_fis_new(FRT_STORE_YES, FRT_INDEX_YES,
1395
- FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS);
1408
+ fis = frt_fis_new(FRT_STORE_YES, FRT_COMPRESSION_NONE, FRT_INDEX_YES, FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS);
1396
1409
  frt_index_create(store, fis);
1397
1410
  frt_fis_deref(fis);
1398
1411
  }
1399
1412
  }
1400
1413
 
1401
- iw = frt_iw_open(store, analyzer, &config);
1402
-
1403
- Frt_Wrap_Struct(self, &frb_iw_mark, &frb_iw_free, iw);
1404
- default:
1414
+ TypedData_Get_Struct(self, FrtIndexWriter, &frb_index_writer_t, iw);
1415
+ iw = frt_iw_open(iw, store, analyzer, &config);
1416
+ FRT_XCATCHALL
1405
1417
  ex_code = xcontext.excode;
1406
1418
  msg = xcontext.msg;
1407
1419
  FRT_HANDLED();
1408
1420
  FRT_XENDTRY
1409
1421
 
1410
- if (ex_code && msg) { frb_raise(ex_code, msg); }
1422
+ if (ex_code && msg) {
1423
+ ((struct RData *)(self))->data = NULL;
1424
+ ((struct RData *)(self))->dmark = NULL;
1425
+ ((struct RData *)(self))->dfree = NULL;
1426
+ frb_raise(ex_code, msg);
1427
+ }
1411
1428
 
1412
1429
  if (rb_block_given_p()) {
1413
1430
  rb_yield(self);
@@ -1439,7 +1456,7 @@ frb_hash_to_doc_i(VALUE key, VALUE value, VALUE arg)
1439
1456
  return ST_CONTINUE;
1440
1457
  } else {
1441
1458
  FrtDocument *doc = (FrtDocument *)arg;
1442
- FrtSymbol field = frb_field(key);
1459
+ ID field = frb_field(key);
1443
1460
  VALUE val;
1444
1461
  FrtDocField *df;
1445
1462
  if (NULL == (df = frt_doc_get_field(doc, field))) {
@@ -1455,17 +1472,17 @@ frb_hash_to_doc_i(VALUE key, VALUE value, VALUE arg)
1455
1472
  df->destroy_data = true;
1456
1473
  for (i = 0; i < RARRAY_LEN(value); i++) {
1457
1474
  val = rb_obj_as_string(RARRAY_PTR(value)[i]);
1458
- frt_df_add_data_len(df, rstrdup(val), RSTRING_LEN(val));
1475
+ frt_df_add_data_len(df, rstrdup(val), RSTRING_LEN(val), rb_enc_get(val));
1459
1476
  }
1460
1477
  }
1461
1478
  break;
1462
1479
  case T_STRING:
1463
- frt_df_add_data_len(df, rs2s(value), RSTRING_LEN(value));
1480
+ frt_df_add_data_len(df, rs2s(value), RSTRING_LEN(value), rb_enc_get(value));
1464
1481
  break;
1465
1482
  default:
1466
1483
  val = rb_obj_as_string(value);
1467
1484
  df->destroy_data = true;
1468
- frt_df_add_data_len(df, rstrdup(val), RSTRING_LEN(val));
1485
+ frt_df_add_data_len(df, rstrdup(val), RSTRING_LEN(val), rb_enc_get(val));
1469
1486
  break;
1470
1487
  }
1471
1488
  frt_doc_add_field(doc, df);
@@ -1495,25 +1512,23 @@ frb_get_doc(VALUE rdoc)
1495
1512
  df->destroy_data = true;
1496
1513
  for (i = 0; i < RARRAY_LEN(rdoc); i++) {
1497
1514
  val = rb_obj_as_string(RARRAY_PTR(rdoc)[i]);
1498
- frt_df_add_data_len(df, rstrdup(val), RSTRING_LEN(val));
1515
+ frt_df_add_data_len(df, rstrdup(val), RSTRING_LEN(val), rb_enc_get(val));
1499
1516
  }
1500
1517
  frt_doc_add_field(doc, df);
1501
1518
  }
1502
1519
  break;
1503
1520
  case T_SYMBOL:
1504
1521
  /* TODO: clean up this ugly cast */
1505
- df = frt_df_add_data(frt_df_new(fsym_content), (char *)rb_id2name(SYM2ID(rdoc)));
1522
+ df = frt_df_add_data(frt_df_new(fsym_content), (char *)rb_id2name(SYM2ID(rdoc)), rb_enc_get(rdoc));
1506
1523
  frt_doc_add_field(doc, df);
1507
1524
  break;
1508
1525
  case T_STRING:
1509
- df = frt_df_add_data_len(frt_df_new(fsym_content), rs2s(rdoc),
1510
- RSTRING_LEN(rdoc));
1526
+ df = frt_df_add_data_len(frt_df_new(fsym_content), rs2s(rdoc), RSTRING_LEN(rdoc), rb_enc_get(rdoc));
1511
1527
  frt_doc_add_field(doc, df);
1512
1528
  break;
1513
1529
  default:
1514
1530
  val = rb_obj_as_string(rdoc);
1515
- df = frt_df_add_data_len(frt_df_new(fsym_content), rstrdup(val),
1516
- RSTRING_LEN(val));
1531
+ df = frt_df_add_data_len(frt_df_new(fsym_content), rstrdup(val), RSTRING_LEN(val), rb_enc_get(val));
1517
1532
  df->destroy_data = true;
1518
1533
  frt_doc_add_field(doc, df);
1519
1534
  break;
@@ -1575,6 +1590,48 @@ frb_iw_commit(VALUE self)
1575
1590
  return self;
1576
1591
  }
1577
1592
 
1593
+ /* index reader intermission */
1594
+ static VALUE frb_ir_close(VALUE self);
1595
+
1596
+ void frb_ir_free(void *p) {
1597
+ frt_ir_close((FrtIndexReader *)p);
1598
+ }
1599
+
1600
+ void frb_ir_mark(void *p) {
1601
+ FrtIndexReader *ir = (FrtIndexReader *)p;
1602
+ FrtMultiReader *mr = (FrtMultiReader *)p;
1603
+
1604
+ if (ir->type == FRT_MULTI_READER) {
1605
+ int i;
1606
+ for (i = 0; i < mr->r_cnt; i++) {
1607
+ if (mr->sub_readers[i]->rir)
1608
+ rb_gc_mark(mr->sub_readers[i]->rir);
1609
+ }
1610
+ } else {
1611
+ if (ir->store && ir->store->rstore)
1612
+ rb_gc_mark(ir->store->rstore);
1613
+ }
1614
+ }
1615
+
1616
+ static size_t frb_index_reader_t_size(const void *p) {
1617
+ return sizeof(FrtMultiReader);
1618
+ (void)p;
1619
+ }
1620
+
1621
+ const rb_data_type_t frb_index_reader_t = {
1622
+ .wrap_struct_name = "FrbIndexReader",
1623
+ .function = {
1624
+ .dmark = frb_ir_mark,
1625
+ .dfree = frb_ir_free,
1626
+ .dsize = frb_index_reader_t_size,
1627
+ .dcompact = NULL,
1628
+ .reserved = {0},
1629
+ },
1630
+ .parent = NULL,
1631
+ .data = NULL,
1632
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY
1633
+ };
1634
+
1578
1635
  /*
1579
1636
  * call-seq:
1580
1637
  * iw.add_readers(reader_array) -> iw
@@ -1585,9 +1642,7 @@ frb_iw_commit(VALUE self)
1585
1642
  * machines. Then you can finish by merging all of the indexes into a single
1586
1643
  * index.
1587
1644
  */
1588
- static VALUE
1589
- frb_iw_add_readers(VALUE self, VALUE rreaders)
1590
- {
1645
+ static VALUE frb_iw_add_readers(VALUE self, VALUE rreaders) {
1591
1646
  FrtIndexWriter *iw = (FrtIndexWriter *)DATA_PTR(self);
1592
1647
  int i;
1593
1648
  FrtIndexReader **irs;
@@ -1597,7 +1652,7 @@ frb_iw_add_readers(VALUE self, VALUE rreaders)
1597
1652
  i = RARRAY_LEN(rreaders);
1598
1653
  while (i-- > 0) {
1599
1654
  FrtIndexReader *ir;
1600
- Data_Get_Struct(RARRAY_PTR(rreaders)[i], FrtIndexReader, ir);
1655
+ TypedData_Get_Struct(RARRAY_PTR(rreaders)[i], FrtIndexReader, &frb_index_reader_t, ir);
1601
1656
  irs[i] = ir;
1602
1657
  }
1603
1658
  frt_iw_add_readers(iw, irs, RARRAY_LEN(rreaders));
@@ -1943,26 +1998,50 @@ frb_iw_set_use_compound_file(VALUE self, VALUE rval)
1943
1998
  *
1944
1999
  ****************************************************************************/
1945
2000
 
1946
- static void
1947
- frb_lzd_data_free(void *p)
1948
- {
2001
+ static void frb_lzd_data_free(void *p) {
1949
2002
  frt_lazy_doc_close((FrtLazyDoc *)p);
1950
2003
  }
1951
2004
 
1952
- static VALUE
1953
- frb_lazy_df_load(VALUE self, VALUE rkey, FrtLazyDocField *lazy_df)
1954
- {
2005
+ static size_t frb_lazy_doc_size(const void *p) {
2006
+ return sizeof(FrtLazyDoc);
2007
+ (void)p;
2008
+ }
2009
+
2010
+ const rb_data_type_t frb_lazy_doc_t = {
2011
+ .wrap_struct_name = "FrbLazyDoc",
2012
+ .function = {
2013
+ .dmark = NULL,
2014
+ .dfree = frb_lzd_data_free,
2015
+ .dsize = frb_lazy_doc_size,
2016
+ .dcompact = NULL,
2017
+ .reserved = {0},
2018
+ },
2019
+ .parent = NULL,
2020
+ .data = NULL,
2021
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY
2022
+ };
2023
+
2024
+ static VALUE frb_lzd_alloc(VALUE klass) {
2025
+ FrtLazyDoc *ld = FRT_ALLOC(FrtLazyDoc);
2026
+ return TypedData_Wrap_Struct(klass, &frb_lazy_doc_t, ld);
2027
+ }
2028
+
2029
+ static VALUE frb_lazy_df_load(VALUE self, VALUE rkey, FrtLazyDocField *lazy_df) {
1955
2030
  VALUE rdata = Qnil;
1956
2031
  if (lazy_df) {
1957
2032
  if (lazy_df->size == 1) {
1958
2033
  char *data = frt_lazy_df_get_data(lazy_df, 0);
1959
- rdata = rb_str_new(data, lazy_df->len);
2034
+ rdata = rb_str_new(data, lazy_df->data[0].length);
2035
+ rb_enc_associate(rdata, lazy_df->data[0].encoding);
1960
2036
  } else {
1961
2037
  int i;
2038
+ VALUE rstr;
1962
2039
  rdata = rb_ary_new2(lazy_df->size);
1963
2040
  for (i = 0; i < lazy_df->size; i++) {
1964
2041
  char *data = frt_lazy_df_get_data(lazy_df, i);
1965
- rb_ary_store(rdata, i, rb_str_new(data, lazy_df->data[i].length));
2042
+ rstr = rb_str_new(data, lazy_df->data[i].length);
2043
+ rb_enc_associate(rstr, lazy_df->data[i].encoding);
2044
+ rb_ary_store(rdata, i, rstr);
1966
2045
  }
1967
2046
  }
1968
2047
  rb_hash_aset(self, rkey, rdata);
@@ -1977,11 +2056,9 @@ frb_lazy_df_load(VALUE self, VALUE rkey, FrtLazyDocField *lazy_df)
1977
2056
  * This method is used internally to lazily load fields. You should never
1978
2057
  * really need to call it yourself.
1979
2058
  */
1980
- static VALUE
1981
- frb_lzd_default(VALUE self, VALUE rkey)
1982
- {
2059
+ static VALUE frb_lzd_default(VALUE self, VALUE rkey) {
1983
2060
  FrtLazyDoc *lazy_doc = (FrtLazyDoc *)DATA_PTR(rb_ivar_get(self, id_data));
1984
- FrtSymbol field = frb_field(rkey);
2061
+ ID field = frb_field(rkey);
1985
2062
  VALUE rfield = ID2SYM(field);
1986
2063
 
1987
2064
  return frb_lazy_df_load(self, rfield, frt_lazy_doc_get(lazy_doc, field));
@@ -1995,9 +2072,7 @@ frb_lzd_default(VALUE self, VALUE rkey)
1995
2072
  * to access any of these fields in the document the field will be loaded.
1996
2073
  * Try to access any other field and nil will be returned.
1997
2074
  */
1998
- static VALUE
1999
- frb_lzd_fields(VALUE self)
2000
- {
2075
+ static VALUE frb_lzd_fields(VALUE self) {
2001
2076
  return rb_ivar_get(self, id_fields);
2002
2077
  }
2003
2078
 
@@ -2007,9 +2082,7 @@ frb_lzd_fields(VALUE self)
2007
2082
  *
2008
2083
  * Load all unloaded fields in the document from the index.
2009
2084
  */
2010
- static VALUE
2011
- frb_lzd_load(VALUE self)
2012
- {
2085
+ static VALUE frb_lzd_load(VALUE self) {
2013
2086
  FrtLazyDoc *lazy_doc = (FrtLazyDoc *)DATA_PTR(rb_ivar_get(self, id_data));
2014
2087
  int i;
2015
2088
  for (i = 0; i < lazy_doc->size; i++) {
@@ -2019,9 +2092,7 @@ frb_lzd_load(VALUE self)
2019
2092
  return self;
2020
2093
  }
2021
2094
 
2022
- VALUE
2023
- frb_get_lazy_doc(FrtLazyDoc *lazy_doc)
2024
- {
2095
+ VALUE frb_get_lazy_doc(FrtLazyDoc *lazy_doc) {
2025
2096
  int i;
2026
2097
  VALUE rfields = rb_ary_new2(lazy_doc->size);
2027
2098
 
@@ -2029,7 +2100,7 @@ frb_get_lazy_doc(FrtLazyDoc *lazy_doc)
2029
2100
  self = rb_hash_new();
2030
2101
  OBJSETUP(self, cLazyDoc, T_HASH);
2031
2102
 
2032
- rdata = Data_Wrap_Struct(cLazyDocData, NULL, &frb_lzd_data_free, lazy_doc);
2103
+ rdata = TypedData_Wrap_Struct(cLazyDocData, &frb_lazy_doc_t, lazy_doc);
2033
2104
  rb_ivar_set(self, id_data, rdata);
2034
2105
 
2035
2106
  for (i = 0; i < lazy_doc->size; i++) {
@@ -2046,32 +2117,6 @@ frb_get_lazy_doc(FrtLazyDoc *lazy_doc)
2046
2117
  *
2047
2118
  ****************************************************************************/
2048
2119
 
2049
- void
2050
- frb_ir_free(void *p)
2051
- {
2052
- object_del(p);
2053
- frt_ir_close((FrtIndexReader *)p);
2054
- }
2055
-
2056
- void
2057
- frb_ir_mark(void *p)
2058
- {
2059
- FrtIndexReader *ir = (FrtIndexReader *)p;
2060
- frb_gc_mark(ir->store);
2061
- }
2062
-
2063
- static VALUE frb_ir_close(VALUE self);
2064
-
2065
- void
2066
- frb_mr_mark(void *p)
2067
- {
2068
- FrtMultiReader *mr = (FrtMultiReader *)p;
2069
- int i;
2070
- for (i = 0; i < mr->r_cnt; i++) {
2071
- frb_gc_mark(mr->sub_readers[i]);
2072
- }
2073
- }
2074
-
2075
2120
  /*
2076
2121
  * call-seq:
2077
2122
  * IndexReader.new(dir) -> index_reader
@@ -2098,9 +2143,15 @@ frb_mr_mark(void *p)
2098
2143
  *
2099
2144
  * iw = IndexReader.new(["/path/to/index1", "/path/to/index2"])
2100
2145
  */
2101
- static VALUE
2102
- frb_ir_init(VALUE self, VALUE rdir)
2103
- {
2146
+
2147
+ static VALUE frb_ir_alloc(VALUE rclass) {
2148
+ // allocate for FrtSegmentReader, the largest of the Frt*Reader structs,
2149
+ // FrtIndexReader is part of it and later on it's determined what it's going to be
2150
+ FrtIndexReader *ir = (FrtIndexReader *)frt_sr_alloc();
2151
+ return TypedData_Wrap_Struct(rclass, &frb_index_reader_t, ir);
2152
+ }
2153
+
2154
+ static VALUE frb_ir_init(VALUE self, VALUE rdir) {
2104
2155
  FrtStore *store = NULL;
2105
2156
  FrtIndexReader *ir;
2106
2157
  int i;
@@ -2120,7 +2171,7 @@ frb_ir_init(VALUE self, VALUE rdir)
2120
2171
  switch (TYPE(rdir)) {
2121
2172
  case T_DATA:
2122
2173
  if (CLASS_OF(rdir) == cIndexReader) {
2123
- Data_Get_Struct(rdir, FrtIndexReader, sub_readers[i]);
2174
+ TypedData_Get_Struct(rdir, FrtIndexReader, &frb_index_reader_t, sub_readers[i]);
2124
2175
  FRT_REF(sub_readers[i]);
2125
2176
  continue;
2126
2177
  } else if (RTEST(rb_obj_is_kind_of(rdir, cDirectory))) {
@@ -2145,10 +2196,10 @@ frb_ir_init(VALUE self, VALUE rdir)
2145
2196
  rs2s(rb_obj_as_string(rdir)));
2146
2197
  break;
2147
2198
  }
2148
- sub_readers[i] = frt_ir_open(store);
2199
+ sub_readers[i] = frt_ir_open(NULL, store);
2149
2200
  }
2150
- ir = frt_mr_open(sub_readers, reader_cnt);
2151
- Frt_Wrap_Struct(self, &frb_mr_mark, &frb_ir_free, ir);
2201
+ TypedData_Get_Struct(self, FrtIndexReader, &frb_index_reader_t, ir);
2202
+ ir = frt_mr_open(ir, sub_readers, reader_cnt);
2152
2203
  } else {
2153
2204
  switch (TYPE(rdir)) {
2154
2205
  case T_DATA:
@@ -2165,25 +2216,28 @@ frb_ir_init(VALUE self, VALUE rdir)
2165
2216
  rs2s(rb_obj_as_string(rdir)));
2166
2217
  break;
2167
2218
  }
2168
- ir = frt_ir_open(store);
2169
- Frt_Wrap_Struct(self, &frb_ir_mark, &frb_ir_free, ir);
2219
+ TypedData_Get_Struct(self, FrtIndexReader, &frb_index_reader_t, ir);
2220
+ ir = frt_ir_open(ir, store);
2170
2221
  }
2171
- default:
2222
+ FRT_XCATCHALL
2172
2223
  ex_code = xcontext.excode;
2173
2224
  msg = xcontext.msg;
2174
2225
  FRT_HANDLED();
2175
2226
  FRT_XENDTRY
2176
2227
 
2177
- if (ex_code && msg) { frb_raise(ex_code, msg); }
2228
+ if (ex_code && msg) {
2229
+ ((struct RData *)(self))->data = NULL;
2230
+ ((struct RData *)(self))->dmark = NULL;
2231
+ ((struct RData *)(self))->dfree = NULL;
2232
+ frb_raise(ex_code, msg);
2233
+ }
2178
2234
 
2179
- object_add(ir, self);
2235
+ ir->rir = self;
2180
2236
 
2181
2237
  fis = ir->fis;
2182
2238
  for (i = 0; i < fis->size; i++) {
2183
2239
  FrtFieldInfo *fi = fis->fields[i];
2184
- rb_hash_aset(rfield_num_map,
2185
- ID2SYM(fi->name),
2186
- INT2FIX(fi->number));
2240
+ rb_hash_aset(rfield_num_map, ID2SYM(fi->name), INT2FIX(fi->number));
2187
2241
  }
2188
2242
  rb_ivar_set(self, id_fld_num_map, rfield_num_map);
2189
2243
 
@@ -2281,8 +2335,9 @@ static VALUE
2281
2335
  frb_ir_close(VALUE self)
2282
2336
  {
2283
2337
  FrtIndexReader *ir = (FrtIndexReader *)DATA_PTR(self);
2284
- object_del(ir);
2285
- Frt_Unwrap_Struct(self);
2338
+ ((struct RData *)(self))->data = NULL;
2339
+ ((struct RData *)(self))->dmark = NULL;
2340
+ ((struct RData *)(self))->dfree = NULL;
2286
2341
  frt_ir_close(ir);
2287
2342
  return self;
2288
2343
  }
@@ -2482,7 +2537,7 @@ frb_ir_term_vector(VALUE self, VALUE rdoc_id, VALUE rfield)
2482
2537
  static void
2483
2538
  frb_add_each_tv(void *key, void *value, void *rtvs)
2484
2539
  {
2485
- rb_hash_aset((VALUE)rtvs, ID2SYM((FrtSymbol)key), frb_get_tv(value));
2540
+ rb_hash_aset((VALUE)rtvs, ID2SYM((ID)key), frb_get_tv(value));
2486
2541
  }
2487
2542
 
2488
2543
  /*
@@ -2769,9 +2824,14 @@ frb_ir_version(VALUE self)
2769
2824
  * | | want to highlight matches.
2770
2825
  * | | or print match excerpts a la
2771
2826
  * | | Google search.
2827
+ * -------------|-------------------------|------------------------------
2828
+ * :compression | :no (default) | Don't compress stored field
2829
+ * | |
2830
+ * | :brotli | Compress field using Brotli
2772
2831
  * | |
2773
- * | :compressed | Store field in compressed
2774
- * | | format.
2832
+ * | :bz2 | Compress field using BZip2
2833
+ * | |
2834
+ * | :lz4 | Compress field using LZ4
2775
2835
  * -------------|-------------------------|------------------------------
2776
2836
  * :index | :no | Do not make this field
2777
2837
  * | | searchable.
@@ -2831,7 +2891,7 @@ frb_ir_version(VALUE self)
2831
2891
  * fi = FieldInfo.new(:created_on, :index => :untokenized_omit_norms,
2832
2892
  * :term_vector => :no)
2833
2893
  *
2834
- * fi = FieldInfo.new(:image, :store => :compressed, :index => :no,
2894
+ * fi = FieldInfo.new(:image, :store => :yes, :compression => :brotli, :index => :no,
2835
2895
  * :term_vector => :no)
2836
2896
  */
2837
2897
  static void
@@ -2841,8 +2901,11 @@ Init_FieldInfo(void)
2841
2901
  sym_index = ID2SYM(rb_intern("index"));
2842
2902
  sym_term_vector = ID2SYM(rb_intern("term_vector"));
2843
2903
 
2844
- sym_compress = ID2SYM(rb_intern("compress"));
2845
- sym_compressed = ID2SYM(rb_intern("compressed"));
2904
+ sym_brotli = ID2SYM(rb_intern("brotli"));
2905
+ sym_bz2 = ID2SYM(rb_intern("bz2"));
2906
+ sym_lz4 = ID2SYM(rb_intern("lz4"));
2907
+ // sym_level = ID2SYM(rb_intern("level"));
2908
+ sym_compression = ID2SYM(rb_intern("compression"));
2846
2909
 
2847
2910
  sym_untokenized = ID2SYM(rb_intern("untokenized"));
2848
2911
  sym_omit_norms = ID2SYM(rb_intern("omit_norms"));
@@ -2853,7 +2916,7 @@ Init_FieldInfo(void)
2853
2916
  sym_with_positions_offsets = ID2SYM(rb_intern("with_positions_offsets"));
2854
2917
 
2855
2918
  cFieldInfo = rb_define_class_under(mIndex, "FieldInfo", rb_cObject);
2856
- rb_define_alloc_func(cFieldInfo, frb_data_alloc);
2919
+ rb_define_alloc_func(cFieldInfo, frb_fi_alloc);
2857
2920
 
2858
2921
  rb_define_method(cFieldInfo, "initialize", frb_fi_init, -1);
2859
2922
  rb_define_method(cFieldInfo, "name", frb_fi_name, 0);
@@ -2897,7 +2960,7 @@ Init_FieldInfo(void)
2897
2960
  * field_infos.add_field(:created_on, :index => :untokenized_omit_norms,
2898
2961
  * :term_vector => :no)
2899
2962
  *
2900
- * field_infos.add_field(:image, :store => :compressed, :index => :no,
2963
+ * field_infos.add_field(:image, :store => :yes, :compression => :brotli, :index => :no,
2901
2964
  * :term_vector => :no)
2902
2965
  *
2903
2966
  * field_infos.create_index("/path/to/index")
@@ -2913,13 +2976,11 @@ Init_FieldInfo(void)
2913
2976
  * along. If you add a document to the index which has fields that the index
2914
2977
  * doesn't know about then the default properties are used for the new field.
2915
2978
  */
2916
- static void
2917
- Init_FieldInfos(void)
2918
- {
2979
+ static void Init_FieldInfos(void) {
2919
2980
  Init_FieldInfo();
2920
2981
 
2921
2982
  cFieldInfos = rb_define_class_under(mIndex, "FieldInfos", rb_cObject);
2922
- rb_define_alloc_func(cFieldInfos, frb_data_alloc);
2983
+ rb_define_alloc_func(cFieldInfos, frb_fis_alloc);
2923
2984
 
2924
2985
  rb_define_method(cFieldInfos, "initialize", frb_fis_init, -1);
2925
2986
  rb_define_method(cFieldInfos, "to_a", frb_fis_to_a, 0);
@@ -2958,12 +3019,11 @@ Init_FieldInfos(void)
2958
3019
  * end
2959
3020
  */
2960
3021
  static void
2961
- Init_TermEnum(void)
2962
- {
3022
+ Init_TermEnum(void) {
2963
3023
  id_term = rb_intern("@term");
2964
3024
 
2965
3025
  cTermEnum = rb_define_class_under(mIndex, "TermEnum", rb_cObject);
2966
- rb_define_alloc_func(cTermEnum, frb_data_alloc);
3026
+ rb_define_alloc_func(cTermEnum, frb_te_alloc);
2967
3027
 
2968
3028
  rb_define_method(cTermEnum, "next?", frb_te_next, 0);
2969
3029
  rb_define_method(cTermEnum, "term", frb_te_term, 0);
@@ -3007,14 +3067,12 @@ Init_TermEnum(void)
3007
3067
  * puts " #{positions.join(', ')}"
3008
3068
  * end
3009
3069
  */
3010
- static void
3011
- Init_TermDocEnum(void)
3012
- {
3070
+ static void Init_TermDocEnum(void) {
3013
3071
  id_fld_num_map = rb_intern("@field_num_map");
3014
3072
  id_field_num = rb_intern("@field_num");
3015
3073
 
3016
3074
  cTermDocEnum = rb_define_class_under(mIndex, "TermDocEnum", rb_cObject);
3017
- rb_define_alloc_func(cTermDocEnum, frb_data_alloc);
3075
+ rb_define_alloc_func(cTermDocEnum, frb_tde_alloc);
3018
3076
  rb_define_method(cTermDocEnum, "seek", frb_tde_seek, 2);
3019
3077
  rb_define_method(cTermDocEnum, "seek_term_enum", frb_tde_seek_te, 1);
3020
3078
  rb_define_method(cTermDocEnum, "doc", frb_tde_doc, 0);
@@ -3047,9 +3105,7 @@ cTermVector = rb_define_class_under(mIndex, "TermVector", rb_cObject);
3047
3105
  *
3048
3106
  * See the Analysis module for more information on setting the offsets.
3049
3107
  */
3050
- static void
3051
- Init_TVOffsets(void)
3052
- {
3108
+ static void Init_TVOffsets(void) {
3053
3109
  const char *tv_offsets_class = "TVOffsets";
3054
3110
  /* rdochack
3055
3111
  cTVOffsets = rb_define_class_under(cTermVector, "TVOffsets", rb_cObject);
@@ -3245,112 +3301,80 @@ Init_TermVector(void)
3245
3301
  *
3246
3302
  * index_writer.delete(:id, "/path/to/indexed/file")
3247
3303
  */
3248
- void
3249
- Init_IndexWriter(void)
3250
- {
3304
+ void Init_IndexWriter(void) {
3251
3305
  id_boost = rb_intern("boost");
3252
3306
 
3253
- sym_create = ID2SYM(rb_intern("create"));
3254
- sym_create_if_missing = ID2SYM(rb_intern("create_if_missing"));
3255
- sym_field_infos = ID2SYM(rb_intern("field_infos"));
3307
+ sym_create = ID2SYM(rb_intern("create"));
3308
+ sym_create_if_missing = ID2SYM(rb_intern("create_if_missing"));
3309
+ sym_field_infos = ID2SYM(rb_intern("field_infos"));
3256
3310
 
3257
- sym_chunk_size = ID2SYM(rb_intern("chunk_size"));
3258
- sym_max_buffer_memory = ID2SYM(rb_intern("max_buffer_memory"));
3259
- sym_index_interval = ID2SYM(rb_intern("term_index_interval"));
3260
- sym_skip_interval = ID2SYM(rb_intern("doc_skip_interval"));
3261
- sym_merge_factor = ID2SYM(rb_intern("merge_factor"));
3262
- sym_max_buffered_docs = ID2SYM(rb_intern("max_buffered_docs"));
3263
- sym_max_merge_docs = ID2SYM(rb_intern("max_merge_docs"));
3264
- sym_max_field_length = ID2SYM(rb_intern("max_field_length"));
3265
- sym_use_compound_file = ID2SYM(rb_intern("use_compound_file"));
3311
+ sym_chunk_size = ID2SYM(rb_intern("chunk_size"));
3312
+ sym_max_buffer_memory = ID2SYM(rb_intern("max_buffer_memory"));
3313
+ sym_index_interval = ID2SYM(rb_intern("term_index_interval"));
3314
+ sym_skip_interval = ID2SYM(rb_intern("doc_skip_interval"));
3315
+ sym_merge_factor = ID2SYM(rb_intern("merge_factor"));
3316
+ sym_max_buffered_docs = ID2SYM(rb_intern("max_buffered_docs"));
3317
+ sym_max_merge_docs = ID2SYM(rb_intern("max_merge_docs"));
3318
+ sym_max_field_length = ID2SYM(rb_intern("max_field_length"));
3319
+ sym_use_compound_file = ID2SYM(rb_intern("use_compound_file"));
3266
3320
 
3267
3321
  cIndexWriter = rb_define_class_under(mIndex, "IndexWriter", rb_cObject);
3268
- rb_define_alloc_func(cIndexWriter, frb_data_alloc);
3322
+ rb_define_alloc_func(cIndexWriter, frb_iw_alloc);
3269
3323
 
3270
3324
  rb_define_const(cIndexWriter, "WRITE_LOCK_TIMEOUT", INT2FIX(1));
3271
3325
  rb_define_const(cIndexWriter, "COMMIT_LOCK_TIMEOUT", INT2FIX(10));
3272
- rb_define_const(cIndexWriter, "WRITE_LOCK_NAME",
3273
- rb_str_new2(FRT_WRITE_LOCK_NAME));
3274
- rb_define_const(cIndexWriter, "COMMIT_LOCK_NAME",
3275
- rb_str_new2(FRT_COMMIT_LOCK_NAME));
3276
- rb_define_const(cIndexWriter, "DEFAULT_CHUNK_SIZE",
3277
- INT2FIX(frt_default_config.chunk_size));
3278
- rb_define_const(cIndexWriter, "DEFAULT_MAX_BUFFER_MEMORY",
3279
- INT2FIX(frt_default_config.max_buffer_memory));
3280
- rb_define_const(cIndexWriter, "DEFAULT_TERM_INDEX_INTERVAL",
3281
- INT2FIX(frt_default_config.index_interval));
3282
- rb_define_const(cIndexWriter, "DEFAULT_DOC_SKIP_INTERVAL",
3283
- INT2FIX(frt_default_config.skip_interval));
3284
- rb_define_const(cIndexWriter, "DEFAULT_MERGE_FACTOR",
3285
- INT2FIX(frt_default_config.merge_factor));
3286
- rb_define_const(cIndexWriter, "DEFAULT_MAX_BUFFERED_DOCS",
3287
- INT2FIX(frt_default_config.max_buffered_docs));
3288
- rb_define_const(cIndexWriter, "DEFAULT_MAX_MERGE_DOCS",
3289
- INT2FIX(frt_default_config.max_merge_docs));
3290
- rb_define_const(cIndexWriter, "DEFAULT_MAX_FIELD_LENGTH",
3291
- INT2FIX(frt_default_config.max_field_length));
3292
- rb_define_const(cIndexWriter, "DEFAULT_USE_COMPOUND_FILE",
3293
- frt_default_config.use_compound_file ? Qtrue : Qfalse);
3294
-
3295
- rb_define_method(cIndexWriter, "initialize", frb_iw_init, -1);
3296
- rb_define_method(cIndexWriter, "doc_count", frb_iw_get_doc_count, 0);
3297
- rb_define_method(cIndexWriter, "close", frb_iw_close, 0);
3298
- rb_define_method(cIndexWriter, "add_document", frb_iw_add_doc, 1);
3299
- rb_define_method(cIndexWriter, "<<", frb_iw_add_doc, 1);
3300
- rb_define_method(cIndexWriter, "optimize", frb_iw_optimize, 0);
3301
- rb_define_method(cIndexWriter, "commit", frb_iw_commit, 0);
3302
- rb_define_method(cIndexWriter, "add_readers", frb_iw_add_readers, 1);
3303
- rb_define_method(cIndexWriter, "delete", frb_iw_delete, 2);
3304
- rb_define_method(cIndexWriter, "field_infos", frb_iw_field_infos, 0);
3305
- rb_define_method(cIndexWriter, "analyzer", frb_iw_get_analyzer, 0);
3306
- rb_define_method(cIndexWriter, "analyzer=", frb_iw_set_analyzer, 1);
3307
- rb_define_method(cIndexWriter, "version", frb_iw_version, 0);
3308
-
3309
- rb_define_method(cIndexWriter, "chunk_size",
3310
- frb_iw_get_chunk_size, 0);
3311
- rb_define_method(cIndexWriter, "chunk_size=",
3312
- frb_iw_set_chunk_size, 1);
3313
-
3314
- rb_define_method(cIndexWriter, "max_buffer_memory",
3315
- frb_iw_get_max_buffer_memory, 0);
3316
- rb_define_method(cIndexWriter, "max_buffer_memory=",
3317
- frb_iw_set_max_buffer_memory, 1);
3318
-
3319
- rb_define_method(cIndexWriter, "term_index_interval",
3320
- frb_iw_get_index_interval, 0);
3321
- rb_define_method(cIndexWriter, "term_index_interval=",
3322
- frb_iw_set_index_interval, 1);
3323
-
3324
- rb_define_method(cIndexWriter, "doc_skip_interval",
3325
- frb_iw_get_skip_interval, 0);
3326
- rb_define_method(cIndexWriter, "doc_skip_interval=",
3327
- frb_iw_set_skip_interval, 1);
3328
-
3329
- rb_define_method(cIndexWriter, "merge_factor",
3330
- frb_iw_get_merge_factor, 0);
3331
- rb_define_method(cIndexWriter, "merge_factor=",
3332
- frb_iw_set_merge_factor, 1);
3333
-
3334
- rb_define_method(cIndexWriter, "max_buffered_docs",
3335
- frb_iw_get_max_buffered_docs, 0);
3336
- rb_define_method(cIndexWriter, "max_buffered_docs=",
3337
- frb_iw_set_max_buffered_docs, 1);
3338
-
3339
- rb_define_method(cIndexWriter, "max_merge_docs",
3340
- frb_iw_get_max_merge_docs, 0);
3341
- rb_define_method(cIndexWriter, "max_merge_docs=",
3342
- frb_iw_set_max_merge_docs, 1);
3343
-
3344
- rb_define_method(cIndexWriter, "max_field_length",
3345
- frb_iw_get_max_field_length, 0);
3346
- rb_define_method(cIndexWriter, "max_field_length=",
3347
- frb_iw_set_max_field_length, 1);
3348
-
3349
- rb_define_method(cIndexWriter, "use_compound_file",
3350
- frb_iw_get_use_compound_file, 0);
3351
- rb_define_method(cIndexWriter, "use_compound_file=",
3352
- frb_iw_set_use_compound_file, 1);
3326
+ rb_define_const(cIndexWriter, "WRITE_LOCK_NAME", rb_str_new2(FRT_WRITE_LOCK_NAME));
3327
+ rb_define_const(cIndexWriter, "COMMIT_LOCK_NAME", rb_str_new2(FRT_COMMIT_LOCK_NAME));
3328
+ rb_define_const(cIndexWriter, "DEFAULT_CHUNK_SIZE", INT2FIX(frt_default_config.chunk_size));
3329
+ rb_define_const(cIndexWriter, "DEFAULT_MAX_BUFFER_MEMORY", INT2FIX(frt_default_config.max_buffer_memory));
3330
+ rb_define_const(cIndexWriter, "DEFAULT_TERM_INDEX_INTERVAL", INT2FIX(frt_default_config.index_interval));
3331
+ rb_define_const(cIndexWriter, "DEFAULT_DOC_SKIP_INTERVAL", INT2FIX(frt_default_config.skip_interval));
3332
+ rb_define_const(cIndexWriter, "DEFAULT_MERGE_FACTOR", INT2FIX(frt_default_config.merge_factor));
3333
+ rb_define_const(cIndexWriter, "DEFAULT_MAX_BUFFERED_DOCS", INT2FIX(frt_default_config.max_buffered_docs));
3334
+ rb_define_const(cIndexWriter, "DEFAULT_MAX_MERGE_DOCS", INT2FIX(frt_default_config.max_merge_docs));
3335
+ rb_define_const(cIndexWriter, "DEFAULT_MAX_FIELD_LENGTH", INT2FIX(frt_default_config.max_field_length));
3336
+ rb_define_const(cIndexWriter, "DEFAULT_USE_COMPOUND_FILE", frt_default_config.use_compound_file ? Qtrue : Qfalse);
3337
+
3338
+ rb_define_method(cIndexWriter, "initialize", frb_iw_init, -1);
3339
+ rb_define_method(cIndexWriter, "doc_count", frb_iw_get_doc_count, 0);
3340
+ rb_define_method(cIndexWriter, "close", frb_iw_close, 0);
3341
+ rb_define_method(cIndexWriter, "add_document", frb_iw_add_doc, 1);
3342
+ rb_define_method(cIndexWriter, "<<", frb_iw_add_doc, 1);
3343
+ rb_define_method(cIndexWriter, "optimize", frb_iw_optimize, 0);
3344
+ rb_define_method(cIndexWriter, "commit", frb_iw_commit, 0);
3345
+ rb_define_method(cIndexWriter, "add_readers", frb_iw_add_readers, 1);
3346
+ rb_define_method(cIndexWriter, "delete", frb_iw_delete, 2);
3347
+ rb_define_method(cIndexWriter, "field_infos", frb_iw_field_infos, 0);
3348
+ rb_define_method(cIndexWriter, "analyzer", frb_iw_get_analyzer, 0);
3349
+ rb_define_method(cIndexWriter, "analyzer=", frb_iw_set_analyzer, 1);
3350
+ rb_define_method(cIndexWriter, "version", frb_iw_version, 0);
3351
+
3352
+ rb_define_method(cIndexWriter, "chunk_size", frb_iw_get_chunk_size, 0);
3353
+ rb_define_method(cIndexWriter, "chunk_size=", frb_iw_set_chunk_size, 1);
3354
+
3355
+ rb_define_method(cIndexWriter, "max_buffer_memory", frb_iw_get_max_buffer_memory, 0);
3356
+ rb_define_method(cIndexWriter, "max_buffer_memory=", frb_iw_set_max_buffer_memory, 1);
3357
+
3358
+ rb_define_method(cIndexWriter, "term_index_interval", frb_iw_get_index_interval, 0);
3359
+ rb_define_method(cIndexWriter, "term_index_interval=", frb_iw_set_index_interval, 1);
3360
+
3361
+ rb_define_method(cIndexWriter, "doc_skip_interval", frb_iw_get_skip_interval, 0);
3362
+ rb_define_method(cIndexWriter, "doc_skip_interval=", frb_iw_set_skip_interval, 1);
3353
3363
 
3364
+ rb_define_method(cIndexWriter, "merge_factor", frb_iw_get_merge_factor, 0);
3365
+ rb_define_method(cIndexWriter, "merge_factor=", frb_iw_set_merge_factor, 1);
3366
+
3367
+ rb_define_method(cIndexWriter, "max_buffered_docs", frb_iw_get_max_buffered_docs, 0);
3368
+ rb_define_method(cIndexWriter, "max_buffered_docs=", frb_iw_set_max_buffered_docs, 1);
3369
+
3370
+ rb_define_method(cIndexWriter, "max_merge_docs", frb_iw_get_max_merge_docs, 0);
3371
+ rb_define_method(cIndexWriter, "max_merge_docs=", frb_iw_set_max_merge_docs, 1);
3372
+
3373
+ rb_define_method(cIndexWriter, "max_field_length", frb_iw_get_max_field_length, 0);
3374
+ rb_define_method(cIndexWriter, "max_field_length=", frb_iw_set_max_field_length, 1);
3375
+
3376
+ rb_define_method(cIndexWriter, "use_compound_file", frb_iw_get_use_compound_file, 0);
3377
+ rb_define_method(cIndexWriter, "use_compound_file=", frb_iw_set_use_compound_file, 1);
3354
3378
  }
3355
3379
 
3356
3380
  /*
@@ -3383,18 +3407,16 @@ Init_IndexWriter(void)
3383
3407
  * doc.values #=> ["the title", "the content"]
3384
3408
  * doc.fields #=> [:title, :content]
3385
3409
  */
3386
- void
3387
- Init_LazyDoc(void)
3388
- {
3410
+ void Init_LazyDoc(void) {
3389
3411
  id_fields = rb_intern("@fields");
3390
3412
 
3391
3413
  cLazyDoc = rb_define_class_under(mIndex, "LazyDoc", rb_cHash);
3392
- rb_define_method(cLazyDoc, "default", frb_lzd_default, 1);
3393
- rb_define_method(cLazyDoc, "load", frb_lzd_load, 0);
3394
- rb_define_method(cLazyDoc, "fields", frb_lzd_fields, 0);
3414
+ rb_define_method(cLazyDoc, "default", frb_lzd_default, 1);
3415
+ rb_define_method(cLazyDoc, "load", frb_lzd_load, 0);
3416
+ rb_define_method(cLazyDoc, "fields", frb_lzd_fields, 0);
3395
3417
 
3396
3418
  cLazyDocData = rb_define_class_under(cLazyDoc, "LazyDocData", rb_cObject);
3397
- rb_define_alloc_func(cLazyDocData, frb_data_alloc);
3419
+ rb_define_alloc_func(cLazyDocData, frb_lzd_alloc);
3398
3420
  }
3399
3421
 
3400
3422
  /*
@@ -3407,41 +3429,39 @@ Init_LazyDoc(void)
3407
3429
  * index, accessing term-vectors or deleting documents by document id. It is
3408
3430
  * also used internally by IndexSearcher.
3409
3431
  */
3410
- void
3411
- Init_IndexReader(void)
3412
- {
3432
+ void Init_IndexReader(void) {
3413
3433
  cIndexReader = rb_define_class_under(mIndex, "IndexReader", rb_cObject);
3414
- rb_define_alloc_func(cIndexReader, frb_data_alloc);
3415
- rb_define_method(cIndexReader, "initialize", frb_ir_init, 1);
3416
- rb_define_method(cIndexReader, "set_norm", frb_ir_set_norm, 3);
3417
- rb_define_method(cIndexReader, "norms", frb_ir_norms, 1);
3418
- rb_define_method(cIndexReader, "get_norms_into",frb_ir_get_norms_into, 3);
3419
- rb_define_method(cIndexReader, "commit", frb_ir_commit, 0);
3420
- rb_define_method(cIndexReader, "close", frb_ir_close, 0);
3421
- rb_define_method(cIndexReader, "has_deletions?",frb_ir_has_deletions, 0);
3422
- rb_define_method(cIndexReader, "delete", frb_ir_delete, 1);
3423
- rb_define_method(cIndexReader, "deleted?", frb_ir_is_deleted, 1);
3424
- rb_define_method(cIndexReader, "max_doc", frb_ir_max_doc, 0);
3425
- rb_define_method(cIndexReader, "num_docs", frb_ir_num_docs, 0);
3426
- rb_define_method(cIndexReader, "undelete_all", frb_ir_undelete_all, 0);
3427
- rb_define_method(cIndexReader, "latest?", frb_ir_is_latest, 0);
3428
- rb_define_method(cIndexReader, "get_document", frb_ir_get_doc, -1);
3429
- rb_define_method(cIndexReader, "[]", frb_ir_get_doc, -1);
3430
- rb_define_method(cIndexReader, "term_vector", frb_ir_term_vector, 2);
3431
- rb_define_method(cIndexReader, "term_vectors", frb_ir_term_vectors, 1);
3432
- rb_define_method(cIndexReader, "term_docs", frb_ir_term_docs, 0);
3433
- rb_define_method(cIndexReader, "term_positions",frb_ir_term_positions, 0);
3434
- rb_define_method(cIndexReader, "term_docs_for", frb_ir_term_docs_for, 2);
3434
+ rb_define_alloc_func(cIndexReader, frb_ir_alloc);
3435
+ rb_define_method(cIndexReader, "initialize", frb_ir_init, 1);
3436
+ rb_define_method(cIndexReader, "set_norm", frb_ir_set_norm, 3);
3437
+ rb_define_method(cIndexReader, "norms", frb_ir_norms, 1);
3438
+ rb_define_method(cIndexReader, "get_norms_into", frb_ir_get_norms_into, 3);
3439
+ rb_define_method(cIndexReader, "commit", frb_ir_commit, 0);
3440
+ rb_define_method(cIndexReader, "close", frb_ir_close, 0);
3441
+ rb_define_method(cIndexReader, "has_deletions?", frb_ir_has_deletions, 0);
3442
+ rb_define_method(cIndexReader, "delete", frb_ir_delete, 1);
3443
+ rb_define_method(cIndexReader, "deleted?", frb_ir_is_deleted, 1);
3444
+ rb_define_method(cIndexReader, "max_doc", frb_ir_max_doc, 0);
3445
+ rb_define_method(cIndexReader, "num_docs", frb_ir_num_docs, 0);
3446
+ rb_define_method(cIndexReader, "undelete_all", frb_ir_undelete_all, 0);
3447
+ rb_define_method(cIndexReader, "latest?", frb_ir_is_latest, 0);
3448
+ rb_define_method(cIndexReader, "get_document", frb_ir_get_doc, -1);
3449
+ rb_define_method(cIndexReader, "[]", frb_ir_get_doc, -1);
3450
+ rb_define_method(cIndexReader, "term_vector", frb_ir_term_vector, 2);
3451
+ rb_define_method(cIndexReader, "term_vectors", frb_ir_term_vectors, 1);
3452
+ rb_define_method(cIndexReader, "term_docs", frb_ir_term_docs, 0);
3453
+ rb_define_method(cIndexReader, "term_positions", frb_ir_term_positions, 0);
3454
+ rb_define_method(cIndexReader, "term_docs_for", frb_ir_term_docs_for, 2);
3435
3455
  rb_define_method(cIndexReader, "term_positions_for", frb_ir_t_pos_for, 2);
3436
- rb_define_method(cIndexReader, "doc_freq", frb_ir_doc_freq, 2);
3437
- rb_define_method(cIndexReader, "terms", frb_ir_terms, 1);
3438
- rb_define_method(cIndexReader, "terms_from", frb_ir_terms_from, 2);
3439
- rb_define_method(cIndexReader, "term_count", frb_ir_term_count, 1);
3440
- rb_define_method(cIndexReader, "fields", frb_ir_fields, 0);
3441
- rb_define_method(cIndexReader, "field_names", frb_ir_fields, 0);
3442
- rb_define_method(cIndexReader, "field_infos", frb_ir_field_infos, 0);
3456
+ rb_define_method(cIndexReader, "doc_freq", frb_ir_doc_freq, 2);
3457
+ rb_define_method(cIndexReader, "terms", frb_ir_terms, 1);
3458
+ rb_define_method(cIndexReader, "terms_from", frb_ir_terms_from, 2);
3459
+ rb_define_method(cIndexReader, "term_count", frb_ir_term_count, 1);
3460
+ rb_define_method(cIndexReader, "fields", frb_ir_fields, 0);
3461
+ rb_define_method(cIndexReader, "field_names", frb_ir_fields, 0);
3462
+ rb_define_method(cIndexReader, "field_infos", frb_ir_field_infos, 0);
3443
3463
  rb_define_method(cIndexReader, "tokenized_fields", frb_ir_tk_fields, 0);
3444
- rb_define_method(cIndexReader, "version", frb_ir_version, 0);
3464
+ rb_define_method(cIndexReader, "version", frb_ir_version, 0);
3445
3465
  }
3446
3466
 
3447
3467
  /* rdoc hack
@@ -3466,9 +3486,7 @@ extern VALUE mFerret = rb_define_module("Ferret");
3466
3486
  * building tag clouds, creating more-like-this queries, custom highlighting
3467
3487
  * etc. They are also useful for index browsers.
3468
3488
  */
3469
- void
3470
- Init_Index(void)
3471
- {
3489
+ void Init_Index(void) {
3472
3490
  mIndex = rb_define_module_under(mFerret, "Index");
3473
3491
 
3474
3492
  sym_boost = ID2SYM(rb_intern("boost"));