isomorfeus-ferret 0.12.7 → 0.13.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (166) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +101 -19
  3. data/README.md +85 -13
  4. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
  5. data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
  6. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
  7. data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
  8. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +1 -1
  9. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +1 -1
  10. data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
  11. data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
  12. data/ext/isomorfeus_ferret_ext/bzlib_blocksort.c +1094 -0
  13. data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
  14. data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
  15. data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
  16. data/ext/isomorfeus_ferret_ext/bzlib_huffman.c +205 -0
  17. data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
  18. data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
  19. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
  20. data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
  21. data/ext/isomorfeus_ferret_ext/frb_index.c +497 -495
  22. data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
  23. data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
  24. data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
  25. data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
  26. data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
  27. data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
  28. data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
  29. data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
  30. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
  31. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
  32. data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
  33. data/ext/isomorfeus_ferret_ext/frt_document.h +10 -10
  34. data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
  35. data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
  36. data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
  37. data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
  38. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
  39. data/ext/isomorfeus_ferret_ext/frt_global.c +91 -200
  40. data/ext/isomorfeus_ferret_ext/frt_global.h +7 -18
  41. data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
  42. data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
  43. data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
  44. data/ext/isomorfeus_ferret_ext/frt_index.c +603 -410
  45. data/ext/isomorfeus_ferret_ext/frt_index.h +272 -291
  46. data/ext/isomorfeus_ferret_ext/frt_lang.c +0 -2
  47. data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
  48. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
  49. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +68 -91
  50. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
  51. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
  52. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
  53. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
  54. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
  55. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
  56. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +20 -16
  57. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
  58. data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
  59. data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
  60. data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
  61. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
  62. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +45 -84
  63. data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
  64. data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
  65. data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
  66. data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
  67. data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
  68. data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
  69. data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
  70. data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
  71. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +22 -112
  72. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
  73. data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
  74. data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
  75. data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
  76. data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
  77. data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
  78. data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
  79. data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
  80. data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
  81. data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
  82. data/ext/isomorfeus_ferret_ext/test.c +0 -17
  83. data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
  84. data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
  85. data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
  86. data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
  87. data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
  88. data/ext/isomorfeus_ferret_ext/test_fields.c +59 -60
  89. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
  90. data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
  91. data/ext/isomorfeus_ferret_ext/test_global.c +0 -46
  92. data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
  93. data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
  94. data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
  95. data/ext/isomorfeus_ferret_ext/test_index.c +372 -365
  96. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
  97. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
  98. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
  99. data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
  100. data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
  101. data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
  102. data/ext/isomorfeus_ferret_ext/test_search.c +60 -64
  103. data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
  104. data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
  105. data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
  106. data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
  107. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
  108. data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
  109. data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
  110. data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
  111. data/lib/isomorfeus/ferret/version.rb +1 -1
  112. metadata +27 -57
  113. data/ext/isomorfeus_ferret_ext/email.rl +0 -21
  114. data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
  115. data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
  116. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
  117. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
  118. data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
  119. data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
  120. data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
  121. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
  122. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
  128. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
  129. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
  130. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
  131. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
  132. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
  133. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
  134. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
  135. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
  136. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
  137. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
  138. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
  139. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
  140. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
  141. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
  142. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
  143. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
  144. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
  145. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
  146. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
  147. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
  148. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
  149. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
  150. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
  151. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
  152. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
  153. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
  154. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
  155. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
  156. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
  157. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
  158. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
  159. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
  160. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
  161. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
  162. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
  163. data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
  164. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
  165. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
  166. data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -1,6 +1,8 @@
1
1
  #include "frt_index.h"
2
2
  #include "isomorfeus_ferret.h"
3
- #include <ruby/st.h>
3
+ #include <ruby.h>
4
+
5
+ #undef close
4
6
 
5
7
  VALUE mIndex;
6
8
 
@@ -41,8 +43,10 @@ static VALUE sym_store;
41
43
  static VALUE sym_index;
42
44
  static VALUE sym_term_vector;
43
45
 
44
- static VALUE sym_compress;
45
- static VALUE sym_compressed;
46
+ static VALUE sym_brotli;
47
+ static VALUE sym_bz2;
48
+ static VALUE sym_lz4;
49
+ static VALUE sym_compression;
46
50
 
47
51
  static VALUE sym_untokenized;
48
52
  static VALUE sym_omit_norms;
@@ -52,7 +56,7 @@ static VALUE sym_with_positions;
52
56
  static VALUE sym_with_offsets;
53
57
  static VALUE sym_with_positions_offsets;
54
58
 
55
- static FrtSymbol fsym_content;
59
+ static ID fsym_content;
56
60
 
57
61
  static ID id_term;
58
62
  static ID id_fields;
@@ -60,6 +64,7 @@ static ID id_fld_num_map;
60
64
  static ID id_field_num;
61
65
  static ID id_boost;
62
66
 
67
+ extern rb_encoding *utf8_encoding;
63
68
  extern void frb_set_term(VALUE rterm, FrtTerm *t);
64
69
  extern FrtAnalyzer *frb_get_cwrapped_analyzer(VALUE ranalyzer);
65
70
  extern VALUE frb_get_analyzer(FrtAnalyzer *a);
@@ -70,20 +75,11 @@ extern VALUE frb_get_analyzer(FrtAnalyzer *a);
70
75
  *
71
76
  ****************************************************************************/
72
77
 
73
- static void
74
- frb_fi_free(void *p)
75
- {
76
- object_del(p);
78
+ static void frb_fi_free(void *p) {
77
79
  frt_fi_deref((FrtFieldInfo *)p);
78
80
  }
79
81
 
80
- static void
81
- frb_fi_get_params(VALUE roptions,
82
- FrtStoreValue *store,
83
- FrtIndexValue *index,
84
- FrtTermVectorValue *term_vector,
85
- float *boost)
86
- {
82
+ static void frb_fi_get_params(VALUE roptions, FrtStoreValue *store, FrtCompressionType *compression, FrtIndexValue *index, FrtTermVectorValue *term_vector, float *boost) {
87
83
  VALUE v;
88
84
  Check_Type(roptions, T_HASH);
89
85
  v = rb_hash_aref(roptions, sym_boost);
@@ -98,13 +94,27 @@ frb_fi_get_params(VALUE roptions,
98
94
  *store = FRT_STORE_NO;
99
95
  } else if (v == sym_yes || v == sym_true || v == Qtrue) {
100
96
  *store = FRT_STORE_YES;
101
- } else if (v == sym_compress || v == sym_compressed) {
102
- *store = FRT_STORE_COMPRESS;
103
97
  } else if (v == Qnil) {
104
98
  /* leave as default */
105
99
  } else {
106
- rb_raise(rb_eArgError, ":%s isn't a valid argument for :store."
107
- " Please choose from [:yes, :no, :compressed]",
100
+ rb_raise(rb_eArgError, ":%s isn't a valid argument for :store. Please choose from [:yes, :no]",
101
+ rb_id2name(SYM2ID(v)));
102
+ }
103
+
104
+ v = rb_hash_aref(roptions, sym_compression);
105
+ if (Qnil != v) Check_Type(v, T_SYMBOL);
106
+ if (v == sym_no || v == sym_false || v == Qfalse) {
107
+ *compression = FRT_COMPRESSION_NONE;
108
+ } else if (v == sym_yes || v == sym_true || v == Qtrue || v == sym_brotli) {
109
+ *compression = FRT_COMPRESSION_BROTLI;
110
+ } else if (v == sym_bz2) {
111
+ *compression = FRT_COMPRESSION_BZ2;
112
+ } else if (v == sym_lz4) {
113
+ *compression = FRT_COMPRESSION_LZ4;
114
+ } else if (v == Qnil) {
115
+ /* leave as default */
116
+ } else {
117
+ rb_raise(rb_eArgError, ":%s isn't a valid argument for :compression. Please choose from [:yes, :no, :brotli, :bz2, :lz4]",
108
118
  rb_id2name(SYM2ID(v)));
109
119
  }
110
120
 
@@ -123,10 +133,8 @@ frb_fi_get_params(VALUE roptions,
123
133
  } else if (v == Qnil) {
124
134
  /* leave as default */
125
135
  } else {
126
- rb_raise(rb_eArgError, ":%s isn't a valid argument for :index."
127
- " Please choose from [:no, :yes, :untokenized, "
128
- ":omit_norms, :untokenized_omit_norms]",
129
- rb_id2name(SYM2ID(v)));
136
+ rb_raise(rb_eArgError, ":%s isn't a valid argument for :index. Please choose from [:no, :yes, :untokenized, "
137
+ ":omit_norms, :untokenized_omit_norms]", rb_id2name(SYM2ID(v)));
130
138
  }
131
139
 
132
140
  v = rb_hash_aref(roptions, sym_term_vector);
@@ -144,28 +152,38 @@ frb_fi_get_params(VALUE roptions,
144
152
  } else if (v == Qnil) {
145
153
  /* leave as default */
146
154
  } else {
147
- rb_raise(rb_eArgError, ":%s isn't a valid argument for "
148
- ":term_vector. Please choose from [:no, :yes, "
149
- ":with_positions, :with_offsets, "
150
- ":with_positions_offsets]",
151
- rb_id2name(SYM2ID(v)));
155
+ rb_raise(rb_eArgError, ":%s isn't a valid argument for :term_vector. Please choose from [:no, :yes, "
156
+ ":with_positions, :with_offsets, :with_positions_offsets]", rb_id2name(SYM2ID(v)));
152
157
  }
153
158
  }
154
159
 
155
- static VALUE
156
- frb_get_field_info(FrtFieldInfo *fi)
157
- {
160
+ static size_t frb_fi_size(const void *p) {
161
+ return sizeof(FrtFieldInfo);
162
+ (void)p;
163
+ }
158
164
 
159
- VALUE rfi = Qnil;
165
+ const rb_data_type_t frb_field_info_t = {
166
+ .wrap_struct_name = "FrbFieldInfo",
167
+ .function = {
168
+ .dmark = NULL,
169
+ .dfree = frb_fi_free,
170
+ .dsize = frb_fi_size,
171
+ .dcompact = NULL,
172
+ .reserved = {0},
173
+ },
174
+ .parent = NULL,
175
+ .data = NULL,
176
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY
177
+ };
178
+
179
+ static VALUE frb_get_field_info(FrtFieldInfo *fi) {
160
180
  if (fi) {
161
- rfi = object_get(fi);
162
- if (rfi == Qnil) {
163
- rfi = Data_Wrap_Struct(cFieldInfo, NULL, &frb_fi_free, fi);
181
+ if (fi->rfi == 0 || fi->rfi == Qnil) {
182
+ fi->rfi = TypedData_Wrap_Struct(cFieldInfo, &frb_field_info_t, fi);
164
183
  FRT_REF(fi);
165
- object_add(fi, rfi);
166
184
  }
167
185
  }
168
- return rfi;
186
+ return fi->rfi;
169
187
  }
170
188
 
171
189
  /*
@@ -173,28 +191,32 @@ frb_get_field_info(FrtFieldInfo *fi)
173
191
  * FieldInfo.new(name, options = {}) -> field_info
174
192
  *
175
193
  * Create a new FieldInfo object with the name +name+ and the properties
176
- * specified in +options+. The available options are [:store, :index,
177
- * :term_vector, :boost]. See the description of FieldInfo for more
194
+ * specified in +options+. The available options are [:store, :compression,
195
+ * :index, :term_vector, :boost]. See the description of FieldInfo for more
178
196
  * information on these properties.
179
197
  */
180
- static VALUE
181
- frb_fi_init(int argc, VALUE *argv, VALUE self)
182
- {
198
+ static VALUE frb_fi_alloc(VALUE rclass) {
199
+ FrtFieldInfo *fi = frt_fi_alloc();
200
+ return TypedData_Wrap_Struct(rclass, &frb_field_info_t, fi);
201
+ }
202
+
203
+ static VALUE frb_fi_init(int argc, VALUE *argv, VALUE self) {
183
204
  VALUE roptions, rname;
184
205
  FrtFieldInfo *fi;
206
+ TypedData_Get_Struct(self, FrtFieldInfo, &frb_field_info_t, fi);
185
207
  FrtStoreValue store = FRT_STORE_YES;
208
+ FrtCompressionType compression = FRT_COMPRESSION_NONE;
186
209
  FrtIndexValue index = FRT_INDEX_YES;
187
210
  FrtTermVectorValue term_vector = FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS;
188
211
  float boost = 1.0f;
189
212
 
190
213
  rb_scan_args(argc, argv, "11", &rname, &roptions);
191
214
  if (argc > 1) {
192
- frb_fi_get_params(roptions, &store, &index, &term_vector, &boost);
215
+ frb_fi_get_params(roptions, &store, &compression, &index, &term_vector, &boost);
193
216
  }
194
- fi = frt_fi_new(frb_field(rname), store, index, term_vector);
217
+ fi = frt_fi_init(fi, frb_field(rname), store, compression, index, term_vector);
195
218
  fi->boost = boost;
196
- Frt_Wrap_Struct(self, NULL, &frb_fi_free, fi);
197
- object_add(fi, self);
219
+ fi->rfi = self;
198
220
  return self;
199
221
  }
200
222
 
@@ -204,9 +226,7 @@ frb_fi_init(int argc, VALUE *argv, VALUE self)
204
226
  *
205
227
  * Return the name of the field
206
228
  */
207
- static VALUE
208
- frb_fi_name(VALUE self)
209
- {
229
+ static VALUE frb_fi_name(VALUE self) {
210
230
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
211
231
  return rb_str_new_cstr(rb_id2name(fi->name));
212
232
  }
@@ -217,9 +237,7 @@ frb_fi_name(VALUE self)
217
237
  *
218
238
  * Return true if the field is stored in the index.
219
239
  */
220
- static VALUE
221
- frb_fi_is_stored(VALUE self)
222
- {
240
+ static VALUE frb_fi_is_stored(VALUE self) {
223
241
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
224
242
  return fi_is_stored(fi) ? Qtrue : Qfalse;
225
243
  }
@@ -230,9 +248,7 @@ frb_fi_is_stored(VALUE self)
230
248
  *
231
249
  * Return true if the field is stored in the index in compressed format.
232
250
  */
233
- static VALUE
234
- frb_fi_is_compressed(VALUE self)
235
- {
251
+ static VALUE frb_fi_is_compressed(VALUE self) {
236
252
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
237
253
  return fi_is_compressed(fi) ? Qtrue : Qfalse;
238
254
  }
@@ -243,9 +259,7 @@ frb_fi_is_compressed(VALUE self)
243
259
  *
244
260
  * Return true if the field is indexed, i.e., searchable in the index.
245
261
  */
246
- static VALUE
247
- frb_fi_is_indexed(VALUE self)
248
- {
262
+ static VALUE frb_fi_is_indexed(VALUE self) {
249
263
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
250
264
  return fi_is_indexed(fi) ? Qtrue : Qfalse;
251
265
  }
@@ -261,9 +275,7 @@ frb_fi_is_indexed(VALUE self)
261
275
  *
262
276
  * A field can only be tokenized if it is indexed.
263
277
  */
264
- static VALUE
265
- frb_fi_is_tokenized(VALUE self)
266
- {
278
+ static VALUE frb_fi_is_tokenized(VALUE self) {
267
279
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
268
280
  return fi_is_tokenized(fi) ? Qtrue : Qfalse;
269
281
  }
@@ -279,9 +291,7 @@ frb_fi_is_tokenized(VALUE self)
279
291
  * boost and it will use less memory, especially for indexes which have a
280
292
  * large number of documents.
281
293
  */
282
- static VALUE
283
- frb_fi_omit_norms(VALUE self)
284
- {
294
+ static VALUE frb_fi_omit_norms(VALUE self) {
285
295
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
286
296
  return fi_omit_norms(fi) ? Qtrue : Qfalse;
287
297
  }
@@ -292,9 +302,7 @@ frb_fi_omit_norms(VALUE self)
292
302
  *
293
303
  * Return true if the term-vectors are stored for this field.
294
304
  */
295
- static VALUE
296
- frb_fi_store_term_vector(VALUE self)
297
- {
305
+ static VALUE frb_fi_store_term_vector(VALUE self) {
298
306
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
299
307
  return fi_store_term_vector(fi) ? Qtrue : Qfalse;
300
308
  }
@@ -305,9 +313,7 @@ frb_fi_store_term_vector(VALUE self)
305
313
  *
306
314
  * Return true if positions are stored with the term-vectors for this field.
307
315
  */
308
- static VALUE
309
- frb_fi_store_positions(VALUE self)
310
- {
316
+ static VALUE frb_fi_store_positions(VALUE self) {
311
317
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
312
318
  return fi_store_positions(fi) ? Qtrue : Qfalse;
313
319
  }
@@ -318,9 +324,7 @@ frb_fi_store_positions(VALUE self)
318
324
  *
319
325
  * Return true if offsets are stored with the term-vectors for this field.
320
326
  */
321
- static VALUE
322
- frb_fi_store_offsets(VALUE self)
323
- {
327
+ static VALUE frb_fi_store_offsets(VALUE self) {
324
328
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
325
329
  return fi_store_offsets(fi) ? Qtrue : Qfalse;
326
330
  }
@@ -333,9 +337,7 @@ frb_fi_store_offsets(VALUE self)
333
337
  *
334
338
  * fi.indexed? and not fi.omit_norms?
335
339
  */
336
- static VALUE
337
- frb_fi_has_norms(VALUE self)
338
- {
340
+ static VALUE frb_fi_has_norms(VALUE self) {
339
341
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
340
342
  return fi_has_norms(fi) ? Qtrue : Qfalse;
341
343
  }
@@ -346,9 +348,7 @@ frb_fi_has_norms(VALUE self)
346
348
  *
347
349
  * Return the default boost for this field
348
350
  */
349
- static VALUE
350
- frb_fi_boost(VALUE self)
351
- {
351
+ static VALUE frb_fi_boost(VALUE self) {
352
352
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
353
353
  return rb_float_new((double)fi->boost);
354
354
  }
@@ -359,9 +359,7 @@ frb_fi_boost(VALUE self)
359
359
  *
360
360
  * Return a string representation of the FieldInfo object.
361
361
  */
362
- static VALUE
363
- frb_fi_to_s(VALUE self)
364
- {
362
+ static VALUE frb_fi_to_s(VALUE self) {
365
363
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
366
364
  char *fi_s = frt_fi_to_s(fi);
367
365
  VALUE rfi_s = rb_str_new2(fi_s);
@@ -375,39 +373,47 @@ frb_fi_to_s(VALUE self)
375
373
  *
376
374
  ****************************************************************************/
377
375
 
378
- static void
379
- frb_fis_free(void *p)
380
- {
381
- object_del(p);
376
+ static void frb_fis_free(void *p) {
382
377
  frt_fis_deref((FrtFieldInfos *)p);
383
378
  }
384
379
 
385
- static void
386
- frb_fis_mark(void *p)
387
- {
380
+ static void frb_fis_mark(void *p) {
388
381
  int i;
389
382
  FrtFieldInfos *fis = (FrtFieldInfos *)p;
390
383
 
391
384
  for (i = 0; i < fis->size; i++) {
392
- frb_gc_mark(fis->fields[i]);
385
+ if (fis->fields[i]->rfi)
386
+ rb_gc_mark(fis->fields[i]->rfi);
393
387
  }
394
388
  }
395
389
 
396
- static VALUE
397
- frb_get_field_infos(FrtFieldInfos *fis)
398
- {
390
+ static size_t frb_field_infos_t_size(const void *p) {
391
+ return sizeof(FrtFieldInfos);
392
+ (void)p;
393
+ }
399
394
 
400
- VALUE rfis = Qnil;
395
+ const rb_data_type_t frb_field_infos_t = {
396
+ .wrap_struct_name = "FrbFieldInfos",
397
+ .function = {
398
+ .dmark = frb_fis_mark,
399
+ .dfree = frb_fis_free,
400
+ .dsize = frb_field_infos_t_size,
401
+ .dcompact = NULL,
402
+ .reserved = {0},
403
+ },
404
+ .parent = NULL,
405
+ .data = NULL,
406
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY
407
+ };
408
+
409
+ static VALUE frb_get_field_infos(FrtFieldInfos *fis) {
401
410
  if (fis) {
402
- rfis = object_get(fis);
403
- if (rfis == Qnil) {
404
- rfis = Data_Wrap_Struct(cFieldInfos, &frb_fis_mark, &frb_fis_free,
405
- fis);
411
+ if (fis->rfis == 0 || fis->rfis == Qnil) {
412
+ fis->rfis = TypedData_Wrap_Struct(cFieldInfos, &frb_field_infos_t, fis);
406
413
  FRT_REF(fis);
407
- object_add(fis, rfis);
408
414
  }
409
415
  }
410
- return rfis;
416
+ return fis->rfis;
411
417
  }
412
418
 
413
419
  /*
@@ -418,23 +424,28 @@ frb_get_field_infos(FrtFieldInfos *fis)
418
424
  * specified in the +default+ hash parameter. See FieldInfo for available
419
425
  * property values.
420
426
  */
421
- static VALUE
422
- frb_fis_init(int argc, VALUE *argv, VALUE self)
423
- {
427
+
428
+ static VALUE frb_fis_alloc(VALUE rclass) {
429
+ FrtFieldInfos *fis = frt_fis_alloc();
430
+ return TypedData_Wrap_Struct(rclass, &frb_field_infos_t, fis);
431
+ }
432
+
433
+ static VALUE frb_fis_init(int argc, VALUE *argv, VALUE self) {
424
434
  VALUE roptions;
425
435
  FrtFieldInfos *fis;
436
+ TypedData_Get_Struct(self, FrtFieldInfos, &frb_field_infos_t, fis);
426
437
  FrtStoreValue store = FRT_STORE_YES;
438
+ FrtCompressionType compression = FRT_COMPRESSION_NONE;
427
439
  FrtIndexValue index = FRT_INDEX_YES;
428
440
  FrtTermVectorValue term_vector = FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS;
429
441
  float boost;
430
442
 
431
443
  rb_scan_args(argc, argv, "01", &roptions);
432
444
  if (argc > 0) {
433
- frb_fi_get_params(roptions, &store, &index, &term_vector, &boost);
445
+ frb_fi_get_params(roptions, &store, &compression, &index, &term_vector, &boost);
434
446
  }
435
- fis = frt_fis_new(store, index, term_vector);
436
- Frt_Wrap_Struct(self, &frb_fis_mark, &frb_fis_free, fis);
437
- object_add(fis, self);
447
+ fis = frt_fis_init(fis, store, compression, index, term_vector);
448
+ fis->rfis = self;
438
449
  return self;
439
450
  }
440
451
 
@@ -445,9 +456,7 @@ frb_fis_init(int argc, VALUE *argv, VALUE self)
445
456
  * Return an array of the FieldInfo objects contained by this FieldInfos
446
457
  * object.
447
458
  */
448
- static VALUE
449
- frb_fis_to_a(VALUE self)
450
- {
459
+ static VALUE frb_fis_to_a(VALUE self) {
451
460
  FrtFieldInfos *fis = (FrtFieldInfos *)DATA_PTR(self);
452
461
  VALUE rary = rb_ary_new();
453
462
  int i;
@@ -470,9 +479,7 @@ frb_fis_to_a(VALUE self)
470
479
  * fi = fis[:name]
471
480
  * fi = fis[2]
472
481
  */
473
- static VALUE
474
- frb_fis_get(VALUE self, VALUE ridx)
475
- {
482
+ static VALUE frb_fis_get(VALUE self, VALUE ridx) {
476
483
  FrtFieldInfos *fis = (FrtFieldInfos *)DATA_PTR(self);
477
484
  VALUE rfi = Qnil;
478
485
  switch (TYPE(ridx)) {
@@ -511,9 +518,7 @@ frb_fis_get(VALUE self, VALUE ridx)
511
518
  * Add a FieldInfo object. Use the FieldInfos#add_field method where
512
519
  * possible.
513
520
  */
514
- static VALUE
515
- frb_fis_add(VALUE self, VALUE rfi)
516
- {
521
+ static VALUE frb_fis_add(VALUE self, VALUE rfi) {
517
522
  FrtFieldInfos *fis = (FrtFieldInfos *)DATA_PTR(self);
518
523
  FrtFieldInfo *fi = (FrtFieldInfo *)frb_rb_data_ptr(rfi);
519
524
  frt_fis_add_field(fis, fi);
@@ -534,6 +539,7 @@ frb_fis_add_field(int argc, VALUE *argv, VALUE self)
534
539
  FrtFieldInfos *fis = (FrtFieldInfos *)DATA_PTR(self);
535
540
  FrtFieldInfo *fi;
536
541
  FrtStoreValue store = fis->store;
542
+ FrtCompressionType compression = fis->compression;
537
543
  FrtIndexValue index = fis->index;
538
544
  FrtTermVectorValue term_vector = fis->term_vector;
539
545
  float boost = 1.0f;
@@ -541,9 +547,9 @@ frb_fis_add_field(int argc, VALUE *argv, VALUE self)
541
547
 
542
548
  rb_scan_args(argc, argv, "11", &rname, &roptions);
543
549
  if (argc > 1) {
544
- frb_fi_get_params(roptions, &store, &index, &term_vector, &boost);
550
+ frb_fi_get_params(roptions, &store, &compression, &index, &term_vector, &boost);
545
551
  }
546
- fi = frt_fi_new(frb_field(rname), store, index, term_vector);
552
+ fi = frt_fi_new(frb_field(rname), store, compression, index, term_vector);
547
553
  fi->boost = boost;
548
554
  frt_fis_add_field(fis, fi);
549
555
  return self;
@@ -670,28 +676,46 @@ frb_fis_get_tk_fields(VALUE self)
670
676
  *
671
677
  ****************************************************************************/
672
678
 
673
- static void
674
- frb_te_free(void *p)
675
- {
679
+ static void frb_te_free(void *p) {
676
680
  FrtTermEnum *te = (FrtTermEnum *)p;
677
681
  te->close(te);
678
682
  }
679
683
 
680
- static VALUE
681
- frb_te_get_set_term(VALUE self, const char *term)
682
- {
684
+ static size_t frb_te_size(const void *p) {
685
+ return sizeof(FrtTermEnum);
686
+ (void)p;
687
+ }
688
+
689
+ const rb_data_type_t frb_term_enum_t = {
690
+ .wrap_struct_name = "FrbTermEnum",
691
+ .function = {
692
+ .dmark = NULL,
693
+ .dfree = frb_te_free,
694
+ .dsize = frb_te_size,
695
+ .dcompact = NULL,
696
+ .reserved = {0},
697
+ },
698
+ .parent = NULL,
699
+ .data = NULL,
700
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY
701
+ };
702
+
703
+ static VALUE frb_te_alloc(VALUE rclass) {
704
+ FrtTermEnum *te = FRT_ALLOC_AND_ZERO(FrtTermEnum);
705
+ return TypedData_Wrap_Struct(rclass, &frb_term_enum_t, te);
706
+ }
707
+
708
+ static VALUE frb_te_get_set_term(VALUE self, const char *term) {
683
709
  FrtTermEnum *te = (FrtTermEnum *)DATA_PTR(self);
684
710
  VALUE str = term ? rb_str_new(term, te->curr_term_len) : Qnil;
685
711
  rb_ivar_set(self, id_term, str);
686
712
  return str;
687
713
  }
688
714
 
689
- static VALUE
690
- frb_get_te(VALUE rir, FrtTermEnum *te)
691
- {
715
+ static VALUE frb_get_te(VALUE rir, FrtTermEnum *te) {
692
716
  VALUE self = Qnil;
693
717
  if (te != NULL) {
694
- self = Data_Wrap_Struct(cTermEnum, NULL, &frb_te_free, te);
718
+ self = TypedData_Wrap_Struct(cTermEnum, &frb_term_enum_t, te);
695
719
  frb_te_get_set_term(self, te->curr_term);
696
720
  rb_ivar_set(self, id_fld_num_map, rb_ivar_get(rir, id_fld_num_map));
697
721
  }
@@ -704,9 +728,7 @@ frb_get_te(VALUE rir, FrtTermEnum *te)
704
728
  *
705
729
  * Returns the next term in the enumeration or nil otherwise.
706
730
  */
707
- static VALUE
708
- frb_te_next(VALUE self)
709
- {
731
+ static VALUE frb_te_next(VALUE self) {
710
732
  FrtTermEnum *te = (FrtTermEnum *)DATA_PTR(self);
711
733
  return frb_te_get_set_term(self, te->next(te));
712
734
  }
@@ -718,9 +740,7 @@ frb_te_next(VALUE self)
718
740
  * Returns the current term pointed to by the enum. This method should only
719
741
  * be called after a successful call to TermEnum#next.
720
742
  */
721
- static VALUE
722
- frb_te_term(VALUE self)
723
- {
743
+ static VALUE frb_te_term(VALUE self) {
724
744
  return rb_ivar_get(self, id_term);
725
745
  }
726
746
 
@@ -732,9 +752,7 @@ frb_te_term(VALUE self)
732
752
  * That is the number of documents that this term appears in. The method
733
753
  * should only be called after a successful call to TermEnum#next.
734
754
  */
735
- static VALUE
736
- frb_te_doc_freq(VALUE self)
737
- {
755
+ static VALUE frb_te_doc_freq(VALUE self) {
738
756
  FrtTermEnum *te = (FrtTermEnum *)DATA_PTR(self);
739
757
  return INT2FIX(te->curr_ti.doc_freq);
740
758
  }
@@ -750,9 +768,7 @@ frb_te_doc_freq(VALUE self)
750
768
  *
751
769
  * Returns the first term greater than or equal to +target+
752
770
  */
753
- static VALUE
754
- frb_te_skip_to(VALUE self, VALUE rterm)
755
- {
771
+ static VALUE frb_te_skip_to(VALUE self, VALUE rterm) {
756
772
  FrtTermEnum *te = (FrtTermEnum *)DATA_PTR(self);
757
773
  return frb_te_get_set_term(self, te->skip_to(te, rs2s(rterm)));
758
774
  }
@@ -764,9 +780,7 @@ frb_te_skip_to(VALUE self, VALUE rterm)
764
780
  * Iterates through all the terms in the field, yielding the term and the
765
781
  * document frequency.
766
782
  */
767
- static VALUE
768
- frb_te_each(VALUE self)
769
- {
783
+ static VALUE frb_te_each(VALUE self) {
770
784
  FrtTermEnum *te = (FrtTermEnum *)DATA_PTR(self);
771
785
  char *term;
772
786
  int term_cnt = 0;
@@ -798,9 +812,7 @@ frb_te_each(VALUE self)
798
812
  * do_something()
799
813
  * end
800
814
  */
801
- static VALUE
802
- frb_te_set_field(VALUE self, VALUE rfield)
803
- {
815
+ static VALUE frb_te_set_field(VALUE self, VALUE rfield) {
804
816
  FrtTermEnum *te = (FrtTermEnum *)DATA_PTR(self);
805
817
  int field_num = 0;
806
818
  VALUE rfnum_map = rb_ivar_get(self, id_fld_num_map);
@@ -840,9 +852,7 @@ frb_te_set_field(VALUE self, VALUE rfield)
840
852
  * # ["cantaloupe",12]
841
853
  * # ]
842
854
  */
843
- static VALUE
844
- frb_te_to_json(int argc, VALUE *argv, VALUE self)
845
- {
855
+ static VALUE frb_te_to_json(int argc, VALUE *argv, VALUE self) {
846
856
  FrtTermEnum *te = (FrtTermEnum *)DATA_PTR(self);
847
857
  VALUE rjson;
848
858
  char *json, *jp;
@@ -867,8 +877,7 @@ frb_te_to_json(int argc, VALUE *argv, VALUE self)
867
877
  *(jp++) = ']';
868
878
  *(jp++) = ',';
869
879
  }
870
- }
871
- else {
880
+ } else {
872
881
  while (NULL != (term = te->next(te))) {
873
882
  /* enough room for the term after converting " to '"' and frequency
874
883
  * plus some extra for good measure */
@@ -904,17 +913,37 @@ frb_te_to_json(int argc, VALUE *argv, VALUE self)
904
913
  *
905
914
  ****************************************************************************/
906
915
 
907
- static void
908
- frb_tde_free(void *p)
909
- {
916
+ static void frb_tde_free(void *p) {
910
917
  FrtTermDocEnum *tde = (FrtTermDocEnum *)p;
911
918
  tde->close(tde);
912
919
  }
913
920
 
914
- static VALUE
915
- frb_get_tde(VALUE rir, FrtTermDocEnum *tde)
916
- {
917
- VALUE self = Data_Wrap_Struct(cTermDocEnum, NULL, &frb_tde_free, tde);
921
+ static size_t frb_tde_size(const void *p) {
922
+ return sizeof(FrtTermDocEnum);
923
+ (void)p;
924
+ }
925
+
926
+ const rb_data_type_t frb_term_doc_enum_t = {
927
+ .wrap_struct_name = "FrbTermDocEnum",
928
+ .function = {
929
+ .dmark = NULL,
930
+ .dfree = frb_tde_free,
931
+ .dsize = frb_tde_size,
932
+ .dcompact = NULL,
933
+ .reserved = {0},
934
+ },
935
+ .parent = NULL,
936
+ .data = NULL,
937
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY
938
+ };
939
+
940
+ static VALUE frb_tde_alloc(VALUE rclass) {
941
+ FrtTermDocEnum *tde = FRT_ALLOC_AND_ZERO(FrtTermDocEnum);
942
+ return TypedData_Wrap_Struct(rclass, &frb_term_doc_enum_t, tde);
943
+ }
944
+
945
+ static VALUE frb_get_tde(VALUE rir, FrtTermDocEnum *tde) {
946
+ VALUE self = TypedData_Wrap_Struct(cTermDocEnum, &frb_term_doc_enum_t, tde);
918
947
  rb_ivar_set(self, id_fld_num_map, rb_ivar_get(rir, id_fld_num_map));
919
948
  return self;
920
949
  }
@@ -927,9 +956,7 @@ frb_get_tde(VALUE rir, FrtTermDocEnum *tde)
927
956
  * you can call next or each to skip through the documents and positions of
928
957
  * this particular term.
929
958
  */
930
- static VALUE
931
- frb_tde_seek(VALUE self, VALUE rfield, VALUE rterm)
932
- {
959
+ static VALUE frb_tde_seek(VALUE self, VALUE rfield, VALUE rterm) {
933
960
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
934
961
  char *term;
935
962
  VALUE rfnum_map = rb_ivar_get(self, id_fld_num_map);
@@ -939,8 +966,7 @@ frb_tde_seek(VALUE self, VALUE rfield, VALUE rterm)
939
966
  if (rfnum != Qnil) {
940
967
  field_num = FIX2INT(rfnum);
941
968
  } else {
942
- rb_raise(rb_eArgError, "field %s doesn't exist in the index",
943
- rb_id2name(frb_field(rfield)));
969
+ rb_raise(rb_eArgError, "field %s doesn't exist in the index", rb_id2name(frb_field(rfield)));
944
970
  }
945
971
  tde->seek(tde, field_num, term);
946
972
  return self;
@@ -958,9 +984,7 @@ frb_tde_seek(VALUE self, VALUE rfield, VALUE rterm)
958
984
  * However the +seek_term_enum+ method saves an index lookup so should offer
959
985
  * a large performance improvement.
960
986
  */
961
- static VALUE
962
- frb_tde_seek_te(VALUE self, VALUE rterm_enum)
963
- {
987
+ static VALUE frb_tde_seek_te(VALUE self, VALUE rterm_enum) {
964
988
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
965
989
  FrtTermEnum *te = (FrtTermEnum *)frb_rb_data_ptr(rterm_enum);
966
990
  tde->seek_te(tde, te);
@@ -973,9 +997,7 @@ frb_tde_seek_te(VALUE self, VALUE rterm_enum)
973
997
  *
974
998
  * Returns the current document number pointed to by the +term_doc_enum+.
975
999
  */
976
- static VALUE
977
- frb_tde_doc(VALUE self)
978
- {
1000
+ static VALUE frb_tde_doc(VALUE self) {
979
1001
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
980
1002
  return INT2FIX(tde->doc_num(tde));
981
1003
  }
@@ -987,9 +1009,7 @@ frb_tde_doc(VALUE self)
987
1009
  * Returns the frequency of the current document pointed to by the
988
1010
  * +term_doc_enum+.
989
1011
  */
990
- static VALUE
991
- frb_tde_freq(VALUE self)
992
- {
1012
+ static VALUE frb_tde_freq(VALUE self) {
993
1013
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
994
1014
  return INT2FIX(tde->freq(tde));
995
1015
  }
@@ -1001,9 +1021,7 @@ frb_tde_freq(VALUE self)
1001
1021
  * Move forward to the next document in the enumeration. Returns +true+ if
1002
1022
  * there is another document or +false+ otherwise.
1003
1023
  */
1004
- static VALUE
1005
- frb_tde_next(VALUE self)
1006
- {
1024
+ static VALUE frb_tde_next(VALUE self) {
1007
1025
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
1008
1026
  return tde->next(tde) ? Qtrue : Qfalse;
1009
1027
  }
@@ -1015,9 +1033,7 @@ frb_tde_next(VALUE self)
1015
1033
  * Move forward to the next document in the enumeration. Returns +true+ if
1016
1034
  * there is another document or +false+ otherwise.
1017
1035
  */
1018
- static VALUE
1019
- frb_tde_next_position(VALUE self)
1020
- {
1036
+ static VALUE frb_tde_next_position(VALUE self) {
1021
1037
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
1022
1038
  int pos;
1023
1039
  if (tde->next_position == NULL) {
@@ -1039,9 +1055,7 @@ frb_tde_next_position(VALUE self)
1039
1055
  * NOTE: this method can only be called once after each seek. If you need to
1040
1056
  * call +#each+ again then you should call +#seek+ again too.
1041
1057
  */
1042
- static VALUE
1043
- frb_tde_each(VALUE self)
1044
- {
1058
+ static VALUE frb_tde_each(VALUE self) {
1045
1059
  int doc_cnt = 0;
1046
1060
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
1047
1061
  VALUE vals = rb_ary_new2(2);
@@ -1083,9 +1097,7 @@ frb_tde_each(VALUE self)
1083
1097
  * # [30,3]
1084
1098
  * # ]
1085
1099
  */
1086
- static VALUE
1087
- frb_tde_to_json(int argc, VALUE *argv, VALUE self)
1088
- {
1100
+ static VALUE frb_tde_to_json(int argc, VALUE *argv, VALUE self) {
1089
1101
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
1090
1102
  VALUE rjson;
1091
1103
  char *json, *jp;
@@ -1157,9 +1169,7 @@ frb_tde_to_json(int argc, VALUE *argv, VALUE self)
1157
1169
  * puts " #{positions.join(', ')}"
1158
1170
  * end
1159
1171
  */
1160
- static VALUE
1161
- frb_tde_each_position(VALUE self)
1162
- {
1172
+ static VALUE frb_tde_each_position(VALUE self) {
1163
1173
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
1164
1174
  int pos;
1165
1175
  if (tde->next_position == NULL) {
@@ -1180,9 +1190,7 @@ frb_tde_each_position(VALUE self)
1180
1190
  * Skip to the required document number +target+ and return true if there is
1181
1191
  * a document >= +target+.
1182
1192
  */
1183
- static VALUE
1184
- frb_tde_skip_to(VALUE self, VALUE rtarget)
1185
- {
1193
+ static VALUE frb_tde_skip_to(VALUE self, VALUE rtarget) {
1186
1194
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
1187
1195
  return tde->skip_to(tde, FIX2INT(rtarget)) ? Qtrue : Qfalse;
1188
1196
  }
@@ -1193,9 +1201,7 @@ frb_tde_skip_to(VALUE self, VALUE rtarget)
1193
1201
  *
1194
1202
  ****************************************************************************/
1195
1203
 
1196
- static VALUE
1197
- frb_get_tv_offsets(FrtOffset *offset)
1198
- {
1204
+ static VALUE frb_get_tv_offsets(FrtOffset *offset) {
1199
1205
  return rb_struct_new(cTVOffsets,
1200
1206
  ULL2NUM((frt_u64)offset->start),
1201
1207
  ULL2NUM((frt_u64)offset->end),
@@ -1208,14 +1214,13 @@ frb_get_tv_offsets(FrtOffset *offset)
1208
1214
  *
1209
1215
  ****************************************************************************/
1210
1216
 
1211
- static VALUE
1212
- frb_get_tv_term(FrtTVTerm *tv_term)
1213
- {
1217
+ static VALUE frb_get_tv_term(FrtTVTerm *tv_term) {
1214
1218
  int i;
1215
1219
  const int freq = tv_term->freq;
1216
1220
  VALUE rtext;
1217
1221
  VALUE rpositions = Qnil;
1218
1222
  rtext = rb_str_new2(tv_term->text);
1223
+ rb_enc_associate(rtext, utf8_encoding);
1219
1224
  if (tv_term->positions) {
1220
1225
  int *positions = tv_term->positions;
1221
1226
  rpositions = rb_ary_new2(freq);
@@ -1232,9 +1237,7 @@ frb_get_tv_term(FrtTVTerm *tv_term)
1232
1237
  *
1233
1238
  ****************************************************************************/
1234
1239
 
1235
- static VALUE
1236
- frb_get_tv(FrtTermVector *tv)
1237
- {
1240
+ static VALUE frb_get_tv(FrtTermVector *tv) {
1238
1241
  int i;
1239
1242
  FrtTVTerm *terms = tv->terms;
1240
1243
  const int t_cnt = tv->term_cnt;
@@ -1265,19 +1268,18 @@ frb_get_tv(FrtTermVector *tv)
1265
1268
  *
1266
1269
  ****************************************************************************/
1267
1270
 
1268
- void
1269
- frb_iw_free(void *p)
1270
- {
1271
+ void frb_iw_free(void *p) {
1271
1272
  frt_iw_close((FrtIndexWriter *)p);
1272
1273
  }
1273
1274
 
1274
- void
1275
- frb_iw_mark(void *p)
1276
- {
1275
+ void frb_iw_mark(void *p) {
1277
1276
  FrtIndexWriter *iw = (FrtIndexWriter *)p;
1278
- frb_gc_mark(iw->analyzer);
1279
- frb_gc_mark(iw->store);
1280
- frb_gc_mark(iw->fis);
1277
+ if (iw->analyzer->ranalyzer)
1278
+ rb_gc_mark(iw->analyzer->ranalyzer);
1279
+ if (iw->store->rstore)
1280
+ rb_gc_mark(iw->store->rstore);
1281
+ if (iw->fis->rfis)
1282
+ rb_gc_mark(iw->fis->rfis);
1281
1283
  }
1282
1284
 
1283
1285
  /*
@@ -1288,11 +1290,11 @@ frb_iw_mark(void *p)
1288
1290
  * exclusively by the index writer. The garbage collector will do this
1289
1291
  * automatically if not called explicitly.
1290
1292
  */
1291
- static VALUE
1292
- frb_iw_close(VALUE self)
1293
- {
1293
+ static VALUE frb_iw_close(VALUE self) {
1294
1294
  FrtIndexWriter *iw = (FrtIndexWriter *)DATA_PTR(self);
1295
- Frt_Unwrap_Struct(self);
1295
+ ((struct RData *)(self))->data = NULL;
1296
+ ((struct RData *)(self))->dmark = NULL;
1297
+ ((struct RData *)(self))->dfree = NULL;
1296
1298
  frt_iw_close(iw);
1297
1299
  return Qnil;
1298
1300
  }
@@ -1321,9 +1323,31 @@ frb_iw_close(VALUE self)
1321
1323
  *
1322
1324
  * See FrtIndexWriter for more options.
1323
1325
  */
1324
- static VALUE
1325
- frb_iw_init(int argc, VALUE *argv, VALUE self)
1326
- {
1326
+ static size_t frb_index_writer_t_size(const void *p) {
1327
+ return sizeof(FrtIndexWriter);
1328
+ (void)p;
1329
+ }
1330
+
1331
+ const rb_data_type_t frb_index_writer_t = {
1332
+ .wrap_struct_name = "FrbIndexWriter",
1333
+ .function = {
1334
+ .dmark = frb_iw_mark,
1335
+ .dfree = frb_iw_free,
1336
+ .dsize = frb_index_writer_t_size,
1337
+ .dcompact = NULL,
1338
+ .reserved = {0},
1339
+ },
1340
+ .parent = NULL,
1341
+ .data = NULL,
1342
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY
1343
+ };
1344
+
1345
+ static VALUE frb_iw_alloc(VALUE rclass) {
1346
+ FrtIndexWriter *iw = frt_iw_alloc();
1347
+ return TypedData_Wrap_Struct(rclass, &frb_index_writer_t, iw);
1348
+ }
1349
+
1350
+ static VALUE frb_iw_init(int argc, VALUE *argv, VALUE self) {
1327
1351
  VALUE roptions, rval;
1328
1352
  bool create = false;
1329
1353
  bool create_if_missing = true;
@@ -1341,7 +1365,7 @@ frb_iw_init(int argc, VALUE *argv, VALUE self)
1341
1365
  Check_Type(roptions, T_HASH);
1342
1366
 
1343
1367
  if ((rval = rb_hash_aref(roptions, sym_dir)) != Qnil) {
1344
- Check_Type(rval, T_DATA);
1368
+ // Check_Type(rval, T_DATA);
1345
1369
  store = DATA_PTR(rval);
1346
1370
  } else if ((rval = rb_hash_aref(roptions, sym_path)) != Qnil) {
1347
1371
  StringValue(rval);
@@ -1349,17 +1373,9 @@ frb_iw_init(int argc, VALUE *argv, VALUE self)
1349
1373
  store = frt_open_fs_store(rs2s(rval));
1350
1374
  FRT_DEREF(store);
1351
1375
  }
1352
-
1353
- /* Let ruby's garbage collector handle the closing of the store
1354
- if (!close_dir) {
1355
- close_dir = RTEST(rb_hash_aref(roptions, sym_close_dir));
1356
- }
1357
- */
1358
1376
  /* use_compound_file defaults to true */
1359
1377
  config.use_compound_file =
1360
- (rb_hash_aref(roptions, sym_use_compound_file) == Qfalse)
1361
- ? false
1362
- : true;
1378
+ (rb_hash_aref(roptions, sym_use_compound_file) == Qfalse) ? false : true;
1363
1379
 
1364
1380
  if ((rval = rb_hash_aref(roptions, sym_analyzer)) != Qnil) {
1365
1381
  analyzer = frb_get_cwrapped_analyzer(rval);
@@ -1379,7 +1395,7 @@ frb_iw_init(int argc, VALUE *argv, VALUE self)
1379
1395
  SET_INT_ATTR(max_field_length);
1380
1396
  }
1381
1397
  if (NULL == store) {
1382
- store = frt_open_ram_store();
1398
+ store = frt_open_ram_store(NULL);
1383
1399
  FRT_DEREF(store);
1384
1400
  }
1385
1401
  if (!create && create_if_missing && !store->exists(store, "segments")) {
@@ -1388,26 +1404,29 @@ frb_iw_init(int argc, VALUE *argv, VALUE self)
1388
1404
  if (create) {
1389
1405
  FrtFieldInfos *fis;
1390
1406
  if ((rval = rb_hash_aref(roptions, sym_field_infos)) != Qnil) {
1391
- Data_Get_Struct(rval, FrtFieldInfos, fis);
1407
+ TypedData_Get_Struct(rval, FrtFieldInfos, &frb_field_infos_t, fis);
1392
1408
  frt_index_create(store, fis);
1393
1409
  } else {
1394
- fis = frt_fis_new(FRT_STORE_YES, FRT_INDEX_YES,
1395
- FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS);
1410
+ fis = frt_fis_new(FRT_STORE_YES, FRT_COMPRESSION_NONE, FRT_INDEX_YES, FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS);
1396
1411
  frt_index_create(store, fis);
1397
1412
  frt_fis_deref(fis);
1398
1413
  }
1399
1414
  }
1400
1415
 
1401
- iw = frt_iw_open(store, analyzer, &config);
1402
-
1403
- Frt_Wrap_Struct(self, &frb_iw_mark, &frb_iw_free, iw);
1404
- default:
1416
+ TypedData_Get_Struct(self, FrtIndexWriter, &frb_index_writer_t, iw);
1417
+ iw = frt_iw_open(iw, store, analyzer, &config);
1418
+ FRT_XCATCHALL
1405
1419
  ex_code = xcontext.excode;
1406
1420
  msg = xcontext.msg;
1407
1421
  FRT_HANDLED();
1408
1422
  FRT_XENDTRY
1409
1423
 
1410
- if (ex_code && msg) { frb_raise(ex_code, msg); }
1424
+ if (ex_code && msg) {
1425
+ ((struct RData *)(self))->data = NULL;
1426
+ ((struct RData *)(self))->dmark = NULL;
1427
+ ((struct RData *)(self))->dfree = NULL;
1428
+ frb_raise(ex_code, msg);
1429
+ }
1411
1430
 
1412
1431
  if (rb_block_given_p()) {
1413
1432
  rb_yield(self);
@@ -1439,7 +1458,7 @@ frb_hash_to_doc_i(VALUE key, VALUE value, VALUE arg)
1439
1458
  return ST_CONTINUE;
1440
1459
  } else {
1441
1460
  FrtDocument *doc = (FrtDocument *)arg;
1442
- FrtSymbol field = frb_field(key);
1461
+ ID field = frb_field(key);
1443
1462
  VALUE val;
1444
1463
  FrtDocField *df;
1445
1464
  if (NULL == (df = frt_doc_get_field(doc, field))) {
@@ -1455,17 +1474,17 @@ frb_hash_to_doc_i(VALUE key, VALUE value, VALUE arg)
1455
1474
  df->destroy_data = true;
1456
1475
  for (i = 0; i < RARRAY_LEN(value); i++) {
1457
1476
  val = rb_obj_as_string(RARRAY_PTR(value)[i]);
1458
- frt_df_add_data_len(df, rstrdup(val), RSTRING_LEN(val));
1477
+ frt_df_add_data_len(df, rstrdup(val), RSTRING_LEN(val), rb_enc_get(val));
1459
1478
  }
1460
1479
  }
1461
1480
  break;
1462
1481
  case T_STRING:
1463
- frt_df_add_data_len(df, rs2s(value), RSTRING_LEN(value));
1482
+ frt_df_add_data_len(df, rs2s(value), RSTRING_LEN(value), rb_enc_get(value));
1464
1483
  break;
1465
1484
  default:
1466
1485
  val = rb_obj_as_string(value);
1467
1486
  df->destroy_data = true;
1468
- frt_df_add_data_len(df, rstrdup(val), RSTRING_LEN(val));
1487
+ frt_df_add_data_len(df, rstrdup(val), RSTRING_LEN(val), rb_enc_get(val));
1469
1488
  break;
1470
1489
  }
1471
1490
  frt_doc_add_field(doc, df);
@@ -1495,25 +1514,23 @@ frb_get_doc(VALUE rdoc)
1495
1514
  df->destroy_data = true;
1496
1515
  for (i = 0; i < RARRAY_LEN(rdoc); i++) {
1497
1516
  val = rb_obj_as_string(RARRAY_PTR(rdoc)[i]);
1498
- frt_df_add_data_len(df, rstrdup(val), RSTRING_LEN(val));
1517
+ frt_df_add_data_len(df, rstrdup(val), RSTRING_LEN(val), rb_enc_get(val));
1499
1518
  }
1500
1519
  frt_doc_add_field(doc, df);
1501
1520
  }
1502
1521
  break;
1503
1522
  case T_SYMBOL:
1504
1523
  /* TODO: clean up this ugly cast */
1505
- df = frt_df_add_data(frt_df_new(fsym_content), (char *)rb_id2name(SYM2ID(rdoc)));
1524
+ df = frt_df_add_data(frt_df_new(fsym_content), (char *)rb_id2name(SYM2ID(rdoc)), rb_enc_get(rdoc));
1506
1525
  frt_doc_add_field(doc, df);
1507
1526
  break;
1508
1527
  case T_STRING:
1509
- df = frt_df_add_data_len(frt_df_new(fsym_content), rs2s(rdoc),
1510
- RSTRING_LEN(rdoc));
1528
+ df = frt_df_add_data_len(frt_df_new(fsym_content), rs2s(rdoc), RSTRING_LEN(rdoc), rb_enc_get(rdoc));
1511
1529
  frt_doc_add_field(doc, df);
1512
1530
  break;
1513
1531
  default:
1514
1532
  val = rb_obj_as_string(rdoc);
1515
- df = frt_df_add_data_len(frt_df_new(fsym_content), rstrdup(val),
1516
- RSTRING_LEN(val));
1533
+ df = frt_df_add_data_len(frt_df_new(fsym_content), rstrdup(val), RSTRING_LEN(val), rb_enc_get(val));
1517
1534
  df->destroy_data = true;
1518
1535
  frt_doc_add_field(doc, df);
1519
1536
  break;
@@ -1575,6 +1592,48 @@ frb_iw_commit(VALUE self)
1575
1592
  return self;
1576
1593
  }
1577
1594
 
1595
+ /* index reader intermission */
1596
+ static VALUE frb_ir_close(VALUE self);
1597
+
1598
+ void frb_ir_free(void *p) {
1599
+ frt_ir_close((FrtIndexReader *)p);
1600
+ }
1601
+
1602
+ void frb_ir_mark(void *p) {
1603
+ FrtIndexReader *ir = (FrtIndexReader *)p;
1604
+ FrtMultiReader *mr = (FrtMultiReader *)p;
1605
+
1606
+ if (ir->type == FRT_MULTI_READER) {
1607
+ int i;
1608
+ for (i = 0; i < mr->r_cnt; i++) {
1609
+ if (mr->sub_readers[i]->rir)
1610
+ rb_gc_mark(mr->sub_readers[i]->rir);
1611
+ }
1612
+ } else {
1613
+ if (ir->store && ir->store->rstore)
1614
+ rb_gc_mark(ir->store->rstore);
1615
+ }
1616
+ }
1617
+
1618
+ static size_t frb_index_reader_t_size(const void *p) {
1619
+ return sizeof(FrtMultiReader);
1620
+ (void)p;
1621
+ }
1622
+
1623
+ const rb_data_type_t frb_index_reader_t = {
1624
+ .wrap_struct_name = "FrbIndexReader",
1625
+ .function = {
1626
+ .dmark = frb_ir_mark,
1627
+ .dfree = frb_ir_free,
1628
+ .dsize = frb_index_reader_t_size,
1629
+ .dcompact = NULL,
1630
+ .reserved = {0},
1631
+ },
1632
+ .parent = NULL,
1633
+ .data = NULL,
1634
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY
1635
+ };
1636
+
1578
1637
  /*
1579
1638
  * call-seq:
1580
1639
  * iw.add_readers(reader_array) -> iw
@@ -1585,9 +1644,7 @@ frb_iw_commit(VALUE self)
1585
1644
  * machines. Then you can finish by merging all of the indexes into a single
1586
1645
  * index.
1587
1646
  */
1588
- static VALUE
1589
- frb_iw_add_readers(VALUE self, VALUE rreaders)
1590
- {
1647
+ static VALUE frb_iw_add_readers(VALUE self, VALUE rreaders) {
1591
1648
  FrtIndexWriter *iw = (FrtIndexWriter *)DATA_PTR(self);
1592
1649
  int i;
1593
1650
  FrtIndexReader **irs;
@@ -1597,7 +1654,7 @@ frb_iw_add_readers(VALUE self, VALUE rreaders)
1597
1654
  i = RARRAY_LEN(rreaders);
1598
1655
  while (i-- > 0) {
1599
1656
  FrtIndexReader *ir;
1600
- Data_Get_Struct(RARRAY_PTR(rreaders)[i], FrtIndexReader, ir);
1657
+ TypedData_Get_Struct(RARRAY_PTR(rreaders)[i], FrtIndexReader, &frb_index_reader_t, ir);
1601
1658
  irs[i] = ir;
1602
1659
  }
1603
1660
  frt_iw_add_readers(iw, irs, RARRAY_LEN(rreaders));
@@ -1943,26 +2000,50 @@ frb_iw_set_use_compound_file(VALUE self, VALUE rval)
1943
2000
  *
1944
2001
  ****************************************************************************/
1945
2002
 
1946
- static void
1947
- frb_lzd_data_free(void *p)
1948
- {
2003
+ static void frb_lzd_data_free(void *p) {
1949
2004
  frt_lazy_doc_close((FrtLazyDoc *)p);
1950
2005
  }
1951
2006
 
1952
- static VALUE
1953
- frb_lazy_df_load(VALUE self, VALUE rkey, FrtLazyDocField *lazy_df)
1954
- {
2007
+ static size_t frb_lazy_doc_size(const void *p) {
2008
+ return sizeof(FrtLazyDoc);
2009
+ (void)p;
2010
+ }
2011
+
2012
+ const rb_data_type_t frb_lazy_doc_t = {
2013
+ .wrap_struct_name = "FrbLazyDoc",
2014
+ .function = {
2015
+ .dmark = NULL,
2016
+ .dfree = frb_lzd_data_free,
2017
+ .dsize = frb_lazy_doc_size,
2018
+ .dcompact = NULL,
2019
+ .reserved = {0},
2020
+ },
2021
+ .parent = NULL,
2022
+ .data = NULL,
2023
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY
2024
+ };
2025
+
2026
+ static VALUE frb_lzd_alloc(VALUE klass) {
2027
+ FrtLazyDoc *ld = FRT_ALLOC(FrtLazyDoc);
2028
+ return TypedData_Wrap_Struct(klass, &frb_lazy_doc_t, ld);
2029
+ }
2030
+
2031
+ static VALUE frb_lazy_df_load(VALUE self, VALUE rkey, FrtLazyDocField *lazy_df) {
1955
2032
  VALUE rdata = Qnil;
1956
2033
  if (lazy_df) {
1957
2034
  if (lazy_df->size == 1) {
1958
2035
  char *data = frt_lazy_df_get_data(lazy_df, 0);
1959
- rdata = rb_str_new(data, lazy_df->len);
2036
+ rdata = rb_str_new(data, lazy_df->data[0].length);
2037
+ rb_enc_associate(rdata, lazy_df->data[0].encoding);
1960
2038
  } else {
1961
2039
  int i;
2040
+ VALUE rstr;
1962
2041
  rdata = rb_ary_new2(lazy_df->size);
1963
2042
  for (i = 0; i < lazy_df->size; i++) {
1964
2043
  char *data = frt_lazy_df_get_data(lazy_df, i);
1965
- rb_ary_store(rdata, i, rb_str_new(data, lazy_df->data[i].length));
2044
+ rstr = rb_str_new(data, lazy_df->data[i].length);
2045
+ rb_enc_associate(rstr, lazy_df->data[i].encoding);
2046
+ rb_ary_store(rdata, i, rstr);
1966
2047
  }
1967
2048
  }
1968
2049
  rb_hash_aset(self, rkey, rdata);
@@ -1977,11 +2058,9 @@ frb_lazy_df_load(VALUE self, VALUE rkey, FrtLazyDocField *lazy_df)
1977
2058
  * This method is used internally to lazily load fields. You should never
1978
2059
  * really need to call it yourself.
1979
2060
  */
1980
- static VALUE
1981
- frb_lzd_default(VALUE self, VALUE rkey)
1982
- {
2061
+ static VALUE frb_lzd_default(VALUE self, VALUE rkey) {
1983
2062
  FrtLazyDoc *lazy_doc = (FrtLazyDoc *)DATA_PTR(rb_ivar_get(self, id_data));
1984
- FrtSymbol field = frb_field(rkey);
2063
+ ID field = frb_field(rkey);
1985
2064
  VALUE rfield = ID2SYM(field);
1986
2065
 
1987
2066
  return frb_lazy_df_load(self, rfield, frt_lazy_doc_get(lazy_doc, field));
@@ -1995,9 +2074,7 @@ frb_lzd_default(VALUE self, VALUE rkey)
1995
2074
  * to access any of these fields in the document the field will be loaded.
1996
2075
  * Try to access any other field and nil will be returned.
1997
2076
  */
1998
- static VALUE
1999
- frb_lzd_fields(VALUE self)
2000
- {
2077
+ static VALUE frb_lzd_fields(VALUE self) {
2001
2078
  return rb_ivar_get(self, id_fields);
2002
2079
  }
2003
2080
 
@@ -2007,9 +2084,7 @@ frb_lzd_fields(VALUE self)
2007
2084
  *
2008
2085
  * Load all unloaded fields in the document from the index.
2009
2086
  */
2010
- static VALUE
2011
- frb_lzd_load(VALUE self)
2012
- {
2087
+ static VALUE frb_lzd_load(VALUE self) {
2013
2088
  FrtLazyDoc *lazy_doc = (FrtLazyDoc *)DATA_PTR(rb_ivar_get(self, id_data));
2014
2089
  int i;
2015
2090
  for (i = 0; i < lazy_doc->size; i++) {
@@ -2019,9 +2094,7 @@ frb_lzd_load(VALUE self)
2019
2094
  return self;
2020
2095
  }
2021
2096
 
2022
- VALUE
2023
- frb_get_lazy_doc(FrtLazyDoc *lazy_doc)
2024
- {
2097
+ VALUE frb_get_lazy_doc(FrtLazyDoc *lazy_doc) {
2025
2098
  int i;
2026
2099
  VALUE rfields = rb_ary_new2(lazy_doc->size);
2027
2100
 
@@ -2029,7 +2102,7 @@ frb_get_lazy_doc(FrtLazyDoc *lazy_doc)
2029
2102
  self = rb_hash_new();
2030
2103
  OBJSETUP(self, cLazyDoc, T_HASH);
2031
2104
 
2032
- rdata = Data_Wrap_Struct(cLazyDocData, NULL, &frb_lzd_data_free, lazy_doc);
2105
+ rdata = TypedData_Wrap_Struct(cLazyDocData, &frb_lazy_doc_t, lazy_doc);
2033
2106
  rb_ivar_set(self, id_data, rdata);
2034
2107
 
2035
2108
  for (i = 0; i < lazy_doc->size; i++) {
@@ -2046,32 +2119,6 @@ frb_get_lazy_doc(FrtLazyDoc *lazy_doc)
2046
2119
  *
2047
2120
  ****************************************************************************/
2048
2121
 
2049
- void
2050
- frb_ir_free(void *p)
2051
- {
2052
- object_del(p);
2053
- frt_ir_close((FrtIndexReader *)p);
2054
- }
2055
-
2056
- void
2057
- frb_ir_mark(void *p)
2058
- {
2059
- FrtIndexReader *ir = (FrtIndexReader *)p;
2060
- frb_gc_mark(ir->store);
2061
- }
2062
-
2063
- static VALUE frb_ir_close(VALUE self);
2064
-
2065
- void
2066
- frb_mr_mark(void *p)
2067
- {
2068
- FrtMultiReader *mr = (FrtMultiReader *)p;
2069
- int i;
2070
- for (i = 0; i < mr->r_cnt; i++) {
2071
- frb_gc_mark(mr->sub_readers[i]);
2072
- }
2073
- }
2074
-
2075
2122
  /*
2076
2123
  * call-seq:
2077
2124
  * IndexReader.new(dir) -> index_reader
@@ -2098,9 +2145,15 @@ frb_mr_mark(void *p)
2098
2145
  *
2099
2146
  * iw = IndexReader.new(["/path/to/index1", "/path/to/index2"])
2100
2147
  */
2101
- static VALUE
2102
- frb_ir_init(VALUE self, VALUE rdir)
2103
- {
2148
+
2149
+ static VALUE frb_ir_alloc(VALUE rclass) {
2150
+ // allocate for FrtSegmentReader, the largest of the Frt*Reader structs,
2151
+ // FrtIndexReader is part of it, and later on it's determined what it's going to be
2152
+ FrtIndexReader *ir = (FrtIndexReader *)frt_sr_alloc();
2153
+ return TypedData_Wrap_Struct(rclass, &frb_index_reader_t, ir);
2154
+ }
2155
+
2156
+ static VALUE frb_ir_init(VALUE self, VALUE rdir) {
2104
2157
  FrtStore *store = NULL;
2105
2158
  FrtIndexReader *ir;
2106
2159
  int i;
@@ -2120,7 +2173,7 @@ frb_ir_init(VALUE self, VALUE rdir)
2120
2173
  switch (TYPE(rdir)) {
2121
2174
  case T_DATA:
2122
2175
  if (CLASS_OF(rdir) == cIndexReader) {
2123
- Data_Get_Struct(rdir, FrtIndexReader, sub_readers[i]);
2176
+ TypedData_Get_Struct(rdir, FrtIndexReader, &frb_index_reader_t, sub_readers[i]);
2124
2177
  FRT_REF(sub_readers[i]);
2125
2178
  continue;
2126
2179
  } else if (RTEST(rb_obj_is_kind_of(rdir, cDirectory))) {
@@ -2145,10 +2198,10 @@ frb_ir_init(VALUE self, VALUE rdir)
2145
2198
  rs2s(rb_obj_as_string(rdir)));
2146
2199
  break;
2147
2200
  }
2148
- sub_readers[i] = frt_ir_open(store);
2201
+ sub_readers[i] = frt_ir_open(NULL, store);
2149
2202
  }
2150
- ir = frt_mr_open(sub_readers, reader_cnt);
2151
- Frt_Wrap_Struct(self, &frb_mr_mark, &frb_ir_free, ir);
2203
+ TypedData_Get_Struct(self, FrtIndexReader, &frb_index_reader_t, ir);
2204
+ ir = frt_mr_open(ir, sub_readers, reader_cnt);
2152
2205
  } else {
2153
2206
  switch (TYPE(rdir)) {
2154
2207
  case T_DATA:
@@ -2165,25 +2218,28 @@ frb_ir_init(VALUE self, VALUE rdir)
2165
2218
  rs2s(rb_obj_as_string(rdir)));
2166
2219
  break;
2167
2220
  }
2168
- ir = frt_ir_open(store);
2169
- Frt_Wrap_Struct(self, &frb_ir_mark, &frb_ir_free, ir);
2221
+ TypedData_Get_Struct(self, FrtIndexReader, &frb_index_reader_t, ir);
2222
+ ir = frt_ir_open(ir, store);
2170
2223
  }
2171
- default:
2224
+ FRT_XCATCHALL
2172
2225
  ex_code = xcontext.excode;
2173
2226
  msg = xcontext.msg;
2174
2227
  FRT_HANDLED();
2175
2228
  FRT_XENDTRY
2176
2229
 
2177
- if (ex_code && msg) { frb_raise(ex_code, msg); }
2230
+ if (ex_code && msg) {
2231
+ ((struct RData *)(self))->data = NULL;
2232
+ ((struct RData *)(self))->dmark = NULL;
2233
+ ((struct RData *)(self))->dfree = NULL;
2234
+ frb_raise(ex_code, msg);
2235
+ }
2178
2236
 
2179
- object_add(ir, self);
2237
+ ir->rir = self;
2180
2238
 
2181
2239
  fis = ir->fis;
2182
2240
  for (i = 0; i < fis->size; i++) {
2183
2241
  FrtFieldInfo *fi = fis->fields[i];
2184
- rb_hash_aset(rfield_num_map,
2185
- ID2SYM(fi->name),
2186
- INT2FIX(fi->number));
2242
+ rb_hash_aset(rfield_num_map, ID2SYM(fi->name), INT2FIX(fi->number));
2187
2243
  }
2188
2244
  rb_ivar_set(self, id_fld_num_map, rfield_num_map);
2189
2245
 
@@ -2281,8 +2337,9 @@ static VALUE
2281
2337
  frb_ir_close(VALUE self)
2282
2338
  {
2283
2339
  FrtIndexReader *ir = (FrtIndexReader *)DATA_PTR(self);
2284
- object_del(ir);
2285
- Frt_Unwrap_Struct(self);
2340
+ ((struct RData *)(self))->data = NULL;
2341
+ ((struct RData *)(self))->dmark = NULL;
2342
+ ((struct RData *)(self))->dfree = NULL;
2286
2343
  frt_ir_close(ir);
2287
2344
  return self;
2288
2345
  }
@@ -2482,7 +2539,7 @@ frb_ir_term_vector(VALUE self, VALUE rdoc_id, VALUE rfield)
2482
2539
  static void
2483
2540
  frb_add_each_tv(void *key, void *value, void *rtvs)
2484
2541
  {
2485
- rb_hash_aset((VALUE)rtvs, ID2SYM((FrtSymbol)key), frb_get_tv(value));
2542
+ rb_hash_aset((VALUE)rtvs, ID2SYM((ID)key), frb_get_tv(value));
2486
2543
  }
2487
2544
 
2488
2545
  /*
@@ -2769,9 +2826,14 @@ frb_ir_version(VALUE self)
2769
2826
  * | | want to highlight matches.
2770
2827
  * | | or print match excerpts a la
2771
2828
  * | | Google search.
2829
+ * -------------|-------------------------|------------------------------
2830
+ * :compression | :no (default) | Don't compress stored field
2831
+ * | |
2832
+ * | :brotli | Compress field using Brotli
2772
2833
  * | |
2773
- * | :compressed | Store field in compressed
2774
- * | | format.
2834
+ * | :bz2 | Compress field using BZip2
2835
+ * | |
2836
+ * | :lz4 | Compress field using LZ4
2775
2837
  * -------------|-------------------------|------------------------------
2776
2838
  * :index | :no | Do not make this field
2777
2839
  * | | searchable.
@@ -2831,7 +2893,7 @@ frb_ir_version(VALUE self)
2831
2893
  * fi = FieldInfo.new(:created_on, :index => :untokenized_omit_norms,
2832
2894
  * :term_vector => :no)
2833
2895
  *
2834
- * fi = FieldInfo.new(:image, :store => :compressed, :index => :no,
2896
+ * fi = FieldInfo.new(:image, :store => :yes, :compression => :brotli, :index => :no,
2835
2897
  * :term_vector => :no)
2836
2898
  */
2837
2899
  static void
@@ -2841,8 +2903,11 @@ Init_FieldInfo(void)
2841
2903
  sym_index = ID2SYM(rb_intern("index"));
2842
2904
  sym_term_vector = ID2SYM(rb_intern("term_vector"));
2843
2905
 
2844
- sym_compress = ID2SYM(rb_intern("compress"));
2845
- sym_compressed = ID2SYM(rb_intern("compressed"));
2906
+ sym_brotli = ID2SYM(rb_intern("brotli"));
2907
+ sym_bz2 = ID2SYM(rb_intern("bz2"));
2908
+ sym_lz4 = ID2SYM(rb_intern("lz4"));
2909
+ // sym_level = ID2SYM(rb_intern("level"));
2910
+ sym_compression = ID2SYM(rb_intern("compression"));
2846
2911
 
2847
2912
  sym_untokenized = ID2SYM(rb_intern("untokenized"));
2848
2913
  sym_omit_norms = ID2SYM(rb_intern("omit_norms"));
@@ -2853,7 +2918,7 @@ Init_FieldInfo(void)
2853
2918
  sym_with_positions_offsets = ID2SYM(rb_intern("with_positions_offsets"));
2854
2919
 
2855
2920
  cFieldInfo = rb_define_class_under(mIndex, "FieldInfo", rb_cObject);
2856
- rb_define_alloc_func(cFieldInfo, frb_data_alloc);
2921
+ rb_define_alloc_func(cFieldInfo, frb_fi_alloc);
2857
2922
 
2858
2923
  rb_define_method(cFieldInfo, "initialize", frb_fi_init, -1);
2859
2924
  rb_define_method(cFieldInfo, "name", frb_fi_name, 0);
@@ -2897,7 +2962,7 @@ Init_FieldInfo(void)
2897
2962
  * field_infos.add_field(:created_on, :index => :untokenized_omit_norms,
2898
2963
  * :term_vector => :no)
2899
2964
  *
2900
- * field_infos.add_field(:image, :store => :compressed, :index => :no,
2965
+ * field_infos.add_field(:image, :store => :yes, :compression => :brotli, :index => :no,
2901
2966
  * :term_vector => :no)
2902
2967
  *
2903
2968
  * field_infos.create_index("/path/to/index")
@@ -2913,13 +2978,11 @@ Init_FieldInfo(void)
2913
2978
  * along. If you add a document to the index which has fields that the index
2914
2979
  * doesn't know about then the default properties are used for the new field.
2915
2980
  */
2916
- static void
2917
- Init_FieldInfos(void)
2918
- {
2981
+ static void Init_FieldInfos(void) {
2919
2982
  Init_FieldInfo();
2920
2983
 
2921
2984
  cFieldInfos = rb_define_class_under(mIndex, "FieldInfos", rb_cObject);
2922
- rb_define_alloc_func(cFieldInfos, frb_data_alloc);
2985
+ rb_define_alloc_func(cFieldInfos, frb_fis_alloc);
2923
2986
 
2924
2987
  rb_define_method(cFieldInfos, "initialize", frb_fis_init, -1);
2925
2988
  rb_define_method(cFieldInfos, "to_a", frb_fis_to_a, 0);
@@ -2958,12 +3021,11 @@ Init_FieldInfos(void)
2958
3021
  * end
2959
3022
  */
2960
3023
  static void
2961
- Init_TermEnum(void)
2962
- {
3024
+ Init_TermEnum(void) {
2963
3025
  id_term = rb_intern("@term");
2964
3026
 
2965
3027
  cTermEnum = rb_define_class_under(mIndex, "TermEnum", rb_cObject);
2966
- rb_define_alloc_func(cTermEnum, frb_data_alloc);
3028
+ rb_define_alloc_func(cTermEnum, frb_te_alloc);
2967
3029
 
2968
3030
  rb_define_method(cTermEnum, "next?", frb_te_next, 0);
2969
3031
  rb_define_method(cTermEnum, "term", frb_te_term, 0);
@@ -3007,14 +3069,12 @@ Init_TermEnum(void)
3007
3069
  * puts " #{positions.join(', ')}"
3008
3070
  * end
3009
3071
  */
3010
- static void
3011
- Init_TermDocEnum(void)
3012
- {
3072
+ static void Init_TermDocEnum(void) {
3013
3073
  id_fld_num_map = rb_intern("@field_num_map");
3014
3074
  id_field_num = rb_intern("@field_num");
3015
3075
 
3016
3076
  cTermDocEnum = rb_define_class_under(mIndex, "TermDocEnum", rb_cObject);
3017
- rb_define_alloc_func(cTermDocEnum, frb_data_alloc);
3077
+ rb_define_alloc_func(cTermDocEnum, frb_tde_alloc);
3018
3078
  rb_define_method(cTermDocEnum, "seek", frb_tde_seek, 2);
3019
3079
  rb_define_method(cTermDocEnum, "seek_term_enum", frb_tde_seek_te, 1);
3020
3080
  rb_define_method(cTermDocEnum, "doc", frb_tde_doc, 0);
@@ -3027,10 +3087,6 @@ Init_TermDocEnum(void)
3027
3087
  rb_define_method(cTermDocEnum, "to_json", frb_tde_to_json, -1);
3028
3088
  }
3029
3089
 
3030
- /* rdochack
3031
- cTermVector = rb_define_class_under(mIndex, "TermVector", rb_cObject);
3032
- */
3033
-
3034
3090
  /*
3035
3091
  * Document-class: Ferret::Index::TermVector::TVOffsets
3036
3092
  *
@@ -3047,13 +3103,8 @@ cTermVector = rb_define_class_under(mIndex, "TermVector", rb_cObject);
3047
3103
  *
3048
3104
  * See the Analysis module for more information on setting the offsets.
3049
3105
  */
3050
- static void
3051
- Init_TVOffsets(void)
3052
- {
3106
+ static void Init_TVOffsets(void) {
3053
3107
  const char *tv_offsets_class = "TVOffsets";
3054
- /* rdochack
3055
- cTVOffsets = rb_define_class_under(cTermVector, "TVOffsets", rb_cObject);
3056
- */
3057
3108
  cTVOffsets = rb_struct_define(tv_offsets_class, "start", "end", NULL);
3058
3109
  rb_set_class_path(cTVOffsets, cTermVector, tv_offsets_class);
3059
3110
  rb_const_set(mIndex, rb_intern(tv_offsets_class), cTVOffsets);
@@ -3074,13 +3125,8 @@ Init_TVOffsets(void)
3074
3125
  * tv_term = tv.terms.find {|tvt| tvt.text == "fox"}
3075
3126
  * offsets = tv_term.positions.collect {|pos| tv.offsets[pos]}
3076
3127
  */
3077
- static void
3078
- Init_TVTerm(void)
3079
- {
3128
+ static void Init_TVTerm(void) {
3080
3129
  const char *tv_term_class = "TVTerm";
3081
- /* rdochack
3082
- cTVTerm = rb_define_class_under(cTermVector, "TVTerm", rb_cObject);
3083
- */
3084
3130
  cTVTerm = rb_struct_define(tv_term_class, "text", "freq", "positions", NULL);
3085
3131
  rb_set_class_path(cTVTerm, cTermVector, tv_term_class);
3086
3132
  rb_const_set(mIndex, rb_intern(tv_term_class), cTVTerm);
@@ -3116,15 +3162,9 @@ Init_TVTerm(void)
3116
3162
  * particular that you need to store both positions and offsets if you want
3117
3163
  * to associate offsets with particular terms.
3118
3164
  */
3119
- static void
3120
- Init_TermVector(void)
3121
- {
3165
+ static void Init_TermVector(void) {
3122
3166
  const char *tv_class = "TermVector";
3123
- /* rdochack
3124
- cTermVector = rb_define_class_under(mIndex, "TermVector", rb_cObject);
3125
- */
3126
- cTermVector = rb_struct_define(tv_class,
3127
- "field", "terms", "offsets", NULL);
3167
+ cTermVector = rb_struct_define(tv_class, "field", "terms", "offsets", NULL);
3128
3168
  rb_set_class_path(cTermVector, mIndex, tv_class);
3129
3169
  rb_const_set(mIndex, rb_intern(tv_class), cTermVector);
3130
3170
 
@@ -3245,112 +3285,80 @@ Init_TermVector(void)
3245
3285
  *
3246
3286
  * index_writer.delete(:id, "/path/to/indexed/file")
3247
3287
  */
3248
- void
3249
- Init_IndexWriter(void)
3250
- {
3288
+ void Init_IndexWriter(void) {
3251
3289
  id_boost = rb_intern("boost");
3252
3290
 
3253
- sym_create = ID2SYM(rb_intern("create"));
3254
- sym_create_if_missing = ID2SYM(rb_intern("create_if_missing"));
3255
- sym_field_infos = ID2SYM(rb_intern("field_infos"));
3291
+ sym_create = ID2SYM(rb_intern("create"));
3292
+ sym_create_if_missing = ID2SYM(rb_intern("create_if_missing"));
3293
+ sym_field_infos = ID2SYM(rb_intern("field_infos"));
3256
3294
 
3257
- sym_chunk_size = ID2SYM(rb_intern("chunk_size"));
3258
- sym_max_buffer_memory = ID2SYM(rb_intern("max_buffer_memory"));
3259
- sym_index_interval = ID2SYM(rb_intern("term_index_interval"));
3260
- sym_skip_interval = ID2SYM(rb_intern("doc_skip_interval"));
3261
- sym_merge_factor = ID2SYM(rb_intern("merge_factor"));
3262
- sym_max_buffered_docs = ID2SYM(rb_intern("max_buffered_docs"));
3263
- sym_max_merge_docs = ID2SYM(rb_intern("max_merge_docs"));
3264
- sym_max_field_length = ID2SYM(rb_intern("max_field_length"));
3265
- sym_use_compound_file = ID2SYM(rb_intern("use_compound_file"));
3295
+ sym_chunk_size = ID2SYM(rb_intern("chunk_size"));
3296
+ sym_max_buffer_memory = ID2SYM(rb_intern("max_buffer_memory"));
3297
+ sym_index_interval = ID2SYM(rb_intern("term_index_interval"));
3298
+ sym_skip_interval = ID2SYM(rb_intern("doc_skip_interval"));
3299
+ sym_merge_factor = ID2SYM(rb_intern("merge_factor"));
3300
+ sym_max_buffered_docs = ID2SYM(rb_intern("max_buffered_docs"));
3301
+ sym_max_merge_docs = ID2SYM(rb_intern("max_merge_docs"));
3302
+ sym_max_field_length = ID2SYM(rb_intern("max_field_length"));
3303
+ sym_use_compound_file = ID2SYM(rb_intern("use_compound_file"));
3266
3304
 
3267
3305
  cIndexWriter = rb_define_class_under(mIndex, "IndexWriter", rb_cObject);
3268
- rb_define_alloc_func(cIndexWriter, frb_data_alloc);
3306
+ rb_define_alloc_func(cIndexWriter, frb_iw_alloc);
3269
3307
 
3270
3308
  rb_define_const(cIndexWriter, "WRITE_LOCK_TIMEOUT", INT2FIX(1));
3271
3309
  rb_define_const(cIndexWriter, "COMMIT_LOCK_TIMEOUT", INT2FIX(10));
3272
- rb_define_const(cIndexWriter, "WRITE_LOCK_NAME",
3273
- rb_str_new2(FRT_WRITE_LOCK_NAME));
3274
- rb_define_const(cIndexWriter, "COMMIT_LOCK_NAME",
3275
- rb_str_new2(FRT_COMMIT_LOCK_NAME));
3276
- rb_define_const(cIndexWriter, "DEFAULT_CHUNK_SIZE",
3277
- INT2FIX(frt_default_config.chunk_size));
3278
- rb_define_const(cIndexWriter, "DEFAULT_MAX_BUFFER_MEMORY",
3279
- INT2FIX(frt_default_config.max_buffer_memory));
3280
- rb_define_const(cIndexWriter, "DEFAULT_TERM_INDEX_INTERVAL",
3281
- INT2FIX(frt_default_config.index_interval));
3282
- rb_define_const(cIndexWriter, "DEFAULT_DOC_SKIP_INTERVAL",
3283
- INT2FIX(frt_default_config.skip_interval));
3284
- rb_define_const(cIndexWriter, "DEFAULT_MERGE_FACTOR",
3285
- INT2FIX(frt_default_config.merge_factor));
3286
- rb_define_const(cIndexWriter, "DEFAULT_MAX_BUFFERED_DOCS",
3287
- INT2FIX(frt_default_config.max_buffered_docs));
3288
- rb_define_const(cIndexWriter, "DEFAULT_MAX_MERGE_DOCS",
3289
- INT2FIX(frt_default_config.max_merge_docs));
3290
- rb_define_const(cIndexWriter, "DEFAULT_MAX_FIELD_LENGTH",
3291
- INT2FIX(frt_default_config.max_field_length));
3292
- rb_define_const(cIndexWriter, "DEFAULT_USE_COMPOUND_FILE",
3293
- frt_default_config.use_compound_file ? Qtrue : Qfalse);
3294
-
3295
- rb_define_method(cIndexWriter, "initialize", frb_iw_init, -1);
3296
- rb_define_method(cIndexWriter, "doc_count", frb_iw_get_doc_count, 0);
3297
- rb_define_method(cIndexWriter, "close", frb_iw_close, 0);
3298
- rb_define_method(cIndexWriter, "add_document", frb_iw_add_doc, 1);
3299
- rb_define_method(cIndexWriter, "<<", frb_iw_add_doc, 1);
3300
- rb_define_method(cIndexWriter, "optimize", frb_iw_optimize, 0);
3301
- rb_define_method(cIndexWriter, "commit", frb_iw_commit, 0);
3302
- rb_define_method(cIndexWriter, "add_readers", frb_iw_add_readers, 1);
3303
- rb_define_method(cIndexWriter, "delete", frb_iw_delete, 2);
3304
- rb_define_method(cIndexWriter, "field_infos", frb_iw_field_infos, 0);
3305
- rb_define_method(cIndexWriter, "analyzer", frb_iw_get_analyzer, 0);
3306
- rb_define_method(cIndexWriter, "analyzer=", frb_iw_set_analyzer, 1);
3307
- rb_define_method(cIndexWriter, "version", frb_iw_version, 0);
3308
-
3309
- rb_define_method(cIndexWriter, "chunk_size",
3310
- frb_iw_get_chunk_size, 0);
3311
- rb_define_method(cIndexWriter, "chunk_size=",
3312
- frb_iw_set_chunk_size, 1);
3313
-
3314
- rb_define_method(cIndexWriter, "max_buffer_memory",
3315
- frb_iw_get_max_buffer_memory, 0);
3316
- rb_define_method(cIndexWriter, "max_buffer_memory=",
3317
- frb_iw_set_max_buffer_memory, 1);
3318
-
3319
- rb_define_method(cIndexWriter, "term_index_interval",
3320
- frb_iw_get_index_interval, 0);
3321
- rb_define_method(cIndexWriter, "term_index_interval=",
3322
- frb_iw_set_index_interval, 1);
3323
-
3324
- rb_define_method(cIndexWriter, "doc_skip_interval",
3325
- frb_iw_get_skip_interval, 0);
3326
- rb_define_method(cIndexWriter, "doc_skip_interval=",
3327
- frb_iw_set_skip_interval, 1);
3328
-
3329
- rb_define_method(cIndexWriter, "merge_factor",
3330
- frb_iw_get_merge_factor, 0);
3331
- rb_define_method(cIndexWriter, "merge_factor=",
3332
- frb_iw_set_merge_factor, 1);
3333
-
3334
- rb_define_method(cIndexWriter, "max_buffered_docs",
3335
- frb_iw_get_max_buffered_docs, 0);
3336
- rb_define_method(cIndexWriter, "max_buffered_docs=",
3337
- frb_iw_set_max_buffered_docs, 1);
3338
-
3339
- rb_define_method(cIndexWriter, "max_merge_docs",
3340
- frb_iw_get_max_merge_docs, 0);
3341
- rb_define_method(cIndexWriter, "max_merge_docs=",
3342
- frb_iw_set_max_merge_docs, 1);
3343
-
3344
- rb_define_method(cIndexWriter, "max_field_length",
3345
- frb_iw_get_max_field_length, 0);
3346
- rb_define_method(cIndexWriter, "max_field_length=",
3347
- frb_iw_set_max_field_length, 1);
3348
-
3349
- rb_define_method(cIndexWriter, "use_compound_file",
3350
- frb_iw_get_use_compound_file, 0);
3351
- rb_define_method(cIndexWriter, "use_compound_file=",
3352
- frb_iw_set_use_compound_file, 1);
3310
+ rb_define_const(cIndexWriter, "WRITE_LOCK_NAME", rb_str_new2(FRT_WRITE_LOCK_NAME));
3311
+ rb_define_const(cIndexWriter, "COMMIT_LOCK_NAME", rb_str_new2(FRT_COMMIT_LOCK_NAME));
3312
+ rb_define_const(cIndexWriter, "DEFAULT_CHUNK_SIZE", INT2FIX(frt_default_config.chunk_size));
3313
+ rb_define_const(cIndexWriter, "DEFAULT_MAX_BUFFER_MEMORY", INT2FIX(frt_default_config.max_buffer_memory));
3314
+ rb_define_const(cIndexWriter, "DEFAULT_TERM_INDEX_INTERVAL", INT2FIX(frt_default_config.index_interval));
3315
+ rb_define_const(cIndexWriter, "DEFAULT_DOC_SKIP_INTERVAL", INT2FIX(frt_default_config.skip_interval));
3316
+ rb_define_const(cIndexWriter, "DEFAULT_MERGE_FACTOR", INT2FIX(frt_default_config.merge_factor));
3317
+ rb_define_const(cIndexWriter, "DEFAULT_MAX_BUFFERED_DOCS", INT2FIX(frt_default_config.max_buffered_docs));
3318
+ rb_define_const(cIndexWriter, "DEFAULT_MAX_MERGE_DOCS", INT2FIX(frt_default_config.max_merge_docs));
3319
+ rb_define_const(cIndexWriter, "DEFAULT_MAX_FIELD_LENGTH", INT2FIX(frt_default_config.max_field_length));
3320
+ rb_define_const(cIndexWriter, "DEFAULT_USE_COMPOUND_FILE", frt_default_config.use_compound_file ? Qtrue : Qfalse);
3321
+
3322
+ rb_define_method(cIndexWriter, "initialize", frb_iw_init, -1);
3323
+ rb_define_method(cIndexWriter, "doc_count", frb_iw_get_doc_count, 0);
3324
+ rb_define_method(cIndexWriter, "close", frb_iw_close, 0);
3325
+ rb_define_method(cIndexWriter, "add_document", frb_iw_add_doc, 1);
3326
+ rb_define_method(cIndexWriter, "<<", frb_iw_add_doc, 1);
3327
+ rb_define_method(cIndexWriter, "optimize", frb_iw_optimize, 0);
3328
+ rb_define_method(cIndexWriter, "commit", frb_iw_commit, 0);
3329
+ rb_define_method(cIndexWriter, "add_readers", frb_iw_add_readers, 1);
3330
+ rb_define_method(cIndexWriter, "delete", frb_iw_delete, 2);
3331
+ rb_define_method(cIndexWriter, "field_infos", frb_iw_field_infos, 0);
3332
+ rb_define_method(cIndexWriter, "analyzer", frb_iw_get_analyzer, 0);
3333
+ rb_define_method(cIndexWriter, "analyzer=", frb_iw_set_analyzer, 1);
3334
+ rb_define_method(cIndexWriter, "version", frb_iw_version, 0);
3335
+
3336
+ rb_define_method(cIndexWriter, "chunk_size", frb_iw_get_chunk_size, 0);
3337
+ rb_define_method(cIndexWriter, "chunk_size=", frb_iw_set_chunk_size, 1);
3338
+
3339
+ rb_define_method(cIndexWriter, "max_buffer_memory", frb_iw_get_max_buffer_memory, 0);
3340
+ rb_define_method(cIndexWriter, "max_buffer_memory=", frb_iw_set_max_buffer_memory, 1);
3341
+
3342
+ rb_define_method(cIndexWriter, "term_index_interval", frb_iw_get_index_interval, 0);
3343
+ rb_define_method(cIndexWriter, "term_index_interval=", frb_iw_set_index_interval, 1);
3344
+
3345
+ rb_define_method(cIndexWriter, "doc_skip_interval", frb_iw_get_skip_interval, 0);
3346
+ rb_define_method(cIndexWriter, "doc_skip_interval=", frb_iw_set_skip_interval, 1);
3353
3347
 
3348
+ rb_define_method(cIndexWriter, "merge_factor", frb_iw_get_merge_factor, 0);
3349
+ rb_define_method(cIndexWriter, "merge_factor=", frb_iw_set_merge_factor, 1);
3350
+
3351
+ rb_define_method(cIndexWriter, "max_buffered_docs", frb_iw_get_max_buffered_docs, 0);
3352
+ rb_define_method(cIndexWriter, "max_buffered_docs=", frb_iw_set_max_buffered_docs, 1);
3353
+
3354
+ rb_define_method(cIndexWriter, "max_merge_docs", frb_iw_get_max_merge_docs, 0);
3355
+ rb_define_method(cIndexWriter, "max_merge_docs=", frb_iw_set_max_merge_docs, 1);
3356
+
3357
+ rb_define_method(cIndexWriter, "max_field_length", frb_iw_get_max_field_length, 0);
3358
+ rb_define_method(cIndexWriter, "max_field_length=", frb_iw_set_max_field_length, 1);
3359
+
3360
+ rb_define_method(cIndexWriter, "use_compound_file", frb_iw_get_use_compound_file, 0);
3361
+ rb_define_method(cIndexWriter, "use_compound_file=", frb_iw_set_use_compound_file, 1);
3354
3362
  }
3355
3363
 
3356
3364
  /*
@@ -3383,18 +3391,16 @@ Init_IndexWriter(void)
3383
3391
  * doc.values #=> ["the title", "the content"]
3384
3392
  * doc.fields #=> [:title, :content]
3385
3393
  */
3386
- void
3387
- Init_LazyDoc(void)
3388
- {
3394
+ void Init_LazyDoc(void) {
3389
3395
  id_fields = rb_intern("@fields");
3390
3396
 
3391
3397
  cLazyDoc = rb_define_class_under(mIndex, "LazyDoc", rb_cHash);
3392
- rb_define_method(cLazyDoc, "default", frb_lzd_default, 1);
3393
- rb_define_method(cLazyDoc, "load", frb_lzd_load, 0);
3394
- rb_define_method(cLazyDoc, "fields", frb_lzd_fields, 0);
3398
+ rb_define_method(cLazyDoc, "default", frb_lzd_default, 1);
3399
+ rb_define_method(cLazyDoc, "load", frb_lzd_load, 0);
3400
+ rb_define_method(cLazyDoc, "fields", frb_lzd_fields, 0);
3395
3401
 
3396
3402
  cLazyDocData = rb_define_class_under(cLazyDoc, "LazyDocData", rb_cObject);
3397
- rb_define_alloc_func(cLazyDocData, frb_data_alloc);
3403
+ rb_define_alloc_func(cLazyDocData, frb_lzd_alloc);
3398
3404
  }
3399
3405
 
3400
3406
  /*
@@ -3407,41 +3413,39 @@ Init_LazyDoc(void)
3407
3413
  * index, accessing term-vectors or deleting documents by document id. It is
3408
3414
  * also used internally by IndexSearcher.
3409
3415
  */
3410
- void
3411
- Init_IndexReader(void)
3412
- {
3416
+ void Init_IndexReader(void) {
3413
3417
  cIndexReader = rb_define_class_under(mIndex, "IndexReader", rb_cObject);
3414
- rb_define_alloc_func(cIndexReader, frb_data_alloc);
3415
- rb_define_method(cIndexReader, "initialize", frb_ir_init, 1);
3416
- rb_define_method(cIndexReader, "set_norm", frb_ir_set_norm, 3);
3417
- rb_define_method(cIndexReader, "norms", frb_ir_norms, 1);
3418
- rb_define_method(cIndexReader, "get_norms_into",frb_ir_get_norms_into, 3);
3419
- rb_define_method(cIndexReader, "commit", frb_ir_commit, 0);
3420
- rb_define_method(cIndexReader, "close", frb_ir_close, 0);
3421
- rb_define_method(cIndexReader, "has_deletions?",frb_ir_has_deletions, 0);
3422
- rb_define_method(cIndexReader, "delete", frb_ir_delete, 1);
3423
- rb_define_method(cIndexReader, "deleted?", frb_ir_is_deleted, 1);
3424
- rb_define_method(cIndexReader, "max_doc", frb_ir_max_doc, 0);
3425
- rb_define_method(cIndexReader, "num_docs", frb_ir_num_docs, 0);
3426
- rb_define_method(cIndexReader, "undelete_all", frb_ir_undelete_all, 0);
3427
- rb_define_method(cIndexReader, "latest?", frb_ir_is_latest, 0);
3428
- rb_define_method(cIndexReader, "get_document", frb_ir_get_doc, -1);
3429
- rb_define_method(cIndexReader, "[]", frb_ir_get_doc, -1);
3430
- rb_define_method(cIndexReader, "term_vector", frb_ir_term_vector, 2);
3431
- rb_define_method(cIndexReader, "term_vectors", frb_ir_term_vectors, 1);
3432
- rb_define_method(cIndexReader, "term_docs", frb_ir_term_docs, 0);
3433
- rb_define_method(cIndexReader, "term_positions",frb_ir_term_positions, 0);
3434
- rb_define_method(cIndexReader, "term_docs_for", frb_ir_term_docs_for, 2);
3418
+ rb_define_alloc_func(cIndexReader, frb_ir_alloc);
3419
+ rb_define_method(cIndexReader, "initialize", frb_ir_init, 1);
3420
+ rb_define_method(cIndexReader, "set_norm", frb_ir_set_norm, 3);
3421
+ rb_define_method(cIndexReader, "norms", frb_ir_norms, 1);
3422
+ rb_define_method(cIndexReader, "get_norms_into", frb_ir_get_norms_into, 3);
3423
+ rb_define_method(cIndexReader, "commit", frb_ir_commit, 0);
3424
+ rb_define_method(cIndexReader, "close", frb_ir_close, 0);
3425
+ rb_define_method(cIndexReader, "has_deletions?", frb_ir_has_deletions, 0);
3426
+ rb_define_method(cIndexReader, "delete", frb_ir_delete, 1);
3427
+ rb_define_method(cIndexReader, "deleted?", frb_ir_is_deleted, 1);
3428
+ rb_define_method(cIndexReader, "max_doc", frb_ir_max_doc, 0);
3429
+ rb_define_method(cIndexReader, "num_docs", frb_ir_num_docs, 0);
3430
+ rb_define_method(cIndexReader, "undelete_all", frb_ir_undelete_all, 0);
3431
+ rb_define_method(cIndexReader, "latest?", frb_ir_is_latest, 0);
3432
+ rb_define_method(cIndexReader, "get_document", frb_ir_get_doc, -1);
3433
+ rb_define_method(cIndexReader, "[]", frb_ir_get_doc, -1);
3434
+ rb_define_method(cIndexReader, "term_vector", frb_ir_term_vector, 2);
3435
+ rb_define_method(cIndexReader, "term_vectors", frb_ir_term_vectors, 1);
3436
+ rb_define_method(cIndexReader, "term_docs", frb_ir_term_docs, 0);
3437
+ rb_define_method(cIndexReader, "term_positions", frb_ir_term_positions, 0);
3438
+ rb_define_method(cIndexReader, "term_docs_for", frb_ir_term_docs_for, 2);
3435
3439
  rb_define_method(cIndexReader, "term_positions_for", frb_ir_t_pos_for, 2);
3436
- rb_define_method(cIndexReader, "doc_freq", frb_ir_doc_freq, 2);
3437
- rb_define_method(cIndexReader, "terms", frb_ir_terms, 1);
3438
- rb_define_method(cIndexReader, "terms_from", frb_ir_terms_from, 2);
3439
- rb_define_method(cIndexReader, "term_count", frb_ir_term_count, 1);
3440
- rb_define_method(cIndexReader, "fields", frb_ir_fields, 0);
3441
- rb_define_method(cIndexReader, "field_names", frb_ir_fields, 0);
3442
- rb_define_method(cIndexReader, "field_infos", frb_ir_field_infos, 0);
3440
+ rb_define_method(cIndexReader, "doc_freq", frb_ir_doc_freq, 2);
3441
+ rb_define_method(cIndexReader, "terms", frb_ir_terms, 1);
3442
+ rb_define_method(cIndexReader, "terms_from", frb_ir_terms_from, 2);
3443
+ rb_define_method(cIndexReader, "term_count", frb_ir_term_count, 1);
3444
+ rb_define_method(cIndexReader, "fields", frb_ir_fields, 0);
3445
+ rb_define_method(cIndexReader, "field_names", frb_ir_fields, 0);
3446
+ rb_define_method(cIndexReader, "field_infos", frb_ir_field_infos, 0);
3443
3447
  rb_define_method(cIndexReader, "tokenized_fields", frb_ir_tk_fields, 0);
3444
- rb_define_method(cIndexReader, "version", frb_ir_version, 0);
3448
+ rb_define_method(cIndexReader, "version", frb_ir_version, 0);
3445
3449
  }
3446
3450
 
3447
3451
  /* rdoc hack
@@ -3466,9 +3470,7 @@ extern VALUE mFerret = rb_define_module("Ferret");
3466
3470
  * building tag clouds, creating more-like-this queries, custom highlighting
3467
3471
  * etc. They are also useful for index browsers.
3468
3472
  */
3469
- void
3470
- Init_Index(void)
3471
- {
3473
+ void Init_Index(void) {
3472
3474
  mIndex = rb_define_module_under(mFerret, "Index");
3473
3475
 
3474
3476
  sym_boost = ID2SYM(rb_intern("boost"));