isomorfeus-ferret 0.12.7 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (164) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +101 -19
  3. data/README.md +54 -1
  4. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
  5. data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
  6. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
  7. data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
  8. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +1 -1
  9. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +1 -1
  10. data/ext/isomorfeus_ferret_ext/bzip_blocksort.c +1094 -0
  11. data/ext/isomorfeus_ferret_ext/bzip_huffman.c +205 -0
  12. data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
  13. data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
  14. data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
  15. data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
  16. data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
  17. data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
  18. data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
  19. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
  20. data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
  21. data/ext/isomorfeus_ferret_ext/frb_index.c +492 -474
  22. data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
  23. data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
  24. data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
  25. data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
  26. data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
  27. data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
  28. data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
  29. data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
  30. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
  31. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
  32. data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
  33. data/ext/isomorfeus_ferret_ext/frt_document.h +10 -10
  34. data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
  35. data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
  36. data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
  37. data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
  38. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
  39. data/ext/isomorfeus_ferret_ext/frt_global.c +105 -63
  40. data/ext/isomorfeus_ferret_ext/frt_global.h +7 -3
  41. data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
  42. data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
  43. data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
  44. data/ext/isomorfeus_ferret_ext/frt_index.c +580 -399
  45. data/ext/isomorfeus_ferret_ext/frt_index.h +272 -291
  46. data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
  47. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
  48. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +67 -91
  49. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
  50. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
  51. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
  52. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
  53. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
  54. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
  55. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +20 -16
  56. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
  57. data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
  58. data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
  59. data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
  60. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
  61. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +45 -84
  62. data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
  63. data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
  64. data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
  65. data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
  66. data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
  67. data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
  68. data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
  69. data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
  70. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +21 -109
  71. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
  72. data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
  73. data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
  74. data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
  75. data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
  76. data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
  77. data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
  78. data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
  79. data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
  80. data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
  81. data/ext/isomorfeus_ferret_ext/test.c +1 -2
  82. data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
  83. data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
  84. data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
  85. data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
  86. data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
  87. data/ext/isomorfeus_ferret_ext/test_fields.c +59 -60
  88. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
  89. data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
  90. data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
  91. data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
  92. data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
  93. data/ext/isomorfeus_ferret_ext/test_index.c +372 -365
  94. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
  95. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
  96. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
  97. data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
  98. data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
  99. data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
  100. data/ext/isomorfeus_ferret_ext/test_search.c +60 -62
  101. data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
  102. data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
  103. data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
  104. data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
  105. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
  106. data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
  107. data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
  108. data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
  109. data/lib/isomorfeus/ferret/version.rb +1 -1
  110. metadata +27 -57
  111. data/ext/isomorfeus_ferret_ext/email.rl +0 -21
  112. data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
  113. data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
  114. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
  115. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
  116. data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
  117. data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
  118. data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
  119. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
  120. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
  121. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
  122. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
  128. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
  129. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
  130. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
  131. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
  132. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
  133. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
  134. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
  135. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
  136. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
  137. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
  138. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
  139. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
  140. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
  141. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
  142. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
  143. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
  144. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
  145. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
  146. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
  147. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
  148. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
  149. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
  150. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
  151. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
  152. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
  153. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
  154. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
  155. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
  156. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
  157. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
  158. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
  159. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
  160. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
  161. data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
  162. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
  163. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
  164. data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -5,8 +5,7 @@
5
5
 
6
6
  #define FRT_BV_INIT_CAPA 256
7
7
 
8
- typedef struct FrtBitVector
9
- {
8
+ typedef struct FrtBitVector {
10
9
  /** The bits are held in an array of 32-bit integers */
11
10
  frt_u32 *bits;
12
11
 
@@ -26,6 +25,7 @@ typedef struct FrtBitVector
26
25
 
27
26
  bool extends_as_ones : 1;
28
27
  int ref_cnt;
28
+ VALUE rbv;
29
29
  } FrtBitVector;
30
30
 
31
31
  /**
@@ -74,9 +74,7 @@ extern void frt_bv_destroy(FrtBitVector *bv);
74
74
  * cause any bugs in this code but could cause problems if users are relying
75
75
  * on the fact that size is accurate.
76
76
  */
77
- static FRT_ATTR_ALWAYS_INLINE
78
- void frt_bv_set_value(FrtBitVector *bv, int bit, bool value)
79
- {
77
+ static FRT_ATTR_ALWAYS_INLINE void frt_bv_set_value(FrtBitVector *bv, int bit, bool value) {
80
78
  frt_u32 *word_p;
81
79
  int word = bit >> 5;
82
80
  frt_u32 bitmask = 1 << (bit & 31);
@@ -120,9 +118,7 @@ void frt_bv_set_value(FrtBitVector *bv, int bit, bool value)
120
118
  * @param bv the FrtBitVector to set the bit in
121
119
  * @param index the index of the bit to set
122
120
  */
123
- static FRT_ATTR_ALWAYS_INLINE
124
- void frt_bv_set(FrtBitVector *bv, int bit)
125
- {
121
+ static FRT_ATTR_ALWAYS_INLINE void frt_bv_set(FrtBitVector *bv, int bit) {
126
122
  frt_bv_set_value(bv, bit, 1);
127
123
  }
128
124
 
@@ -153,9 +149,7 @@ void frt_bv_set(FrtBitVector *bv, int bit)
153
149
  * @param bv the FrtBitVector to set the bit in
154
150
  * @param index the index of the bit to set
155
151
  */
156
- static FRT_ATTR_ALWAYS_INLINE
157
- void frt_bv_set_fast(FrtBitVector *bv, int bit)
158
- {
152
+ static FRT_ATTR_ALWAYS_INLINE void frt_bv_set_fast(FrtBitVector *bv, int bit) {
159
153
  bv->count++;
160
154
  bv->size = bit + 1;
161
155
  bv->bits[bit >> 5] |= (1 << (bit & 31));
@@ -170,9 +164,7 @@ void frt_bv_set_fast(FrtBitVector *bv, int bit)
170
164
  * @param index the index of the bit to check
171
165
  * @return 1 if the bit was set, 0 otherwise
172
166
  */
173
- static FRT_ATTR_ALWAYS_INLINE
174
- int frt_bv_get(FrtBitVector *bv, int bit)
175
- {
167
+ static FRT_ATTR_ALWAYS_INLINE int frt_bv_get(FrtBitVector *bv, int bit) {
176
168
  /* out of range: the bit was never stored, so return the extends_as_ones value */
177
169
  if (unlikely(bit >= bv->size)) {
178
170
  return bv->extends_as_ones;
@@ -188,9 +180,7 @@ int frt_bv_get(FrtBitVector *bv, int bit)
188
180
  * @param bv the FrtBitVector to unset the bit in
189
181
  * @param index the index of the bit to unset
190
182
  */
191
- static FRT_ATTR_ALWAYS_INLINE
192
- void frt_bv_unset(FrtBitVector *bv, int bit)
193
- {
183
+ static FRT_ATTR_ALWAYS_INLINE void frt_bv_unset(FrtBitVector *bv, int bit) {
194
184
  frt_bv_set_value(bv, bit, 0);
195
185
  }
196
186
 
@@ -213,9 +203,7 @@ extern void frt_bv_clear(FrtBitVector *bv);
213
203
  * @return the number of set bits in the FrtBitVector. FrtBitVector.count is also
214
204
  * set
215
205
  */
216
- static FRT_ATTR_ALWAYS_INLINE
217
- int frt_bv_recount(FrtBitVector *bv)
218
- {
206
+ static FRT_ATTR_ALWAYS_INLINE int frt_bv_recount(FrtBitVector *bv) {
219
207
  unsigned int extra = ((bv->size & 31) >> 3) + 1;
220
208
  unsigned int len = bv->size >> 5;
221
209
  unsigned int idx, count = 0;
@@ -230,8 +218,7 @@ int frt_bv_recount(FrtBitVector *bv)
230
218
  case 2: count += frt_count_zeros(bv->bits[idx] | 0xffff00ff);
231
219
  case 1: count += frt_count_zeros(bv->bits[idx] | 0xffffff00);
232
220
  }
233
- }
234
- else {
221
+ } else {
235
222
  for (idx = 0; idx < len; ++idx) {
236
223
  count += frt_count_ones(bv->bits[idx]);
237
224
  }
@@ -263,9 +250,7 @@ extern void frt_bv_scan_reset(FrtBitVector *bv);
263
250
  * @param bv the FrtBitVector to scan
264
251
  * @return the next set bit's index or -1 if no more bits are set
265
252
  */
266
- static FRT_ATTR_ALWAYS_INLINE
267
- int frt_bv_scan_next_from(FrtBitVector *bv, const int bit)
268
- {
253
+ static FRT_ATTR_ALWAYS_INLINE int frt_bv_scan_next_from(FrtBitVector *bv, const int bit) {
269
254
  frt_u32 pos = bit >> 5;
270
255
  frt_u32 word = bv->bits[pos];
271
256
 
@@ -276,8 +261,7 @@ int frt_bv_scan_next_from(FrtBitVector *bv, const int bit)
276
261
  word &= (frt_u32)~0 << (bit & 31);
277
262
  if (word) {
278
263
  goto done;
279
- }
280
- else {
264
+ } else {
281
265
  frt_u32 word_size = FRT_TO_WORD(bv->size);
282
266
  for (pos++; pos < word_size; ++pos)
283
267
  {
@@ -286,7 +270,7 @@ int frt_bv_scan_next_from(FrtBitVector *bv, const int bit)
286
270
  }
287
271
  }
288
272
  return -1;
289
- done:
273
+ done:
290
274
  return bv->curr_bit = (pos << 5) + frt_count_trailing_zeros(word);
291
275
  }
292
276
 
@@ -313,9 +297,7 @@ int frt_bv_scan_next(FrtBitVector *bv)
313
297
  * @param bv the FrtBitVector to scan
314
298
  * @return the next unset bit's index or -1 if no more bits are unset
315
299
  */
316
- static FRT_ATTR_ALWAYS_INLINE
317
- int frt_bv_scan_next_unset_from(FrtBitVector *bv, const int bit)
318
- {
300
+ static FRT_ATTR_ALWAYS_INLINE int frt_bv_scan_next_unset_from(FrtBitVector *bv, const int bit) {
319
301
  frt_u32 pos = bit >> 5;
320
302
  frt_u32 word = bv->bits[pos];
321
303
 
@@ -326,8 +308,7 @@ int frt_bv_scan_next_unset_from(FrtBitVector *bv, const int bit)
326
308
  word |= (1 << (bit & 31)) - 1;
327
309
  if (~word) {
328
310
  goto done;
329
- }
330
- else {
311
+ } else {
331
312
  frt_u32 word_size = FRT_TO_WORD(bv->size);
332
313
  for (pos++; pos < word_size; ++pos)
333
314
  {
@@ -336,7 +317,7 @@ int frt_bv_scan_next_unset_from(FrtBitVector *bv, const int bit)
336
317
  }
337
318
  }
338
319
  return -1;
339
- done:
320
+ done:
340
321
  return bv->curr_bit = (pos << 5) + frt_count_trailing_ones(word);
341
322
  }
342
323
 
@@ -372,12 +353,9 @@ extern int frt_bv_eq(FrtBitVector *bv1, FrtBitVector *bv2);
372
353
  */
373
354
  extern unsigned long long frt_bv_hash(FrtBitVector *bv);
374
355
 
375
- static FRT_ATTR_ALWAYS_INLINE
376
- void frt_bv_capa(FrtBitVector *bv, int capa, int size)
377
- {
356
+ static FRT_ATTR_ALWAYS_INLINE void frt_bv_capa(FrtBitVector *bv, int capa, int size) {
378
357
  int word_size = FRT_TO_WORD(size);
379
- if (bv->capa < capa)
380
- {
358
+ if (bv->capa < capa) {
381
359
  FRT_REALLOC_N(bv->bits, frt_u32, capa);
382
360
  bv->capa = capa;
383
361
  memset(bv->bits + word_size, (bv->extends_as_ones ? 0xFF : 0),
@@ -408,11 +386,11 @@ void frt_bv_capa(FrtBitVector *bv, int capa, int size)
408
386
  int i; \
409
387
  int a_wsz = FRT_TO_WORD(a->size); \
410
388
  int b_wsz = FRT_TO_WORD(b->size); \
411
- int max_size = FRT_MAX(a->size, b->size); \
412
- int min_size = FRT_MIN(a->size, b->size); \
389
+ int max_size = FRT_MAX(a->size, b->size); \
390
+ int min_size = FRT_MIN(a->size, b->size); \
413
391
  int max_word_size = FRT_TO_WORD(max_size); \
414
392
  int min_word_size = FRT_TO_WORD(min_size); \
415
- int capa = FRT_MAX(frt_round2(max_word_size), 4); \
393
+ int capa = FRT_MAX(frt_round2(max_word_size), 4); \
416
394
  \
417
395
  bv->extends_as_ones = (a->extends_as_ones op b->extends_as_ones); \
418
396
  frt_bv_capa(bv, capa, max_size); \
@@ -432,33 +410,22 @@ void frt_bv_capa(FrtBitVector *bv, int capa, int size)
432
410
  frt_bv_recount(bv); \
433
411
  } while(0)
434
412
 
435
- static FRT_ATTR_ALWAYS_INLINE
436
- FrtBitVector *frt_bv_and_i(FrtBitVector *bv,
437
- FrtBitVector *a, FrtBitVector *b)
438
- {
413
+ static FRT_ATTR_ALWAYS_INLINE FrtBitVector *frt_bv_and_i(FrtBitVector *bv, FrtBitVector *a, FrtBitVector *b) {
439
414
  FRT_BV_OP(bv, a, b, &, frt_bv_and_ext);
440
415
  return bv;
441
416
  }
442
417
 
443
- static FRT_ATTR_ALWAYS_INLINE
444
- FrtBitVector *frt_bv_or_i(FrtBitVector *bv,
445
- FrtBitVector *a, FrtBitVector *b)
446
- {
418
+ static FRT_ATTR_ALWAYS_INLINE FrtBitVector *frt_bv_or_i(FrtBitVector *bv, FrtBitVector *a, FrtBitVector *b) {
447
419
  FRT_BV_OP(bv, a, b, |, frt_bv_or_ext);
448
420
  return bv;
449
421
  }
450
422
 
451
- static FRT_ATTR_ALWAYS_INLINE
452
- FrtBitVector *frt_bv_xor_i(FrtBitVector *bv,
453
- FrtBitVector *a, FrtBitVector *b)
454
- {
423
+ static FRT_ATTR_ALWAYS_INLINE FrtBitVector *frt_bv_xor_i(FrtBitVector *bv, FrtBitVector *a, FrtBitVector *b) {
455
424
  FRT_BV_OP(bv, a, b, ^, frt_bv_xor_ext);
456
425
  return bv;
457
426
  }
458
427
 
459
- static FRT_ATTR_ALWAYS_INLINE
460
- FrtBitVector *frt_bv_not_i(FrtBitVector *bv, FrtBitVector *bv1)
461
- {
428
+ static FRT_ATTR_ALWAYS_INLINE FrtBitVector *frt_bv_not_i(FrtBitVector *bv, FrtBitVector *bv1) {
462
429
  int i;
463
430
  int word_size = FRT_TO_WORD(bv1->size);
464
431
  int capa = FRT_MAX(frt_round2(word_size), 4);
@@ -484,9 +451,7 @@ FrtBitVector *frt_bv_not_i(FrtBitVector *bv, FrtBitVector *bv1)
484
451
  * @param bv2 second FrtBitVector to AND
485
452
  * @return A FrtBitVector with all bits set that are set in both bv1 and bv2
486
453
  */
487
- static FRT_ATTR_ALWAYS_INLINE
488
- FrtBitVector *frt_bv_and(FrtBitVector *bv1, FrtBitVector *bv2)
489
- {
454
+ static FRT_ATTR_ALWAYS_INLINE FrtBitVector *frt_bv_and(FrtBitVector *bv1, FrtBitVector *bv2) {
490
455
  return frt_bv_and_i(frt_bv_new(), bv1, bv2);
491
456
  }
492
457
 
@@ -498,9 +463,7 @@ FrtBitVector *frt_bv_and(FrtBitVector *bv1, FrtBitVector *bv2)
498
463
  * @param bv2 second FrtBitVector to OR
499
464
  * @return A FrtBitVector with all bits set that are set in either bv1 or bv2
500
465
  */
501
- static FRT_ATTR_ALWAYS_INLINE
502
- FrtBitVector *frt_bv_or(FrtBitVector *bv1, FrtBitVector *bv2)
503
- {
466
+ static FRT_ATTR_ALWAYS_INLINE FrtBitVector *frt_bv_or(FrtBitVector *bv1, FrtBitVector *bv2) {
504
467
  return frt_bv_or_i(frt_bv_new(), bv1, bv2);
505
468
  }
506
469
 
@@ -513,9 +476,7 @@ FrtBitVector *frt_bv_or(FrtBitVector *bv1, FrtBitVector *bv2)
513
476
  * @param bv2 second FrtBitVector to XOR
514
477
  * @return A FrtBitVector with all bits set that differ between bv1 and bv2
515
478
  */
516
- static FRT_ATTR_ALWAYS_INLINE
517
- FrtBitVector *frt_bv_xor(FrtBitVector *bv1, FrtBitVector *bv2)
518
- {
479
+ static FRT_ATTR_ALWAYS_INLINE FrtBitVector *frt_bv_xor(FrtBitVector *bv1, FrtBitVector *bv2) {
519
480
  return frt_bv_xor_i(frt_bv_new(), bv1, bv2);
520
481
  }
521
482
 
@@ -525,9 +486,7 @@ FrtBitVector *frt_bv_xor(FrtBitVector *bv1, FrtBitVector *bv2)
525
486
  * @param bv FrtBitVector to flip
526
487
  * @return A new FrtBitVector with all of the bits of bv flipped
527
488
  */
528
- static FRT_ATTR_ALWAYS_INLINE
529
- FrtBitVector *frt_bv_not(FrtBitVector *bv)
530
- {
489
+ static FRT_ATTR_ALWAYS_INLINE FrtBitVector *frt_bv_not(FrtBitVector *bv) {
531
490
  return frt_bv_not_i(frt_bv_new(), bv);
532
491
  }
533
492
 
@@ -539,9 +498,7 @@ FrtBitVector *frt_bv_not(FrtBitVector *bv)
539
498
  * @return A FrtBitVector
540
499
  * @return bv1 with all bits set that were set in both bv1 and bv2
541
500
  */
542
- static FRT_ATTR_ALWAYS_INLINE
543
- FrtBitVector *frt_bv_and_x(FrtBitVector *bv1, FrtBitVector *bv2)
544
- {
501
+ static FRT_ATTR_ALWAYS_INLINE FrtBitVector *frt_bv_and_x(FrtBitVector *bv1, FrtBitVector *bv2) {
545
502
  return frt_bv_and_i(bv1, bv1, bv2);
546
503
  }
547
504
 
@@ -552,9 +509,7 @@ FrtBitVector *frt_bv_and_x(FrtBitVector *bv1, FrtBitVector *bv2)
552
509
  * @param bv2 second FrtBitVector to OR
553
510
  * @return bv1
554
511
  */
555
- static FRT_ATTR_ALWAYS_INLINE
556
- FrtBitVector *frt_bv_or_x(FrtBitVector *bv1, FrtBitVector *bv2)
557
- {
512
+ static FRT_ATTR_ALWAYS_INLINE FrtBitVector *frt_bv_or_x(FrtBitVector *bv1, FrtBitVector *bv2) {
558
513
  return frt_bv_or_i(bv1, bv1, bv2);
559
514
  }
560
515
 
@@ -565,9 +520,7 @@ FrtBitVector *frt_bv_or_x(FrtBitVector *bv1, FrtBitVector *bv2)
565
520
  * @param bv2 second FrtBitVector to XOR
566
521
  * @return bv1
567
522
  */
568
- static FRT_ATTR_ALWAYS_INLINE
569
- FrtBitVector *frt_bv_xor_x(FrtBitVector *bv1, FrtBitVector *bv2)
570
- {
523
+ static FRT_ATTR_ALWAYS_INLINE FrtBitVector *frt_bv_xor_x(FrtBitVector *bv1, FrtBitVector *bv2) {
571
524
  return frt_bv_xor_i(bv1, bv1, bv2);
572
525
  }
573
526
 
@@ -577,9 +530,7 @@ FrtBitVector *frt_bv_xor_x(FrtBitVector *bv1, FrtBitVector *bv2)
577
530
  * @param bv FrtBitVector to flip
578
531
  * @return +bv+ with all its bits flipped
579
532
  */
580
- static FRT_ATTR_ALWAYS_INLINE
581
- FrtBitVector *frt_bv_not_x(FrtBitVector *bv)
582
- {
533
+ static FRT_ATTR_ALWAYS_INLINE FrtBitVector *frt_bv_not_x(FrtBitVector *bv) {
583
534
  return frt_bv_not_i(bv, bv);
584
535
  }
585
536
 
@@ -7,39 +7,38 @@
7
7
  *
8
8
  ****************************************************************************/
9
9
 
10
- FrtDocField *frt_df_new(FrtSymbol name)
11
- {
10
+ FrtDocField *frt_df_new(ID name) {
12
11
  FrtDocField *df = FRT_ALLOC(FrtDocField);
13
12
  df->name = name;
14
13
  df->size = 0;
15
14
  df->capa = FRT_DF_INIT_CAPA;
16
15
  df->data = FRT_ALLOC_N(char *, df->capa);
17
16
  df->lengths = FRT_ALLOC_N(int, df->capa);
17
+ df->encodings = FRT_ALLOC_N(rb_encoding *, df->capa);
18
18
  df->destroy_data = false;
19
19
  df->boost = 1.0f;
20
20
  return df;
21
21
  }
22
22
 
23
- FrtDocField *frt_df_add_data_len(FrtDocField *df, char *data, int len)
24
- {
23
+ FrtDocField *frt_df_add_data_len(FrtDocField *df, char *data, int len, rb_encoding *encoding) {
25
24
  if (df->size >= df->capa) {
26
25
  df->capa <<= 2;
27
26
  FRT_REALLOC_N(df->data, char *, df->capa);
28
27
  FRT_REALLOC_N(df->lengths, int, df->capa);
28
+ FRT_REALLOC_N(df->encodings, rb_encoding *, df->capa);
29
29
  }
30
30
  df->data[df->size] = data;
31
31
  df->lengths[df->size] = len;
32
+ df->encodings[df->size] = encoding;
32
33
  df->size++;
33
34
  return df;
34
35
  }
35
36
 
36
- FrtDocField *frt_df_add_data(FrtDocField *df, char *data)
37
- {
38
- return frt_df_add_data_len(df, data, strlen(data));
37
+ FrtDocField *frt_df_add_data(FrtDocField *df, char *data, rb_encoding *encoding) {
38
+ return frt_df_add_data_len(df, data, strlen(data), encoding);
39
39
  }
40
40
 
41
- void frt_df_destroy(FrtDocField *df)
42
- {
41
+ void frt_df_destroy(FrtDocField *df) {
43
42
  if (df->destroy_data) {
44
43
  int i;
45
44
  for (i = 0; i < df->size; i++) {
@@ -48,15 +47,16 @@ void frt_df_destroy(FrtDocField *df)
48
47
  }
49
48
  free(df->data);
50
49
  free(df->lengths);
50
+ free(df->encodings);
51
51
  free(df);
52
52
  }
53
53
 
54
54
  /*
55
55
  * Format for one item is: name: "data"
56
56
  * for more items : name: ["data", "data", "data"]
57
+ * internally used for testing, thus encoding can be ignored
57
58
  */
58
- char *frt_df_to_s(FrtDocField *df)
59
- {
59
+ char *frt_df_to_s(FrtDocField *df) {
60
60
  const char *df_name = rb_id2name(df->name);
61
61
  int i, len = 0, namelen = strlen(df_name);
62
62
  char *str, *s;
@@ -94,8 +94,7 @@ char *frt_df_to_s(FrtDocField *df)
94
94
  *
95
95
  ****************************************************************************/
96
96
 
97
- FrtDocument *frt_doc_new()
98
- {
97
+ FrtDocument *frt_doc_new(void) {
99
98
  FrtDocument *doc = FRT_ALLOC(FrtDocument);
100
99
  doc->field_dict = frt_h_new_ptr((frt_free_ft)&frt_df_destroy);
101
100
  doc->size = 0;
@@ -105,8 +104,7 @@ FrtDocument *frt_doc_new()
105
104
  return doc;
106
105
  }
107
106
 
108
- FrtDocField *frt_doc_add_field(FrtDocument *doc, FrtDocField *df)
109
- {
107
+ FrtDocField *frt_doc_add_field(FrtDocument *doc, FrtDocField *df) {
110
108
  if (!frt_h_set_safe(doc->field_dict, (void *)df->name, df)) {
111
109
  FRT_RAISE(FRT_EXCEPTION, "tried to add %s field which alread existed\n",
112
110
  rb_id2name(df->name));
@@ -120,15 +118,12 @@ FrtDocField *frt_doc_add_field(FrtDocument *doc, FrtDocField *df)
120
118
  return df;
121
119
  }
122
120
 
123
- FrtDocField *frt_doc_get_field(FrtDocument *doc, FrtSymbol name)
124
- {
121
+ FrtDocField *frt_doc_get_field(FrtDocument *doc, ID name) {
125
122
  return (FrtDocField *)frt_h_get(doc->field_dict, (void *)name);
126
123
  }
127
124
 
128
- void frt_doc_destroy(FrtDocument *doc)
129
- {
125
+ void frt_doc_destroy(FrtDocument *doc) {
130
126
  frt_h_destroy(doc->field_dict);
131
127
  free(doc->fields);
132
128
  free(doc);
133
129
  }
134
-
@@ -3,6 +3,7 @@
3
3
 
4
4
  #include "frt_global.h"
5
5
  #include "frt_hash.h"
6
+ #include <ruby/encoding.h>
6
7
 
7
8
  /****************************************************************************
8
9
  *
@@ -11,21 +12,21 @@
11
12
  ****************************************************************************/
12
13
 
13
14
  #define FRT_DF_INIT_CAPA 1
14
- typedef struct FrtDocField
15
- {
16
- FrtSymbol name;
15
+ typedef struct FrtDocField {
16
+ ID name;
17
17
  int size;
18
18
  int capa;
19
19
  int *lengths;
20
+ rb_encoding **encodings; /* used for processing */
20
21
  char **data;
21
22
  float boost;
23
+ FrtCompressionType compression;
22
24
  bool destroy_data : 1;
23
- bool is_compressed : 1;
24
25
  } FrtDocField;
25
26
 
26
- extern FrtDocField *frt_df_new(FrtSymbol name);
27
- extern FrtDocField *frt_df_add_data(FrtDocField *df, char *data);
28
- extern FrtDocField *frt_df_add_data_len(FrtDocField *df, char *data, int len);
27
+ extern FrtDocField *frt_df_new(ID name);
28
+ extern FrtDocField *frt_df_add_data(FrtDocField *df, char *data, rb_encoding *encoding);
29
+ extern FrtDocField *frt_df_add_data_len(FrtDocField *df, char *data, int len, rb_encoding *encoding);
29
30
  extern void frt_df_destroy(FrtDocField *df);
30
31
  extern char *frt_df_to_s(FrtDocField *df);
31
32
 
@@ -36,8 +37,7 @@ extern char *frt_df_to_s(FrtDocField *df);
36
37
  ****************************************************************************/
37
38
 
38
39
  #define FRT_DOC_INIT_CAPA 8
39
- typedef struct FrtDocument
40
- {
40
+ typedef struct FrtDocument {
41
41
  FrtHash *field_dict;
42
42
  int size;
43
43
  int capa;
@@ -47,7 +47,7 @@ typedef struct FrtDocument
47
47
 
48
48
  extern FrtDocument *frt_doc_new();
49
49
  extern FrtDocField *frt_doc_add_field(FrtDocument *doc, FrtDocField *df);
50
- extern FrtDocField *frt_doc_get_field(FrtDocument *doc, FrtSymbol name);
50
+ extern FrtDocField *frt_doc_get_field(FrtDocument *doc, ID name);
51
51
  extern void frt_doc_destroy(FrtDocument *doc);
52
52
 
53
53
  #endif
@@ -32,13 +32,11 @@ char frt_xmsg_buffer_final[FRT_XMSG_BUFFER_FINAL_SIZE];
32
32
  static frt_thread_key_t exception_stack_key;
33
33
  static frt_thread_once_t exception_stack_key_once = FRT_THREAD_ONCE_INIT;
34
34
 
35
- static void exception_stack_alloc(void)
36
- {
35
+ static void exception_stack_alloc(void) {
37
36
  frt_thread_key_create(&exception_stack_key, NULL);
38
37
  }
39
38
 
40
- void frt_xpush_context(frt_xcontext_t *context)
41
- {
39
+ void frt_xpush_context(frt_xcontext_t *context) {
42
40
  frt_xcontext_t *top_context;
43
41
  frt_thread_once(&exception_stack_key_once, *exception_stack_alloc);
44
42
  top_context = (frt_xcontext_t *)frt_thread_getspecific(exception_stack_key);
@@ -48,18 +46,14 @@ void frt_xpush_context(frt_xcontext_t *context)
48
46
  context->in_finally = false;
49
47
  }
50
48
 
51
- static void frt_xraise_context(frt_xcontext_t *context,
52
- volatile int excode,
53
- const char *const msg)
54
- {
49
+ static void frt_xraise_context(frt_xcontext_t *context, volatile int excode, const char *const msg) {
55
50
  context->msg = msg;
56
51
  context->excode = excode;
57
52
  context->handled = false;
58
53
  longjmp(context->jbuf, excode);
59
54
  }
60
55
 
61
- void frt_xraise(int excode, const char *const msg)
62
- {
56
+ void frt_xraise(int excode, const char *const msg) {
63
57
  frt_xcontext_t *top_context;
64
58
  frt_thread_once(&exception_stack_key_once, *exception_stack_alloc);
65
59
  top_context = (frt_xcontext_t *)frt_thread_getspecific(exception_stack_key);
@@ -77,8 +71,7 @@ void frt_xraise(int excode, const char *const msg)
77
71
  }
78
72
  }
79
73
 
80
- void frt_xpop_context()
81
- {
74
+ void frt_xpop_context(void) {
82
75
  frt_xcontext_t *top_cxt, *context;
83
76
  frt_thread_once(&exception_stack_key_once, *exception_stack_alloc);
84
77
  top_cxt = (frt_xcontext_t *)frt_thread_getspecific(exception_stack_key);
@@ -1,6 +1,8 @@
1
1
  #include <string.h>
2
2
  #include "frt_field_index.h"
3
3
 
4
+ #undef close
5
+
4
6
  /***************************************************************************
5
7
  *
6
8
  * FrtFieldIndex
@@ -30,9 +32,7 @@ static void field_index_destroy(void *p)
30
32
  free(self);
31
33
  }
32
34
 
33
- FrtFieldIndex *frt_field_index_get(FrtIndexReader *ir, FrtSymbol field,
34
- const FrtFieldIndexClass *klass)
35
- {
35
+ FrtFieldIndex *frt_field_index_get(FrtIndexReader *ir, ID field, const FrtFieldIndexClass *klass) {
36
36
  int length = 0;
37
37
  FrtTermEnum *volatile te = NULL;
38
38
  FrtTermDocEnum *volatile tde = NULL;
@@ -10,11 +10,11 @@
10
10
  ***************************************************************************/
11
11
 
12
12
  typedef struct FrtStringIndex {
13
- int size;
13
+ int size;
14
14
  long *index;
15
15
  char **values;
16
- int v_size;
17
- int v_capa;
16
+ int v_size;
17
+ int v_capa;
18
18
  } FrtStringIndex;
19
19
 
20
20
  typedef struct FrtFieldIndexClass FrtFieldIndexClass;
@@ -26,9 +26,9 @@ struct FrtFieldIndexClass {
26
26
  };
27
27
 
28
28
  typedef struct FrtFieldIndex {
29
- FrtSymbol field;
29
+ ID field;
30
30
  const FrtFieldIndexClass *klass;
31
- void *index;
31
+ void *index;
32
32
  } FrtFieldIndex;
33
33
 
34
34
  extern const FrtFieldIndexClass FRT_INTEGER_FIELD_INDEX_CLASS;
@@ -36,7 +36,6 @@ extern const FrtFieldIndexClass FRT_FLOAT_FIELD_INDEX_CLASS;
36
36
  extern const FrtFieldIndexClass FRT_STRING_FIELD_INDEX_CLASS;
37
37
  extern const FrtFieldIndexClass FRT_BYTE_FIELD_INDEX_CLASS;
38
38
 
39
- extern FrtFieldIndex *frt_field_index_get(FrtIndexReader *ir, FrtSymbol field,
40
- const FrtFieldIndexClass *klass);
39
+ extern FrtFieldIndex *frt_field_index_get(FrtIndexReader *ir, ID field, const FrtFieldIndexClass *klass);
41
40
 
42
41
  #endif