isomorfeus-ferret 0.12.6 → 0.13.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (249) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +101 -19
  3. data/README.md +85 -16
  4. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
  5. data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
  6. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
  7. data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
  8. data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
  9. data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
  10. data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
  11. data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
  12. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
  13. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
  14. data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
  15. data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
  16. data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
  17. data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
  18. data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
  19. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
  20. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
  21. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
  22. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
  23. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
  24. data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
  25. data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
  26. data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
  27. data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
  28. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
  29. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
  30. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
  31. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
  32. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
  33. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
  34. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
  35. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
  36. data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
  37. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
  38. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
  39. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
  40. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
  41. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
  42. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
  43. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
  44. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
  45. data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
  46. data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
  47. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
  48. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
  49. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
  50. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
  51. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
  52. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
  53. data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
  54. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
  55. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
  56. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
  57. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
  58. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
  59. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
  60. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
  61. data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
  62. data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
  63. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
  64. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
  65. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
  66. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
  67. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
  68. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
  69. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
  70. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
  71. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
  72. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
  73. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
  74. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
  75. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
  76. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
  77. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
  78. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
  79. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
  80. data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
  81. data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
  82. data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
  83. data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
  84. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
  85. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
  86. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
  87. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
  88. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
  89. data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
  90. data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
  91. data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
  92. data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
  93. data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
  94. data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
  95. data/ext/isomorfeus_ferret_ext/bzlib_blocksort.c +1094 -0
  96. data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
  97. data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
  98. data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
  99. data/ext/isomorfeus_ferret_ext/bzlib_huffman.c +205 -0
  100. data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
  101. data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
  102. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
  103. data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
  104. data/ext/isomorfeus_ferret_ext/frb_index.c +513 -464
  105. data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
  106. data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
  107. data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
  108. data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
  109. data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
  110. data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
  111. data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
  112. data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
  113. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
  114. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
  115. data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
  116. data/ext/isomorfeus_ferret_ext/frt_document.h +10 -9
  117. data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
  118. data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
  119. data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
  120. data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
  121. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +2 -0
  122. data/ext/isomorfeus_ferret_ext/frt_global.c +91 -200
  123. data/ext/isomorfeus_ferret_ext/frt_global.h +7 -18
  124. data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
  125. data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
  126. data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
  127. data/ext/isomorfeus_ferret_ext/frt_index.c +714 -384
  128. data/ext/isomorfeus_ferret_ext/frt_index.h +274 -290
  129. data/ext/isomorfeus_ferret_ext/frt_lang.c +0 -2
  130. data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
  131. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
  132. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +68 -91
  133. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
  134. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
  135. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
  136. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
  137. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
  138. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
  139. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +20 -16
  140. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
  141. data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
  142. data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
  143. data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
  144. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
  145. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +46 -84
  146. data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
  147. data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
  148. data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
  149. data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
  150. data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
  151. data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
  152. data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
  153. data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
  154. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +22 -112
  155. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
  156. data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
  157. data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
  158. data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
  159. data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
  160. data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
  161. data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
  162. data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
  163. data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
  164. data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
  165. data/ext/isomorfeus_ferret_ext/test.c +0 -17
  166. data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
  167. data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
  168. data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
  169. data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
  170. data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
  171. data/ext/isomorfeus_ferret_ext/test_fields.c +111 -100
  172. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
  173. data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
  174. data/ext/isomorfeus_ferret_ext/test_global.c +0 -46
  175. data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
  176. data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
  177. data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
  178. data/ext/isomorfeus_ferret_ext/test_index.c +373 -363
  179. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
  180. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
  181. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
  182. data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
  183. data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
  184. data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
  185. data/ext/isomorfeus_ferret_ext/test_search.c +60 -64
  186. data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
  187. data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
  188. data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
  189. data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
  190. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
  191. data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
  192. data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
  193. data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
  194. data/lib/isomorfeus/ferret/version.rb +1 -1
  195. metadata +113 -58
  196. data/ext/isomorfeus_ferret_ext/email.rl +0 -21
  197. data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
  198. data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
  199. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
  200. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
  201. data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
  202. data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
  203. data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
  204. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
  205. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
  206. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
  207. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
  208. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
  209. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
  210. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
  211. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
  212. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
  213. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
  214. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
  215. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
  216. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
  217. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
  218. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
  219. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
  220. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
  221. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
  222. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
  223. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
  224. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
  225. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
  226. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
  227. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
  228. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
  229. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
  230. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
  231. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
  232. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
  233. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
  234. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
  235. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
  236. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
  237. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
  238. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
  239. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
  240. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
  241. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
  242. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
  243. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
  244. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
  245. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
  246. data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
  247. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
  248. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
  249. data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -1,6 +1,8 @@
1
1
  #include "frt_index.h"
2
2
  #include "isomorfeus_ferret.h"
3
- #include <ruby/st.h>
3
+ #include <ruby.h>
4
+
5
+ #undef close
4
6
 
5
7
  VALUE mIndex;
6
8
 
@@ -41,6 +43,11 @@ static VALUE sym_store;
41
43
  static VALUE sym_index;
42
44
  static VALUE sym_term_vector;
43
45
 
46
+ static VALUE sym_brotli;
47
+ static VALUE sym_bz2;
48
+ static VALUE sym_lz4;
49
+ static VALUE sym_compression;
50
+
44
51
  static VALUE sym_untokenized;
45
52
  static VALUE sym_omit_norms;
46
53
  static VALUE sym_untokenized_omit_norms;
@@ -49,7 +56,7 @@ static VALUE sym_with_positions;
49
56
  static VALUE sym_with_offsets;
50
57
  static VALUE sym_with_positions_offsets;
51
58
 
52
- static FrtSymbol fsym_content;
59
+ static ID fsym_content;
53
60
 
54
61
  static ID id_term;
55
62
  static ID id_fields;
@@ -67,20 +74,11 @@ extern VALUE frb_get_analyzer(FrtAnalyzer *a);
67
74
  *
68
75
  ****************************************************************************/
69
76
 
70
- static void
71
- frb_fi_free(void *p)
72
- {
73
- object_del(p);
77
+ static void frb_fi_free(void *p) {
74
78
  frt_fi_deref((FrtFieldInfo *)p);
75
79
  }
76
80
 
77
- static void
78
- frb_fi_get_params(VALUE roptions,
79
- FrtStoreValue *store,
80
- FrtIndexValue *index,
81
- FrtTermVectorValue *term_vector,
82
- float *boost)
83
- {
81
+ static void frb_fi_get_params(VALUE roptions, FrtStoreValue *store, FrtCompressionType *compression, FrtIndexValue *index, FrtTermVectorValue *term_vector, float *boost) {
84
82
  VALUE v;
85
83
  Check_Type(roptions, T_HASH);
86
84
  v = rb_hash_aref(roptions, sym_boost);
@@ -98,8 +96,24 @@ frb_fi_get_params(VALUE roptions,
98
96
  } else if (v == Qnil) {
99
97
  /* leave as default */
100
98
  } else {
101
- rb_raise(rb_eArgError, ":%s isn't a valid argument for :store."
102
- " Please choose from [:yes, :no]",
99
+ rb_raise(rb_eArgError, ":%s isn't a valid argument for :store. Please choose from [:yes, :no]",
100
+ rb_id2name(SYM2ID(v)));
101
+ }
102
+
103
+ v = rb_hash_aref(roptions, sym_compression);
104
+ if (Qnil != v) Check_Type(v, T_SYMBOL);
105
+ if (v == sym_no || v == sym_false || v == Qfalse) {
106
+ *compression = FRT_COMPRESSION_NONE;
107
+ } else if (v == sym_yes || v == sym_true || v == Qtrue || v == sym_brotli) {
108
+ *compression = FRT_COMPRESSION_BROTLI;
109
+ } else if (v == sym_bz2) {
110
+ *compression = FRT_COMPRESSION_BZ2;
111
+ } else if (v == sym_lz4) {
112
+ *compression = FRT_COMPRESSION_LZ4;
113
+ } else if (v == Qnil) {
114
+ /* leave as default */
115
+ } else {
116
+ rb_raise(rb_eArgError, ":%s isn't a valid argument for :compression. Please choose from [:yes, :no, :brotli, :bz2, :lz4]",
103
117
  rb_id2name(SYM2ID(v)));
104
118
  }
105
119
 
@@ -118,10 +132,8 @@ frb_fi_get_params(VALUE roptions,
118
132
  } else if (v == Qnil) {
119
133
  /* leave as default */
120
134
  } else {
121
- rb_raise(rb_eArgError, ":%s isn't a valid argument for :index."
122
- " Please choose from [:no, :yes, :untokenized, "
123
- ":omit_norms, :untokenized_omit_norms]",
124
- rb_id2name(SYM2ID(v)));
135
+ rb_raise(rb_eArgError, ":%s isn't a valid argument for :index. Please choose from [:no, :yes, :untokenized, "
136
+ ":omit_norms, :untokenized_omit_norms]", rb_id2name(SYM2ID(v)));
125
137
  }
126
138
 
127
139
  v = rb_hash_aref(roptions, sym_term_vector);
@@ -139,28 +151,38 @@ frb_fi_get_params(VALUE roptions,
139
151
  } else if (v == Qnil) {
140
152
  /* leave as default */
141
153
  } else {
142
- rb_raise(rb_eArgError, ":%s isn't a valid argument for "
143
- ":term_vector. Please choose from [:no, :yes, "
144
- ":with_positions, :with_offsets, "
145
- ":with_positions_offsets]",
146
- rb_id2name(SYM2ID(v)));
154
+ rb_raise(rb_eArgError, ":%s isn't a valid argument for :term_vector. Please choose from [:no, :yes, "
155
+ ":with_positions, :with_offsets, :with_positions_offsets]", rb_id2name(SYM2ID(v)));
147
156
  }
148
157
  }
149
158
 
150
- static VALUE
151
- frb_get_field_info(FrtFieldInfo *fi)
152
- {
159
+ static size_t frb_fi_size(const void *p) {
160
+ return sizeof(FrtFieldInfo);
161
+ (void)p;
162
+ }
153
163
 
154
- VALUE rfi = Qnil;
164
+ const rb_data_type_t frb_field_info_t = {
165
+ .wrap_struct_name = "FrbFieldInfo",
166
+ .function = {
167
+ .dmark = NULL,
168
+ .dfree = frb_fi_free,
169
+ .dsize = frb_fi_size,
170
+ .dcompact = NULL,
171
+ .reserved = {0},
172
+ },
173
+ .parent = NULL,
174
+ .data = NULL,
175
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY
176
+ };
177
+
178
+ static VALUE frb_get_field_info(FrtFieldInfo *fi) {
155
179
  if (fi) {
156
- rfi = object_get(fi);
157
- if (rfi == Qnil) {
158
- rfi = Data_Wrap_Struct(cFieldInfo, NULL, &frb_fi_free, fi);
180
+ if (fi->rfi == 0 || fi->rfi == Qnil) {
181
+ fi->rfi = TypedData_Wrap_Struct(cFieldInfo, &frb_field_info_t, fi);
159
182
  FRT_REF(fi);
160
- object_add(fi, rfi);
161
183
  }
162
184
  }
163
- return rfi;
185
+ return fi->rfi;
164
186
  }
165
187
 
166
188
  /*
@@ -168,28 +190,32 @@ frb_get_field_info(FrtFieldInfo *fi)
168
190
  * FieldInfo.new(name, options = {}) -> field_info
169
191
  *
170
192
  * Create a new FieldInfo object with the name +name+ and the properties
171
- * specified in +options+. The available options are [:store, :index,
172
- * :term_vector, :boost]. See the description of FieldInfo for more
193
+ * specified in +options+. The available options are [:store, :compression,
194
+ * :index, :term_vector, :boost]. See the description of FieldInfo for more
173
195
  * information on these properties.
174
196
  */
175
- static VALUE
176
- frb_fi_init(int argc, VALUE *argv, VALUE self)
177
- {
197
+ static VALUE frb_fi_alloc(VALUE rclass) {
198
+ FrtFieldInfo *fi = frt_fi_alloc();
199
+ return TypedData_Wrap_Struct(rclass, &frb_field_info_t, fi);
200
+ }
201
+
202
+ static VALUE frb_fi_init(int argc, VALUE *argv, VALUE self) {
178
203
  VALUE roptions, rname;
179
204
  FrtFieldInfo *fi;
205
+ TypedData_Get_Struct(self, FrtFieldInfo, &frb_field_info_t, fi);
180
206
  FrtStoreValue store = FRT_STORE_YES;
207
+ FrtCompressionType compression = FRT_COMPRESSION_NONE;
181
208
  FrtIndexValue index = FRT_INDEX_YES;
182
209
  FrtTermVectorValue term_vector = FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS;
183
210
  float boost = 1.0f;
184
211
 
185
212
  rb_scan_args(argc, argv, "11", &rname, &roptions);
186
213
  if (argc > 1) {
187
- frb_fi_get_params(roptions, &store, &index, &term_vector, &boost);
214
+ frb_fi_get_params(roptions, &store, &compression, &index, &term_vector, &boost);
188
215
  }
189
- fi = frt_fi_new(frb_field(rname), store, index, term_vector);
216
+ fi = frt_fi_init(fi, frb_field(rname), store, compression, index, term_vector);
190
217
  fi->boost = boost;
191
- Frt_Wrap_Struct(self, NULL, &frb_fi_free, fi);
192
- object_add(fi, self);
218
+ fi->rfi = self;
193
219
  return self;
194
220
  }
195
221
 
@@ -199,9 +225,7 @@ frb_fi_init(int argc, VALUE *argv, VALUE self)
199
225
  *
200
226
  * Return the name of the field
201
227
  */
202
- static VALUE
203
- frb_fi_name(VALUE self)
204
- {
228
+ static VALUE frb_fi_name(VALUE self) {
205
229
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
206
230
  return rb_str_new_cstr(rb_id2name(fi->name));
207
231
  }
@@ -212,22 +236,29 @@ frb_fi_name(VALUE self)
212
236
  *
213
237
  * Return true if the field is stored in the index.
214
238
  */
215
- static VALUE
216
- frb_fi_is_stored(VALUE self)
217
- {
239
+ static VALUE frb_fi_is_stored(VALUE self) {
218
240
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
219
241
  return fi_is_stored(fi) ? Qtrue : Qfalse;
220
242
  }
221
243
 
244
+ /*
245
+ * call-seq:
246
+ * fi.compressed? -> bool
247
+ *
248
+ * Return true if the field is stored in the index in compressed format.
249
+ */
250
+ static VALUE frb_fi_is_compressed(VALUE self) {
251
+ FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
252
+ return fi_is_compressed(fi) ? Qtrue : Qfalse;
253
+ }
254
+
222
255
  /*
223
256
  * call-seq:
224
257
  * fi.indexed? -> bool
225
258
  *
226
259
  * Return true if the field is indexed, i.e. searchable in the index.
227
260
  */
228
- static VALUE
229
- frb_fi_is_indexed(VALUE self)
230
- {
261
+ static VALUE frb_fi_is_indexed(VALUE self) {
231
262
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
232
263
  return fi_is_indexed(fi) ? Qtrue : Qfalse;
233
264
  }
@@ -243,9 +274,7 @@ frb_fi_is_indexed(VALUE self)
243
274
  *
244
275
  * A field can only be tokenized if it is indexed.
245
276
  */
246
- static VALUE
247
- frb_fi_is_tokenized(VALUE self)
248
- {
277
+ static VALUE frb_fi_is_tokenized(VALUE self) {
249
278
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
250
279
  return fi_is_tokenized(fi) ? Qtrue : Qfalse;
251
280
  }
@@ -261,9 +290,7 @@ frb_fi_is_tokenized(VALUE self)
261
290
  * boost and it will use less memory, especially for indexes which have a
262
291
  * large number of documents.
263
292
  */
264
- static VALUE
265
- frb_fi_omit_norms(VALUE self)
266
- {
293
+ static VALUE frb_fi_omit_norms(VALUE self) {
267
294
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
268
295
  return fi_omit_norms(fi) ? Qtrue : Qfalse;
269
296
  }
@@ -274,9 +301,7 @@ frb_fi_omit_norms(VALUE self)
274
301
  *
275
302
  * Return true if the term-vectors are stored for this field.
276
303
  */
277
- static VALUE
278
- frb_fi_store_term_vector(VALUE self)
279
- {
304
+ static VALUE frb_fi_store_term_vector(VALUE self) {
280
305
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
281
306
  return fi_store_term_vector(fi) ? Qtrue : Qfalse;
282
307
  }
@@ -287,9 +312,7 @@ frb_fi_store_term_vector(VALUE self)
287
312
  *
288
313
  * Return true if positions are stored with the term-vectors for this field.
289
314
  */
290
- static VALUE
291
- frb_fi_store_positions(VALUE self)
292
- {
315
+ static VALUE frb_fi_store_positions(VALUE self) {
293
316
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
294
317
  return fi_store_positions(fi) ? Qtrue : Qfalse;
295
318
  }
@@ -300,9 +323,7 @@ frb_fi_store_positions(VALUE self)
300
323
  *
301
324
  * Return true if offsets are stored with the term-vectors for this field.
302
325
  */
303
- static VALUE
304
- frb_fi_store_offsets(VALUE self)
305
- {
326
+ static VALUE frb_fi_store_offsets(VALUE self) {
306
327
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
307
328
  return fi_store_offsets(fi) ? Qtrue : Qfalse;
308
329
  }
@@ -315,9 +336,7 @@ frb_fi_store_offsets(VALUE self)
315
336
  *
316
337
  * fi.indexed? and not fi.omit_norms?
317
338
  */
318
- static VALUE
319
- frb_fi_has_norms(VALUE self)
320
- {
339
+ static VALUE frb_fi_has_norms(VALUE self) {
321
340
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
322
341
  return fi_has_norms(fi) ? Qtrue : Qfalse;
323
342
  }
@@ -328,9 +347,7 @@ frb_fi_has_norms(VALUE self)
328
347
  *
329
348
  * Return the default boost for this field
330
349
  */
331
- static VALUE
332
- frb_fi_boost(VALUE self)
333
- {
350
+ static VALUE frb_fi_boost(VALUE self) {
334
351
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
335
352
  return rb_float_new((double)fi->boost);
336
353
  }
@@ -341,9 +358,7 @@ frb_fi_boost(VALUE self)
341
358
  *
342
359
  * Return a string representation of the FieldInfo object.
343
360
  */
344
- static VALUE
345
- frb_fi_to_s(VALUE self)
346
- {
361
+ static VALUE frb_fi_to_s(VALUE self) {
347
362
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
348
363
  char *fi_s = frt_fi_to_s(fi);
349
364
  VALUE rfi_s = rb_str_new2(fi_s);
@@ -357,39 +372,47 @@ frb_fi_to_s(VALUE self)
357
372
  *
358
373
  ****************************************************************************/
359
374
 
360
- static void
361
- frb_fis_free(void *p)
362
- {
363
- object_del(p);
375
+ static void frb_fis_free(void *p) {
364
376
  frt_fis_deref((FrtFieldInfos *)p);
365
377
  }
366
378
 
367
- static void
368
- frb_fis_mark(void *p)
369
- {
379
+ static void frb_fis_mark(void *p) {
370
380
  int i;
371
381
  FrtFieldInfos *fis = (FrtFieldInfos *)p;
372
382
 
373
383
  for (i = 0; i < fis->size; i++) {
374
- frb_gc_mark(fis->fields[i]);
384
+ if (fis->fields[i]->rfi)
385
+ rb_gc_mark(fis->fields[i]->rfi);
375
386
  }
376
387
  }
377
388
 
378
- static VALUE
379
- frb_get_field_infos(FrtFieldInfos *fis)
380
- {
389
+ static size_t frb_field_infos_t_size(const void *p) {
390
+ return sizeof(FrtFieldInfos);
391
+ (void)p;
392
+ }
393
+
394
+ const rb_data_type_t frb_field_infos_t = {
395
+ .wrap_struct_name = "FrbFieldInfos",
396
+ .function = {
397
+ .dmark = frb_fis_mark,
398
+ .dfree = frb_fis_free,
399
+ .dsize = frb_field_infos_t_size,
400
+ .dcompact = NULL,
401
+ .reserved = {0},
402
+ },
403
+ .parent = NULL,
404
+ .data = NULL,
405
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY
406
+ };
381
407
 
382
- VALUE rfis = Qnil;
408
+ static VALUE frb_get_field_infos(FrtFieldInfos *fis) {
383
409
  if (fis) {
384
- rfis = object_get(fis);
385
- if (rfis == Qnil) {
386
- rfis = Data_Wrap_Struct(cFieldInfos, &frb_fis_mark, &frb_fis_free,
387
- fis);
410
+ if (fis->rfis == 0 || fis->rfis == Qnil) {
411
+ fis->rfis = TypedData_Wrap_Struct(cFieldInfos, &frb_field_infos_t, fis);
388
412
  FRT_REF(fis);
389
- object_add(fis, rfis);
390
413
  }
391
414
  }
392
- return rfis;
415
+ return fis->rfis;
393
416
  }
394
417
 
395
418
  /*
@@ -400,23 +423,28 @@ frb_get_field_infos(FrtFieldInfos *fis)
400
423
  * specified in the +default+ hash parameter. See FieldInfo for available
401
424
  * property values.
402
425
  */
403
- static VALUE
404
- frb_fis_init(int argc, VALUE *argv, VALUE self)
405
- {
426
+
427
+ static VALUE frb_fis_alloc(VALUE rclass) {
428
+ FrtFieldInfos *fis = frt_fis_alloc();
429
+ return TypedData_Wrap_Struct(rclass, &frb_field_infos_t, fis);
430
+ }
431
+
432
+ static VALUE frb_fis_init(int argc, VALUE *argv, VALUE self) {
406
433
  VALUE roptions;
407
434
  FrtFieldInfos *fis;
435
+ TypedData_Get_Struct(self, FrtFieldInfos, &frb_field_infos_t, fis);
408
436
  FrtStoreValue store = FRT_STORE_YES;
437
+ FrtCompressionType compression = FRT_COMPRESSION_NONE;
409
438
  FrtIndexValue index = FRT_INDEX_YES;
410
439
  FrtTermVectorValue term_vector = FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS;
411
440
  float boost;
412
441
 
413
442
  rb_scan_args(argc, argv, "01", &roptions);
414
443
  if (argc > 0) {
415
- frb_fi_get_params(roptions, &store, &index, &term_vector, &boost);
444
+ frb_fi_get_params(roptions, &store, &compression, &index, &term_vector, &boost);
416
445
  }
417
- fis = frt_fis_new(store, index, term_vector);
418
- Frt_Wrap_Struct(self, &frb_fis_mark, &frb_fis_free, fis);
419
- object_add(fis, self);
446
+ fis = frt_fis_init(fis, store, compression, index, term_vector);
447
+ fis->rfis = self;
420
448
  return self;
421
449
  }
422
450
 
@@ -427,9 +455,7 @@ frb_fis_init(int argc, VALUE *argv, VALUE self)
427
455
  * Return an array of the FieldInfo objects contained by this FieldInfos
428
456
  * object.
429
457
  */
430
- static VALUE
431
- frb_fis_to_a(VALUE self)
432
- {
458
+ static VALUE frb_fis_to_a(VALUE self) {
433
459
  FrtFieldInfos *fis = (FrtFieldInfos *)DATA_PTR(self);
434
460
  VALUE rary = rb_ary_new();
435
461
  int i;
@@ -452,9 +478,7 @@ frb_fis_to_a(VALUE self)
452
478
  * fi = fis[:name]
453
479
  * fi = fis[2]
454
480
  */
455
- static VALUE
456
- frb_fis_get(VALUE self, VALUE ridx)
457
- {
481
+ static VALUE frb_fis_get(VALUE self, VALUE ridx) {
458
482
  FrtFieldInfos *fis = (FrtFieldInfos *)DATA_PTR(self);
459
483
  VALUE rfi = Qnil;
460
484
  switch (TYPE(ridx)) {
@@ -493,9 +517,7 @@ frb_fis_get(VALUE self, VALUE ridx)
493
517
  * Add a FieldInfo object. Use the FieldInfos#add_field method where
494
518
  * possible.
495
519
  */
496
- static VALUE
497
- frb_fis_add(VALUE self, VALUE rfi)
498
- {
520
+ static VALUE frb_fis_add(VALUE self, VALUE rfi) {
499
521
  FrtFieldInfos *fis = (FrtFieldInfos *)DATA_PTR(self);
500
522
  FrtFieldInfo *fi = (FrtFieldInfo *)frb_rb_data_ptr(rfi);
501
523
  frt_fis_add_field(fis, fi);
@@ -516,6 +538,7 @@ frb_fis_add_field(int argc, VALUE *argv, VALUE self)
516
538
  FrtFieldInfos *fis = (FrtFieldInfos *)DATA_PTR(self);
517
539
  FrtFieldInfo *fi;
518
540
  FrtStoreValue store = fis->store;
541
+ FrtCompressionType compression = fis->compression;
519
542
  FrtIndexValue index = fis->index;
520
543
  FrtTermVectorValue term_vector = fis->term_vector;
521
544
  float boost = 1.0f;
@@ -523,9 +546,9 @@ frb_fis_add_field(int argc, VALUE *argv, VALUE self)
523
546
 
524
547
  rb_scan_args(argc, argv, "11", &rname, &roptions);
525
548
  if (argc > 1) {
526
- frb_fi_get_params(roptions, &store, &index, &term_vector, &boost);
549
+ frb_fi_get_params(roptions, &store, &compression, &index, &term_vector, &boost);
527
550
  }
528
- fi = frt_fi_new(frb_field(rname), store, index, term_vector);
551
+ fi = frt_fi_new(frb_field(rname), store, compression, index, term_vector);
529
552
  fi->boost = boost;
530
553
  frt_fis_add_field(fis, fi);
531
554
  return self;
@@ -652,28 +675,46 @@ frb_fis_get_tk_fields(VALUE self)
652
675
  *
653
676
  ****************************************************************************/
654
677
 
655
- static void
656
- frb_te_free(void *p)
657
- {
678
+ static void frb_te_free(void *p) {
658
679
  FrtTermEnum *te = (FrtTermEnum *)p;
659
680
  te->close(te);
660
681
  }
661
682
 
662
- static VALUE
663
- frb_te_get_set_term(VALUE self, const char *term)
664
- {
683
+ static size_t frb_te_size(const void *p) {
684
+ return sizeof(FrtTermEnum);
685
+ (void)p;
686
+ }
687
+
688
+ const rb_data_type_t frb_term_enum_t = {
689
+ .wrap_struct_name = "FrbTermEnum",
690
+ .function = {
691
+ .dmark = NULL,
692
+ .dfree = frb_te_free,
693
+ .dsize = frb_te_size,
694
+ .dcompact = NULL,
695
+ .reserved = {0},
696
+ },
697
+ .parent = NULL,
698
+ .data = NULL,
699
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY
700
+ };
701
+
702
+ static VALUE frb_te_alloc(VALUE rclass) {
703
+ FrtTermEnum *te = FRT_ALLOC_AND_ZERO(FrtTermEnum);
704
+ return TypedData_Wrap_Struct(rclass, &frb_term_enum_t, te);
705
+ }
706
+
707
+ static VALUE frb_te_get_set_term(VALUE self, const char *term) {
665
708
  FrtTermEnum *te = (FrtTermEnum *)DATA_PTR(self);
666
709
  VALUE str = term ? rb_str_new(term, te->curr_term_len) : Qnil;
667
710
  rb_ivar_set(self, id_term, str);
668
711
  return str;
669
712
  }
670
713
 
671
- static VALUE
672
- frb_get_te(VALUE rir, FrtTermEnum *te)
673
- {
714
+ static VALUE frb_get_te(VALUE rir, FrtTermEnum *te) {
674
715
  VALUE self = Qnil;
675
716
  if (te != NULL) {
676
- self = Data_Wrap_Struct(cTermEnum, NULL, &frb_te_free, te);
717
+ self = TypedData_Wrap_Struct(cTermEnum, &frb_term_enum_t, te);
677
718
  frb_te_get_set_term(self, te->curr_term);
678
719
  rb_ivar_set(self, id_fld_num_map, rb_ivar_get(rir, id_fld_num_map));
679
720
  }
@@ -686,9 +727,7 @@ frb_get_te(VALUE rir, FrtTermEnum *te)
686
727
  *
687
728
  * Returns the next term in the enumeration or nil otherwise.
688
729
  */
689
- static VALUE
690
- frb_te_next(VALUE self)
691
- {
730
+ static VALUE frb_te_next(VALUE self) {
692
731
  FrtTermEnum *te = (FrtTermEnum *)DATA_PTR(self);
693
732
  return frb_te_get_set_term(self, te->next(te));
694
733
  }
@@ -700,9 +739,7 @@ frb_te_next(VALUE self)
700
739
  * Returns the current term pointed to by the enum. This method should only
701
740
  * be called after a successful call to TermEnum#next.
702
741
  */
703
- static VALUE
704
- frb_te_term(VALUE self)
705
- {
742
+ static VALUE frb_te_term(VALUE self) {
706
743
  return rb_ivar_get(self, id_term);
707
744
  }
708
745
 
@@ -714,9 +751,7 @@ frb_te_term(VALUE self)
714
751
  * That is the number of documents that this term appears in. The method
715
752
  * should only be called after a successful call to TermEnum#next.
716
753
  */
717
- static VALUE
718
- frb_te_doc_freq(VALUE self)
719
- {
754
+ static VALUE frb_te_doc_freq(VALUE self) {
720
755
  FrtTermEnum *te = (FrtTermEnum *)DATA_PTR(self);
721
756
  return INT2FIX(te->curr_ti.doc_freq);
722
757
  }
@@ -732,9 +767,7 @@ frb_te_doc_freq(VALUE self)
732
767
  *
733
768
  * Returns the first term greater than or equal to +target+
734
769
  */
735
- static VALUE
736
- frb_te_skip_to(VALUE self, VALUE rterm)
737
- {
770
+ static VALUE frb_te_skip_to(VALUE self, VALUE rterm) {
738
771
  FrtTermEnum *te = (FrtTermEnum *)DATA_PTR(self);
739
772
  return frb_te_get_set_term(self, te->skip_to(te, rs2s(rterm)));
740
773
  }
@@ -746,9 +779,7 @@ frb_te_skip_to(VALUE self, VALUE rterm)
746
779
  * Iterates through all the terms in the field, yielding the term and the
747
780
  * document frequency.
748
781
  */
749
- static VALUE
750
- frb_te_each(VALUE self)
751
- {
782
+ static VALUE frb_te_each(VALUE self) {
752
783
  FrtTermEnum *te = (FrtTermEnum *)DATA_PTR(self);
753
784
  char *term;
754
785
  int term_cnt = 0;
@@ -780,9 +811,7 @@ frb_te_each(VALUE self)
780
811
  * do_something()
781
812
  * end
782
813
  */
783
- static VALUE
784
- frb_te_set_field(VALUE self, VALUE rfield)
785
- {
814
+ static VALUE frb_te_set_field(VALUE self, VALUE rfield) {
786
815
  FrtTermEnum *te = (FrtTermEnum *)DATA_PTR(self);
787
816
  int field_num = 0;
788
817
  VALUE rfnum_map = rb_ivar_get(self, id_fld_num_map);
@@ -822,9 +851,7 @@ frb_te_set_field(VALUE self, VALUE rfield)
822
851
  * # ["cantaloupe",12]
823
852
  * # ]
824
853
  */
825
- static VALUE
826
- frb_te_to_json(int argc, VALUE *argv, VALUE self)
827
- {
854
+ static VALUE frb_te_to_json(int argc, VALUE *argv, VALUE self) {
828
855
  FrtTermEnum *te = (FrtTermEnum *)DATA_PTR(self);
829
856
  VALUE rjson;
830
857
  char *json, *jp;
@@ -849,8 +876,7 @@ frb_te_to_json(int argc, VALUE *argv, VALUE self)
849
876
  *(jp++) = ']';
850
877
  *(jp++) = ',';
851
878
  }
852
- }
853
- else {
879
+ } else {
854
880
  while (NULL != (term = te->next(te))) {
855
881
  /* enough room for for term after converting " to '"' and frequency
856
882
  * plus some extra for good measure */
@@ -886,17 +912,37 @@ frb_te_to_json(int argc, VALUE *argv, VALUE self)
886
912
  *
887
913
  ****************************************************************************/
888
914
 
889
- static void
890
- frb_tde_free(void *p)
891
- {
915
+ static void frb_tde_free(void *p) {
892
916
  FrtTermDocEnum *tde = (FrtTermDocEnum *)p;
893
917
  tde->close(tde);
894
918
  }
895
919
 
896
- static VALUE
897
- frb_get_tde(VALUE rir, FrtTermDocEnum *tde)
898
- {
899
- VALUE self = Data_Wrap_Struct(cTermDocEnum, NULL, &frb_tde_free, tde);
920
+ static size_t frb_tde_size(const void *p) {
921
+ return sizeof(FrtTermDocEnum);
922
+ (void)p;
923
+ }
924
+
925
+ const rb_data_type_t frb_term_doc_enum_t = {
926
+ .wrap_struct_name = "FrbTermDocEnum",
927
+ .function = {
928
+ .dmark = NULL,
929
+ .dfree = frb_tde_free,
930
+ .dsize = frb_tde_size,
931
+ .dcompact = NULL,
932
+ .reserved = {0},
933
+ },
934
+ .parent = NULL,
935
+ .data = NULL,
936
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY
937
+ };
938
+
939
+ static VALUE frb_tde_alloc(VALUE rclass) {
940
+ FrtTermDocEnum *tde = FRT_ALLOC_AND_ZERO(FrtTermDocEnum);
941
+ return TypedData_Wrap_Struct(rclass, &frb_term_doc_enum_t, tde);
942
+ }
943
+
944
+ static VALUE frb_get_tde(VALUE rir, FrtTermDocEnum *tde) {
945
+ VALUE self = TypedData_Wrap_Struct(cTermDocEnum, &frb_term_doc_enum_t, tde);
900
946
  rb_ivar_set(self, id_fld_num_map, rb_ivar_get(rir, id_fld_num_map));
901
947
  return self;
902
948
  }
@@ -909,9 +955,7 @@ frb_get_tde(VALUE rir, FrtTermDocEnum *tde)
909
955
  * you can call next or each to skip through the documents and positions of
910
956
  * this particular term.
911
957
  */
912
- static VALUE
913
- frb_tde_seek(VALUE self, VALUE rfield, VALUE rterm)
914
- {
958
+ static VALUE frb_tde_seek(VALUE self, VALUE rfield, VALUE rterm) {
915
959
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
916
960
  char *term;
917
961
  VALUE rfnum_map = rb_ivar_get(self, id_fld_num_map);
@@ -921,8 +965,7 @@ frb_tde_seek(VALUE self, VALUE rfield, VALUE rterm)
921
965
  if (rfnum != Qnil) {
922
966
  field_num = FIX2INT(rfnum);
923
967
  } else {
924
- rb_raise(rb_eArgError, "field %s doesn't exist in the index",
925
- rb_id2name(frb_field(rfield)));
968
+ rb_raise(rb_eArgError, "field %s doesn't exist in the index", rb_id2name(frb_field(rfield)));
926
969
  }
927
970
  tde->seek(tde, field_num, term);
928
971
  return self;
@@ -940,9 +983,7 @@ frb_tde_seek(VALUE self, VALUE rfield, VALUE rterm)
940
983
  * However the +seek_term_enum+ method saves an index lookup so should offer
941
984
  * a large performance improvement.
942
985
  */
943
- static VALUE
944
- frb_tde_seek_te(VALUE self, VALUE rterm_enum)
945
- {
986
+ static VALUE frb_tde_seek_te(VALUE self, VALUE rterm_enum) {
946
987
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
947
988
  FrtTermEnum *te = (FrtTermEnum *)frb_rb_data_ptr(rterm_enum);
948
989
  tde->seek_te(tde, te);
@@ -955,9 +996,7 @@ frb_tde_seek_te(VALUE self, VALUE rterm_enum)
955
996
  *
956
997
  * Returns the current document number pointed to by the +term_doc_enum+.
957
998
  */
958
- static VALUE
959
- frb_tde_doc(VALUE self)
960
- {
999
+ static VALUE frb_tde_doc(VALUE self) {
961
1000
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
962
1001
  return INT2FIX(tde->doc_num(tde));
963
1002
  }
@@ -969,9 +1008,7 @@ frb_tde_doc(VALUE self)
969
1008
  * Returns the frequency of the current document pointed to by the
970
1009
  * +term_doc_enum+.
971
1010
  */
972
- static VALUE
973
- frb_tde_freq(VALUE self)
974
- {
1011
+ static VALUE frb_tde_freq(VALUE self) {
975
1012
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
976
1013
  return INT2FIX(tde->freq(tde));
977
1014
  }
@@ -983,9 +1020,7 @@ frb_tde_freq(VALUE self)
983
1020
  * Move forward to the next document in the enumeration. Returns +true+ if
984
1021
  * there is another document or +false+ otherwise.
985
1022
  */
986
- static VALUE
987
- frb_tde_next(VALUE self)
988
- {
1023
+ static VALUE frb_tde_next(VALUE self) {
989
1024
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
990
1025
  return tde->next(tde) ? Qtrue : Qfalse;
991
1026
  }
@@ -997,9 +1032,7 @@ frb_tde_next(VALUE self)
997
1032
  * Move forward to the next document in the enumeration. Returns +true+ if
998
1033
  * there is another document or +false+ otherwise.
999
1034
  */
1000
- static VALUE
1001
- frb_tde_next_position(VALUE self)
1002
- {
1035
+ static VALUE frb_tde_next_position(VALUE self) {
1003
1036
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
1004
1037
  int pos;
1005
1038
  if (tde->next_position == NULL) {
@@ -1021,9 +1054,7 @@ frb_tde_next_position(VALUE self)
1021
1054
  * NOTE: this method can only be called once after each seek. If you need to
1022
1055
  * call +#each+ again then you should call +#seek+ again too.
1023
1056
  */
1024
- static VALUE
1025
- frb_tde_each(VALUE self)
1026
- {
1057
+ static VALUE frb_tde_each(VALUE self) {
1027
1058
  int doc_cnt = 0;
1028
1059
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
1029
1060
  VALUE vals = rb_ary_new2(2);
@@ -1065,9 +1096,7 @@ frb_tde_each(VALUE self)
1065
1096
  * # [30,3]
1066
1097
  * # ]
1067
1098
  */
1068
- static VALUE
1069
- frb_tde_to_json(int argc, VALUE *argv, VALUE self)
1070
- {
1099
+ static VALUE frb_tde_to_json(int argc, VALUE *argv, VALUE self) {
1071
1100
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
1072
1101
  VALUE rjson;
1073
1102
  char *json, *jp;
@@ -1139,9 +1168,7 @@ frb_tde_to_json(int argc, VALUE *argv, VALUE self)
1139
1168
  * puts " #{positions.join(', ')}"
1140
1169
  * end
1141
1170
  */
1142
- static VALUE
1143
- frb_tde_each_position(VALUE self)
1144
- {
1171
+ static VALUE frb_tde_each_position(VALUE self) {
1145
1172
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
1146
1173
  int pos;
1147
1174
  if (tde->next_position == NULL) {
@@ -1162,9 +1189,7 @@ frb_tde_each_position(VALUE self)
1162
1189
  * Skip to the required document number +target+ and return true if there is
1163
1190
  * a document >= +target+.
1164
1191
  */
1165
- static VALUE
1166
- frb_tde_skip_to(VALUE self, VALUE rtarget)
1167
- {
1192
+ static VALUE frb_tde_skip_to(VALUE self, VALUE rtarget) {
1168
1193
  FrtTermDocEnum *tde = (FrtTermDocEnum *)DATA_PTR(self);
1169
1194
  return tde->skip_to(tde, FIX2INT(rtarget)) ? Qtrue : Qfalse;
1170
1195
  }
@@ -1175,9 +1200,7 @@ frb_tde_skip_to(VALUE self, VALUE rtarget)
1175
1200
  *
1176
1201
  ****************************************************************************/
1177
1202
 
1178
- static VALUE
1179
- frb_get_tv_offsets(FrtOffset *offset)
1180
- {
1203
+ static VALUE frb_get_tv_offsets(FrtOffset *offset) {
1181
1204
  return rb_struct_new(cTVOffsets,
1182
1205
  ULL2NUM((frt_u64)offset->start),
1183
1206
  ULL2NUM((frt_u64)offset->end),
@@ -1190,9 +1213,7 @@ frb_get_tv_offsets(FrtOffset *offset)
1190
1213
  *
1191
1214
  ****************************************************************************/
1192
1215
 
1193
- static VALUE
1194
- frb_get_tv_term(FrtTVTerm *tv_term)
1195
- {
1216
+ static VALUE frb_get_tv_term(FrtTVTerm *tv_term) {
1196
1217
  int i;
1197
1218
  const int freq = tv_term->freq;
1198
1219
  VALUE rtext;
@@ -1214,9 +1235,7 @@ frb_get_tv_term(FrtTVTerm *tv_term)
1214
1235
  *
1215
1236
  ****************************************************************************/
1216
1237
 
1217
- static VALUE
1218
- frb_get_tv(FrtTermVector *tv)
1219
- {
1238
+ static VALUE frb_get_tv(FrtTermVector *tv) {
1220
1239
  int i;
1221
1240
  FrtTVTerm *terms = tv->terms;
1222
1241
  const int t_cnt = tv->term_cnt;
@@ -1247,19 +1266,18 @@ frb_get_tv(FrtTermVector *tv)
1247
1266
  *
1248
1267
  ****************************************************************************/
1249
1268
 
1250
- void
1251
- frb_iw_free(void *p)
1252
- {
1269
+ void frb_iw_free(void *p) {
1253
1270
  frt_iw_close((FrtIndexWriter *)p);
1254
1271
  }
1255
1272
 
1256
- void
1257
- frb_iw_mark(void *p)
1258
- {
1273
+ void frb_iw_mark(void *p) {
1259
1274
  FrtIndexWriter *iw = (FrtIndexWriter *)p;
1260
- frb_gc_mark(iw->analyzer);
1261
- frb_gc_mark(iw->store);
1262
- frb_gc_mark(iw->fis);
1275
+ if (iw->analyzer->ranalyzer)
1276
+ rb_gc_mark(iw->analyzer->ranalyzer);
1277
+ if (iw->store->rstore)
1278
+ rb_gc_mark(iw->store->rstore);
1279
+ if (iw->fis->rfis)
1280
+ rb_gc_mark(iw->fis->rfis);
1263
1281
  }
1264
1282
 
1265
1283
  /*
@@ -1270,11 +1288,11 @@ frb_iw_mark(void *p)
1270
1288
  * exclusively by the index writer. The garbage collector will do this
1271
1289
  * automatically if not called explicitly.
1272
1290
  */
1273
- static VALUE
1274
- frb_iw_close(VALUE self)
1275
- {
1291
+ static VALUE frb_iw_close(VALUE self) {
1276
1292
  FrtIndexWriter *iw = (FrtIndexWriter *)DATA_PTR(self);
1277
- Frt_Unwrap_Struct(self);
1293
+ ((struct RData *)(self))->data = NULL;
1294
+ ((struct RData *)(self))->dmark = NULL;
1295
+ ((struct RData *)(self))->dfree = NULL;
1278
1296
  frt_iw_close(iw);
1279
1297
  return Qnil;
1280
1298
  }
@@ -1303,9 +1321,31 @@ frb_iw_close(VALUE self)
1303
1321
  *
1304
1322
  * See FrtIndexWriter for more options.
1305
1323
  */
1306
- static VALUE
1307
- frb_iw_init(int argc, VALUE *argv, VALUE self)
1308
- {
1324
+ static size_t frb_index_writer_t_size(const void *p) {
1325
+ return sizeof(FrtIndexWriter);
1326
+ (void)p;
1327
+ }
1328
+
1329
+ const rb_data_type_t frb_index_writer_t = {
1330
+ .wrap_struct_name = "FrbIndexWriter",
1331
+ .function = {
1332
+ .dmark = frb_iw_mark,
1333
+ .dfree = frb_iw_free,
1334
+ .dsize = frb_index_writer_t_size,
1335
+ .dcompact = NULL,
1336
+ .reserved = {0},
1337
+ },
1338
+ .parent = NULL,
1339
+ .data = NULL,
1340
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY
1341
+ };
1342
+
1343
+ static VALUE frb_iw_alloc(VALUE rclass) {
1344
+ FrtIndexWriter *iw = frt_iw_alloc();
1345
+ return TypedData_Wrap_Struct(rclass, &frb_index_writer_t, iw);
1346
+ }
1347
+
1348
+ static VALUE frb_iw_init(int argc, VALUE *argv, VALUE self) {
1309
1349
  VALUE roptions, rval;
1310
1350
  bool create = false;
1311
1351
  bool create_if_missing = true;
@@ -1323,7 +1363,7 @@ frb_iw_init(int argc, VALUE *argv, VALUE self)
1323
1363
  Check_Type(roptions, T_HASH);
1324
1364
 
1325
1365
  if ((rval = rb_hash_aref(roptions, sym_dir)) != Qnil) {
1326
- Check_Type(rval, T_DATA);
1366
+ // Check_Type(rval, T_DATA);
1327
1367
  store = DATA_PTR(rval);
1328
1368
  } else if ((rval = rb_hash_aref(roptions, sym_path)) != Qnil) {
1329
1369
  StringValue(rval);
@@ -1331,17 +1371,9 @@ frb_iw_init(int argc, VALUE *argv, VALUE self)
1331
1371
  store = frt_open_fs_store(rs2s(rval));
1332
1372
  FRT_DEREF(store);
1333
1373
  }
1334
-
1335
- /* Let ruby's garbage collector handle the closing of the store
1336
- if (!close_dir) {
1337
- close_dir = RTEST(rb_hash_aref(roptions, sym_close_dir));
1338
- }
1339
- */
1340
1374
  /* use_compound_file defaults to true */
1341
1375
  config.use_compound_file =
1342
- (rb_hash_aref(roptions, sym_use_compound_file) == Qfalse)
1343
- ? false
1344
- : true;
1376
+ (rb_hash_aref(roptions, sym_use_compound_file) == Qfalse) ? false : true;
1345
1377
 
1346
1378
  if ((rval = rb_hash_aref(roptions, sym_analyzer)) != Qnil) {
1347
1379
  analyzer = frb_get_cwrapped_analyzer(rval);
@@ -1361,7 +1393,7 @@ frb_iw_init(int argc, VALUE *argv, VALUE self)
1361
1393
  SET_INT_ATTR(max_field_length);
1362
1394
  }
1363
1395
  if (NULL == store) {
1364
- store = frt_open_ram_store();
1396
+ store = frt_open_ram_store(NULL);
1365
1397
  FRT_DEREF(store);
1366
1398
  }
1367
1399
  if (!create && create_if_missing && !store->exists(store, "segments")) {
@@ -1370,26 +1402,29 @@ frb_iw_init(int argc, VALUE *argv, VALUE self)
1370
1402
  if (create) {
1371
1403
  FrtFieldInfos *fis;
1372
1404
  if ((rval = rb_hash_aref(roptions, sym_field_infos)) != Qnil) {
1373
- Data_Get_Struct(rval, FrtFieldInfos, fis);
1405
+ TypedData_Get_Struct(rval, FrtFieldInfos, &frb_field_infos_t, fis);
1374
1406
  frt_index_create(store, fis);
1375
1407
  } else {
1376
- fis = frt_fis_new(FRT_STORE_YES, FRT_INDEX_YES,
1377
- FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS);
1408
+ fis = frt_fis_new(FRT_STORE_YES, FRT_COMPRESSION_NONE, FRT_INDEX_YES, FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS);
1378
1409
  frt_index_create(store, fis);
1379
1410
  frt_fis_deref(fis);
1380
1411
  }
1381
1412
  }
1382
1413
 
1383
- iw = frt_iw_open(store, analyzer, &config);
1384
-
1385
- Frt_Wrap_Struct(self, &frb_iw_mark, &frb_iw_free, iw);
1386
- default:
1414
+ TypedData_Get_Struct(self, FrtIndexWriter, &frb_index_writer_t, iw);
1415
+ iw = frt_iw_open(iw, store, analyzer, &config);
1416
+ FRT_XCATCHALL
1387
1417
  ex_code = xcontext.excode;
1388
1418
  msg = xcontext.msg;
1389
1419
  FRT_HANDLED();
1390
1420
  FRT_XENDTRY
1391
1421
 
1392
- if (ex_code && msg) { frb_raise(ex_code, msg); }
1422
+ if (ex_code && msg) {
1423
+ ((struct RData *)(self))->data = NULL;
1424
+ ((struct RData *)(self))->dmark = NULL;
1425
+ ((struct RData *)(self))->dfree = NULL;
1426
+ frb_raise(ex_code, msg);
1427
+ }
1393
1428
 
1394
1429
  if (rb_block_given_p()) {
1395
1430
  rb_yield(self);
@@ -1421,7 +1456,7 @@ frb_hash_to_doc_i(VALUE key, VALUE value, VALUE arg)
1421
1456
  return ST_CONTINUE;
1422
1457
  } else {
1423
1458
  FrtDocument *doc = (FrtDocument *)arg;
1424
- FrtSymbol field = frb_field(key);
1459
+ ID field = frb_field(key);
1425
1460
  VALUE val;
1426
1461
  FrtDocField *df;
1427
1462
  if (NULL == (df = frt_doc_get_field(doc, field))) {
@@ -1437,17 +1472,17 @@ frb_hash_to_doc_i(VALUE key, VALUE value, VALUE arg)
1437
1472
  df->destroy_data = true;
1438
1473
  for (i = 0; i < RARRAY_LEN(value); i++) {
1439
1474
  val = rb_obj_as_string(RARRAY_PTR(value)[i]);
1440
- frt_df_add_data_len(df, rstrdup(val), RSTRING_LEN(val));
1475
+ frt_df_add_data_len(df, rstrdup(val), RSTRING_LEN(val), rb_enc_get(val));
1441
1476
  }
1442
1477
  }
1443
1478
  break;
1444
1479
  case T_STRING:
1445
- frt_df_add_data_len(df, rs2s(value), RSTRING_LEN(value));
1480
+ frt_df_add_data_len(df, rs2s(value), RSTRING_LEN(value), rb_enc_get(value));
1446
1481
  break;
1447
1482
  default:
1448
1483
  val = rb_obj_as_string(value);
1449
1484
  df->destroy_data = true;
1450
- frt_df_add_data_len(df, rstrdup(val), RSTRING_LEN(val));
1485
+ frt_df_add_data_len(df, rstrdup(val), RSTRING_LEN(val), rb_enc_get(val));
1451
1486
  break;
1452
1487
  }
1453
1488
  frt_doc_add_field(doc, df);
@@ -1477,25 +1512,23 @@ frb_get_doc(VALUE rdoc)
1477
1512
  df->destroy_data = true;
1478
1513
  for (i = 0; i < RARRAY_LEN(rdoc); i++) {
1479
1514
  val = rb_obj_as_string(RARRAY_PTR(rdoc)[i]);
1480
- frt_df_add_data_len(df, rstrdup(val), RSTRING_LEN(val));
1515
+ frt_df_add_data_len(df, rstrdup(val), RSTRING_LEN(val), rb_enc_get(val));
1481
1516
  }
1482
1517
  frt_doc_add_field(doc, df);
1483
1518
  }
1484
1519
  break;
1485
1520
  case T_SYMBOL:
1486
1521
  /* TODO: clean up this ugly cast */
1487
- df = frt_df_add_data(frt_df_new(fsym_content), (char *)rb_id2name(SYM2ID(rdoc)));
1522
+ df = frt_df_add_data(frt_df_new(fsym_content), (char *)rb_id2name(SYM2ID(rdoc)), rb_enc_get(rdoc));
1488
1523
  frt_doc_add_field(doc, df);
1489
1524
  break;
1490
1525
  case T_STRING:
1491
- df = frt_df_add_data_len(frt_df_new(fsym_content), rs2s(rdoc),
1492
- RSTRING_LEN(rdoc));
1526
+ df = frt_df_add_data_len(frt_df_new(fsym_content), rs2s(rdoc), RSTRING_LEN(rdoc), rb_enc_get(rdoc));
1493
1527
  frt_doc_add_field(doc, df);
1494
1528
  break;
1495
1529
  default:
1496
1530
  val = rb_obj_as_string(rdoc);
1497
- df = frt_df_add_data_len(frt_df_new(fsym_content), rstrdup(val),
1498
- RSTRING_LEN(val));
1531
+ df = frt_df_add_data_len(frt_df_new(fsym_content), rstrdup(val), RSTRING_LEN(val), rb_enc_get(val));
1499
1532
  df->destroy_data = true;
1500
1533
  frt_doc_add_field(doc, df);
1501
1534
  break;
@@ -1557,6 +1590,48 @@ frb_iw_commit(VALUE self)
1557
1590
  return self;
1558
1591
  }
1559
1592
 
1593
+ /* index reader intermission */
1594
+ static VALUE frb_ir_close(VALUE self);
1595
+
1596
+ void frb_ir_free(void *p) {
1597
+ frt_ir_close((FrtIndexReader *)p);
1598
+ }
1599
+
1600
+ void frb_ir_mark(void *p) {
1601
+ FrtIndexReader *ir = (FrtIndexReader *)p;
1602
+ FrtMultiReader *mr = (FrtMultiReader *)p;
1603
+
1604
+ if (ir->type == FRT_MULTI_READER) {
1605
+ int i;
1606
+ for (i = 0; i < mr->r_cnt; i++) {
1607
+ if (mr->sub_readers[i]->rir)
1608
+ rb_gc_mark(mr->sub_readers[i]->rir);
1609
+ }
1610
+ } else {
1611
+ if (ir->store && ir->store->rstore)
1612
+ rb_gc_mark(ir->store->rstore);
1613
+ }
1614
+ }
1615
+
1616
+ static size_t frb_index_reader_t_size(const void *p) {
1617
+ return sizeof(FrtMultiReader);
1618
+ (void)p;
1619
+ }
1620
+
1621
+ const rb_data_type_t frb_index_reader_t = {
1622
+ .wrap_struct_name = "FrbIndexReader",
1623
+ .function = {
1624
+ .dmark = frb_ir_mark,
1625
+ .dfree = frb_ir_free,
1626
+ .dsize = frb_index_reader_t_size,
1627
+ .dcompact = NULL,
1628
+ .reserved = {0},
1629
+ },
1630
+ .parent = NULL,
1631
+ .data = NULL,
1632
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY
1633
+ };
1634
+
1560
1635
  /*
1561
1636
  * call-seq:
1562
1637
  * iw.add_readers(reader_array) -> iw
@@ -1567,9 +1642,7 @@ frb_iw_commit(VALUE self)
1567
1642
  * machines. Then you can finish by merging all of the indexes into a single
1568
1643
  * index.
1569
1644
  */
1570
- static VALUE
1571
- frb_iw_add_readers(VALUE self, VALUE rreaders)
1572
- {
1645
+ static VALUE frb_iw_add_readers(VALUE self, VALUE rreaders) {
1573
1646
  FrtIndexWriter *iw = (FrtIndexWriter *)DATA_PTR(self);
1574
1647
  int i;
1575
1648
  FrtIndexReader **irs;
@@ -1579,7 +1652,7 @@ frb_iw_add_readers(VALUE self, VALUE rreaders)
1579
1652
  i = RARRAY_LEN(rreaders);
1580
1653
  while (i-- > 0) {
1581
1654
  FrtIndexReader *ir;
1582
- Data_Get_Struct(RARRAY_PTR(rreaders)[i], FrtIndexReader, ir);
1655
+ TypedData_Get_Struct(RARRAY_PTR(rreaders)[i], FrtIndexReader, &frb_index_reader_t, ir);
1583
1656
  irs[i] = ir;
1584
1657
  }
1585
1658
  frt_iw_add_readers(iw, irs, RARRAY_LEN(rreaders));
@@ -1925,26 +1998,50 @@ frb_iw_set_use_compound_file(VALUE self, VALUE rval)
1925
1998
  *
1926
1999
  ****************************************************************************/
1927
2000
 
1928
- static void
1929
- frb_lzd_data_free(void *p)
1930
- {
2001
+ static void frb_lzd_data_free(void *p) {
1931
2002
  frt_lazy_doc_close((FrtLazyDoc *)p);
1932
2003
  }
1933
2004
 
1934
- static VALUE
1935
- frb_lazy_df_load(VALUE self, VALUE rkey, FrtLazyDocField *lazy_df)
1936
- {
2005
+ static size_t frb_lazy_doc_size(const void *p) {
2006
+ return sizeof(FrtLazyDoc);
2007
+ (void)p;
2008
+ }
2009
+
2010
+ const rb_data_type_t frb_lazy_doc_t = {
2011
+ .wrap_struct_name = "FrbLazyDoc",
2012
+ .function = {
2013
+ .dmark = NULL,
2014
+ .dfree = frb_lzd_data_free,
2015
+ .dsize = frb_lazy_doc_size,
2016
+ .dcompact = NULL,
2017
+ .reserved = {0},
2018
+ },
2019
+ .parent = NULL,
2020
+ .data = NULL,
2021
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY
2022
+ };
2023
+
2024
+ static VALUE frb_lzd_alloc(VALUE klass) {
2025
+ FrtLazyDoc *ld = FRT_ALLOC(FrtLazyDoc);
2026
+ return TypedData_Wrap_Struct(klass, &frb_lazy_doc_t, ld);
2027
+ }
2028
+
2029
+ static VALUE frb_lazy_df_load(VALUE self, VALUE rkey, FrtLazyDocField *lazy_df) {
1937
2030
  VALUE rdata = Qnil;
1938
2031
  if (lazy_df) {
1939
2032
  if (lazy_df->size == 1) {
1940
2033
  char *data = frt_lazy_df_get_data(lazy_df, 0);
1941
- rdata = rb_str_new(data, lazy_df->len);
2034
+ rdata = rb_str_new(data, lazy_df->data[0].length);
2035
+ rb_enc_associate(rdata, lazy_df->data[0].encoding);
1942
2036
  } else {
1943
2037
  int i;
2038
+ VALUE rstr;
1944
2039
  rdata = rb_ary_new2(lazy_df->size);
1945
2040
  for (i = 0; i < lazy_df->size; i++) {
1946
2041
  char *data = frt_lazy_df_get_data(lazy_df, i);
1947
- rb_ary_store(rdata, i, rb_str_new(data, lazy_df->data[i].length));
2042
+ rstr = rb_str_new(data, lazy_df->data[i].length);
2043
+ rb_enc_associate(rstr, lazy_df->data[i].encoding);
2044
+ rb_ary_store(rdata, i, rstr);
1948
2045
  }
1949
2046
  }
1950
2047
  rb_hash_aset(self, rkey, rdata);
@@ -1959,11 +2056,9 @@ frb_lazy_df_load(VALUE self, VALUE rkey, FrtLazyDocField *lazy_df)
1959
2056
  * This method is used internally to lazily load fields. You should never
1960
2057
  * really need to call it yourself.
1961
2058
  */
1962
- static VALUE
1963
- frb_lzd_default(VALUE self, VALUE rkey)
1964
- {
2059
+ static VALUE frb_lzd_default(VALUE self, VALUE rkey) {
1965
2060
  FrtLazyDoc *lazy_doc = (FrtLazyDoc *)DATA_PTR(rb_ivar_get(self, id_data));
1966
- FrtSymbol field = frb_field(rkey);
2061
+ ID field = frb_field(rkey);
1967
2062
  VALUE rfield = ID2SYM(field);
1968
2063
 
1969
2064
  return frb_lazy_df_load(self, rfield, frt_lazy_doc_get(lazy_doc, field));
@@ -1977,9 +2072,7 @@ frb_lzd_default(VALUE self, VALUE rkey)
1977
2072
  * to access any of these fields in the document the field will be loaded.
1978
2073
  * Try to access any other field an nil will be returned.
1979
2074
  */
1980
- static VALUE
1981
- frb_lzd_fields(VALUE self)
1982
- {
2075
+ static VALUE frb_lzd_fields(VALUE self) {
1983
2076
  return rb_ivar_get(self, id_fields);
1984
2077
  }
1985
2078
 
@@ -1989,9 +2082,7 @@ frb_lzd_fields(VALUE self)
1989
2082
  *
1990
2083
  * Load all unloaded fields in the document from the index.
1991
2084
  */
1992
- static VALUE
1993
- frb_lzd_load(VALUE self)
1994
- {
2085
+ static VALUE frb_lzd_load(VALUE self) {
1995
2086
  FrtLazyDoc *lazy_doc = (FrtLazyDoc *)DATA_PTR(rb_ivar_get(self, id_data));
1996
2087
  int i;
1997
2088
  for (i = 0; i < lazy_doc->size; i++) {
@@ -2001,9 +2092,7 @@ frb_lzd_load(VALUE self)
2001
2092
  return self;
2002
2093
  }
2003
2094
 
2004
- VALUE
2005
- frb_get_lazy_doc(FrtLazyDoc *lazy_doc)
2006
- {
2095
+ VALUE frb_get_lazy_doc(FrtLazyDoc *lazy_doc) {
2007
2096
  int i;
2008
2097
  VALUE rfields = rb_ary_new2(lazy_doc->size);
2009
2098
 
@@ -2011,7 +2100,7 @@ frb_get_lazy_doc(FrtLazyDoc *lazy_doc)
2011
2100
  self = rb_hash_new();
2012
2101
  OBJSETUP(self, cLazyDoc, T_HASH);
2013
2102
 
2014
- rdata = Data_Wrap_Struct(cLazyDocData, NULL, &frb_lzd_data_free, lazy_doc);
2103
+ rdata = TypedData_Wrap_Struct(cLazyDocData, &frb_lazy_doc_t, lazy_doc);
2015
2104
  rb_ivar_set(self, id_data, rdata);
2016
2105
 
2017
2106
  for (i = 0; i < lazy_doc->size; i++) {
@@ -2028,32 +2117,6 @@ frb_get_lazy_doc(FrtLazyDoc *lazy_doc)
2028
2117
  *
2029
2118
  ****************************************************************************/
2030
2119
 
2031
- void
2032
- frb_ir_free(void *p)
2033
- {
2034
- object_del(p);
2035
- frt_ir_close((FrtIndexReader *)p);
2036
- }
2037
-
2038
- void
2039
- frb_ir_mark(void *p)
2040
- {
2041
- FrtIndexReader *ir = (FrtIndexReader *)p;
2042
- frb_gc_mark(ir->store);
2043
- }
2044
-
2045
- static VALUE frb_ir_close(VALUE self);
2046
-
2047
- void
2048
- frb_mr_mark(void *p)
2049
- {
2050
- FrtMultiReader *mr = (FrtMultiReader *)p;
2051
- int i;
2052
- for (i = 0; i < mr->r_cnt; i++) {
2053
- frb_gc_mark(mr->sub_readers[i]);
2054
- }
2055
- }
2056
-
2057
2120
  /*
2058
2121
  * call-seq:
2059
2122
  * IndexReader.new(dir) -> index_reader
@@ -2080,9 +2143,15 @@ frb_mr_mark(void *p)
2080
2143
  *
2081
2144
  * iw = IndexReader.new(["/path/to/index1", "/path/to/index2"])
2082
2145
  */
2083
- static VALUE
2084
- frb_ir_init(VALUE self, VALUE rdir)
2085
- {
2146
+
2147
+ static VALUE frb_ir_alloc(VALUE rclass) {
2148
+ // allocate for FrtSegmentReader, the largest of the Frt*Reader structs,
2149
+ // FrtIndexReader is part of it and later on its determined what its going to be
2150
+ FrtIndexReader *ir = (FrtIndexReader *)frt_sr_alloc();
2151
+ return TypedData_Wrap_Struct(rclass, &frb_index_reader_t, ir);
2152
+ }
2153
+
2154
+ static VALUE frb_ir_init(VALUE self, VALUE rdir) {
2086
2155
  FrtStore *store = NULL;
2087
2156
  FrtIndexReader *ir;
2088
2157
  int i;
@@ -2102,7 +2171,7 @@ frb_ir_init(VALUE self, VALUE rdir)
2102
2171
  switch (TYPE(rdir)) {
2103
2172
  case T_DATA:
2104
2173
  if (CLASS_OF(rdir) == cIndexReader) {
2105
- Data_Get_Struct(rdir, FrtIndexReader, sub_readers[i]);
2174
+ TypedData_Get_Struct(rdir, FrtIndexReader, &frb_index_reader_t, sub_readers[i]);
2106
2175
  FRT_REF(sub_readers[i]);
2107
2176
  continue;
2108
2177
  } else if (RTEST(rb_obj_is_kind_of(rdir, cDirectory))) {
@@ -2127,10 +2196,10 @@ frb_ir_init(VALUE self, VALUE rdir)
2127
2196
  rs2s(rb_obj_as_string(rdir)));
2128
2197
  break;
2129
2198
  }
2130
- sub_readers[i] = frt_ir_open(store);
2199
+ sub_readers[i] = frt_ir_open(NULL, store);
2131
2200
  }
2132
- ir = frt_mr_open(sub_readers, reader_cnt);
2133
- Frt_Wrap_Struct(self, &frb_mr_mark, &frb_ir_free, ir);
2201
+ TypedData_Get_Struct(self, FrtIndexReader, &frb_index_reader_t, ir);
2202
+ ir = frt_mr_open(ir, sub_readers, reader_cnt);
2134
2203
  } else {
2135
2204
  switch (TYPE(rdir)) {
2136
2205
  case T_DATA:
@@ -2147,25 +2216,28 @@ frb_ir_init(VALUE self, VALUE rdir)
2147
2216
  rs2s(rb_obj_as_string(rdir)));
2148
2217
  break;
2149
2218
  }
2150
- ir = frt_ir_open(store);
2151
- Frt_Wrap_Struct(self, &frb_ir_mark, &frb_ir_free, ir);
2219
+ TypedData_Get_Struct(self, FrtIndexReader, &frb_index_reader_t, ir);
2220
+ ir = frt_ir_open(ir, store);
2152
2221
  }
2153
- default:
2222
+ FRT_XCATCHALL
2154
2223
  ex_code = xcontext.excode;
2155
2224
  msg = xcontext.msg;
2156
2225
  FRT_HANDLED();
2157
2226
  FRT_XENDTRY
2158
2227
 
2159
- if (ex_code && msg) { frb_raise(ex_code, msg); }
2228
+ if (ex_code && msg) {
2229
+ ((struct RData *)(self))->data = NULL;
2230
+ ((struct RData *)(self))->dmark = NULL;
2231
+ ((struct RData *)(self))->dfree = NULL;
2232
+ frb_raise(ex_code, msg);
2233
+ }
2160
2234
 
2161
- object_add(ir, self);
2235
+ ir->rir = self;
2162
2236
 
2163
2237
  fis = ir->fis;
2164
2238
  for (i = 0; i < fis->size; i++) {
2165
2239
  FrtFieldInfo *fi = fis->fields[i];
2166
- rb_hash_aset(rfield_num_map,
2167
- ID2SYM(fi->name),
2168
- INT2FIX(fi->number));
2240
+ rb_hash_aset(rfield_num_map, ID2SYM(fi->name), INT2FIX(fi->number));
2169
2241
  }
2170
2242
  rb_ivar_set(self, id_fld_num_map, rfield_num_map);
2171
2243
 
@@ -2263,8 +2335,9 @@ static VALUE
2263
2335
  frb_ir_close(VALUE self)
2264
2336
  {
2265
2337
  FrtIndexReader *ir = (FrtIndexReader *)DATA_PTR(self);
2266
- object_del(ir);
2267
- Frt_Unwrap_Struct(self);
2338
+ ((struct RData *)(self))->data = NULL;
2339
+ ((struct RData *)(self))->dmark = NULL;
2340
+ ((struct RData *)(self))->dfree = NULL;
2268
2341
  frt_ir_close(ir);
2269
2342
  return self;
2270
2343
  }
@@ -2464,7 +2537,7 @@ frb_ir_term_vector(VALUE self, VALUE rdoc_id, VALUE rfield)
2464
2537
  static void
2465
2538
  frb_add_each_tv(void *key, void *value, void *rtvs)
2466
2539
  {
2467
- rb_hash_aset((VALUE)rtvs, ID2SYM((FrtSymbol)key), frb_get_tv(value));
2540
+ rb_hash_aset((VALUE)rtvs, ID2SYM((ID)key), frb_get_tv(value));
2468
2541
  }
2469
2542
 
2470
2543
  /*
@@ -2698,7 +2771,7 @@ frb_ir_version(VALUE self)
2698
2771
  * == Summary
2699
2772
  *
2700
2773
  * The FieldInfo class is the field descriptor for the index. It specifies
2701
- * whether a field should be indexed and
2774
+ * whether a field is compressed and whether it should be indexed and
2702
2775
  * tokenized. Every field has a name which must be a symbol. There are three
2703
2776
  * properties that you can set, +:store+, +:index+ and +:term_vector+. You
2704
2777
  * can also set the default +:boost+ for a field as well.
@@ -2708,8 +2781,8 @@ frb_ir_version(VALUE self)
2708
2781
  * === :store
2709
2782
  *
2710
2783
  * The +:store+ property allows you to specify how a field is stored. You can
2711
- * leave a field unstored (+:no+), store it in it's original format (+:yes+).
2712
- * By default the document
2784
+ * leave a field unstored (+:no+), store it in its original format (+:yes+)
2785
+ * or store it in compressed format (+:compressed+). By default the document
2713
2786
  * is stored in its original format. If the field is large and it is stored
2714
2787
  * elsewhere where it is easily accessible you might want to leave it
2715
2788
  * unstored. This will keep the index size a lot smaller and make the
@@ -2752,6 +2825,14 @@ frb_ir_version(VALUE self)
2752
2825
  * | | or print match excerpts a la
2753
2826
  * | | Google search.
2754
2827
  * -------------|-------------------------|------------------------------
2828
+ * :compression | :no (default) | Don't compress stored field
2829
+ * | |
2830
+ * | :brotli | Compress field using Brotli
2831
+ * | |
2832
+ * | :bz2 | Compress field using BZip2
2833
+ * | |
2834
+ * | :lz4 | Compress field using LZ4
2835
+ * -------------|-------------------------|------------------------------
2755
2836
  * :index | :no | Do not make this field
2756
2837
  * | | searchable.
2757
2838
  * | |
@@ -2809,6 +2890,9 @@ frb_ir_version(VALUE self)
2809
2890
  *
2810
2891
  * fi = FieldInfo.new(:created_on, :index => :untokenized_omit_norms,
2811
2892
  * :term_vector => :no)
2893
+ *
2894
+ * fi = FieldInfo.new(:image, :store => :yes, :compression => :brotli, :index => :no,
2895
+ * :term_vector => :no)
2812
2896
  */
2813
2897
  static void
2814
2898
  Init_FieldInfo(void)
@@ -2817,6 +2901,12 @@ Init_FieldInfo(void)
2817
2901
  sym_index = ID2SYM(rb_intern("index"));
2818
2902
  sym_term_vector = ID2SYM(rb_intern("term_vector"));
2819
2903
 
2904
+ sym_brotli = ID2SYM(rb_intern("brotli"));
2905
+ sym_bz2 = ID2SYM(rb_intern("bz2"));
2906
+ sym_lz4 = ID2SYM(rb_intern("lz4"));
2907
+ // sym_level = ID2SYM(rb_intern("level"));
2908
+ sym_compression = ID2SYM(rb_intern("compression"));
2909
+
2820
2910
  sym_untokenized = ID2SYM(rb_intern("untokenized"));
2821
2911
  sym_omit_norms = ID2SYM(rb_intern("omit_norms"));
2822
2912
  sym_untokenized_omit_norms = ID2SYM(rb_intern("untokenized_omit_norms"));
@@ -2826,11 +2916,12 @@ Init_FieldInfo(void)
2826
2916
  sym_with_positions_offsets = ID2SYM(rb_intern("with_positions_offsets"));
2827
2917
 
2828
2918
  cFieldInfo = rb_define_class_under(mIndex, "FieldInfo", rb_cObject);
2829
- rb_define_alloc_func(cFieldInfo, frb_data_alloc);
2919
+ rb_define_alloc_func(cFieldInfo, frb_fi_alloc);
2830
2920
 
2831
2921
  rb_define_method(cFieldInfo, "initialize", frb_fi_init, -1);
2832
2922
  rb_define_method(cFieldInfo, "name", frb_fi_name, 0);
2833
2923
  rb_define_method(cFieldInfo, "stored?", frb_fi_is_stored, 0);
2924
+ rb_define_method(cFieldInfo, "compressed?", frb_fi_is_compressed, 0);
2834
2925
  rb_define_method(cFieldInfo, "indexed?", frb_fi_is_indexed, 0);
2835
2926
  rb_define_method(cFieldInfo, "tokenized?", frb_fi_is_tokenized, 0);
2836
2927
  rb_define_method(cFieldInfo, "omit_norms?", frb_fi_omit_norms, 0);
@@ -2869,6 +2960,9 @@ Init_FieldInfo(void)
2869
2960
  * field_infos.add_field(:created_on, :index => :untokenized_omit_norms,
2870
2961
  * :term_vector => :no)
2871
2962
  *
2963
+ * field_infos.add_field(:image, :store => :yes, :compression => :brotli, :index => :no,
2964
+ * :term_vector => :no)
2965
+ *
2872
2966
  * field_infos.create_index("/path/to/index")
2873
2967
  *
2874
2968
  * == Default Properties
@@ -2882,13 +2976,11 @@ Init_FieldInfo(void)
2882
2976
  * along. If you add a document to the index which has fields that the index
2883
2977
  * doesn't know about then the default properties are used for the new field.
2884
2978
  */
2885
- static void
2886
- Init_FieldInfos(void)
2887
- {
2979
+ static void Init_FieldInfos(void) {
2888
2980
  Init_FieldInfo();
2889
2981
 
2890
2982
  cFieldInfos = rb_define_class_under(mIndex, "FieldInfos", rb_cObject);
2891
- rb_define_alloc_func(cFieldInfos, frb_data_alloc);
2983
+ rb_define_alloc_func(cFieldInfos, frb_fis_alloc);
2892
2984
 
2893
2985
  rb_define_method(cFieldInfos, "initialize", frb_fis_init, -1);
2894
2986
  rb_define_method(cFieldInfos, "to_a", frb_fis_to_a, 0);
@@ -2927,12 +3019,11 @@ Init_FieldInfos(void)
2927
3019
  * end
2928
3020
  */
2929
3021
  static void
2930
- Init_TermEnum(void)
2931
- {
3022
+ Init_TermEnum(void) {
2932
3023
  id_term = rb_intern("@term");
2933
3024
 
2934
3025
  cTermEnum = rb_define_class_under(mIndex, "TermEnum", rb_cObject);
2935
- rb_define_alloc_func(cTermEnum, frb_data_alloc);
3026
+ rb_define_alloc_func(cTermEnum, frb_te_alloc);
2936
3027
 
2937
3028
  rb_define_method(cTermEnum, "next?", frb_te_next, 0);
2938
3029
  rb_define_method(cTermEnum, "term", frb_te_term, 0);
@@ -2976,14 +3067,12 @@ Init_TermEnum(void)
2976
3067
  * puts " #{positions.join(', ')}"
2977
3068
  * end
2978
3069
  */
2979
- static void
2980
- Init_TermDocEnum(void)
2981
- {
3070
+ static void Init_TermDocEnum(void) {
2982
3071
  id_fld_num_map = rb_intern("@field_num_map");
2983
3072
  id_field_num = rb_intern("@field_num");
2984
3073
 
2985
3074
  cTermDocEnum = rb_define_class_under(mIndex, "TermDocEnum", rb_cObject);
2986
- rb_define_alloc_func(cTermDocEnum, frb_data_alloc);
3075
+ rb_define_alloc_func(cTermDocEnum, frb_tde_alloc);
2987
3076
  rb_define_method(cTermDocEnum, "seek", frb_tde_seek, 2);
2988
3077
  rb_define_method(cTermDocEnum, "seek_term_enum", frb_tde_seek_te, 1);
2989
3078
  rb_define_method(cTermDocEnum, "doc", frb_tde_doc, 0);
@@ -3016,9 +3105,7 @@ cTermVector = rb_define_class_under(mIndex, "TermVector", rb_cObject);
3016
3105
  *
3017
3106
  * See the Analysis module for more information on setting the offsets.
3018
3107
  */
3019
- static void
3020
- Init_TVOffsets(void)
3021
- {
3108
+ static void Init_TVOffsets(void) {
3022
3109
  const char *tv_offsets_class = "TVOffsets";
3023
3110
  /* rdochack
3024
3111
  cTVOffsets = rb_define_class_under(cTermVector, "TVOffsets", rb_cObject);
@@ -3214,112 +3301,80 @@ Init_TermVector(void)
3214
3301
  *
3215
3302
  * index_writer.delete(:id, "/path/to/indexed/file")
3216
3303
  */
3217
- void
3218
- Init_IndexWriter(void)
3219
- {
3304
+ void Init_IndexWriter(void) {
3220
3305
  id_boost = rb_intern("boost");
3221
3306
 
3222
- sym_create = ID2SYM(rb_intern("create"));
3223
- sym_create_if_missing = ID2SYM(rb_intern("create_if_missing"));
3224
- sym_field_infos = ID2SYM(rb_intern("field_infos"));
3307
+ sym_create = ID2SYM(rb_intern("create"));
3308
+ sym_create_if_missing = ID2SYM(rb_intern("create_if_missing"));
3309
+ sym_field_infos = ID2SYM(rb_intern("field_infos"));
3225
3310
 
3226
- sym_chunk_size = ID2SYM(rb_intern("chunk_size"));
3227
- sym_max_buffer_memory = ID2SYM(rb_intern("max_buffer_memory"));
3228
- sym_index_interval = ID2SYM(rb_intern("term_index_interval"));
3229
- sym_skip_interval = ID2SYM(rb_intern("doc_skip_interval"));
3230
- sym_merge_factor = ID2SYM(rb_intern("merge_factor"));
3231
- sym_max_buffered_docs = ID2SYM(rb_intern("max_buffered_docs"));
3232
- sym_max_merge_docs = ID2SYM(rb_intern("max_merge_docs"));
3233
- sym_max_field_length = ID2SYM(rb_intern("max_field_length"));
3234
- sym_use_compound_file = ID2SYM(rb_intern("use_compound_file"));
3311
+ sym_chunk_size = ID2SYM(rb_intern("chunk_size"));
3312
+ sym_max_buffer_memory = ID2SYM(rb_intern("max_buffer_memory"));
3313
+ sym_index_interval = ID2SYM(rb_intern("term_index_interval"));
3314
+ sym_skip_interval = ID2SYM(rb_intern("doc_skip_interval"));
3315
+ sym_merge_factor = ID2SYM(rb_intern("merge_factor"));
3316
+ sym_max_buffered_docs = ID2SYM(rb_intern("max_buffered_docs"));
3317
+ sym_max_merge_docs = ID2SYM(rb_intern("max_merge_docs"));
3318
+ sym_max_field_length = ID2SYM(rb_intern("max_field_length"));
3319
+ sym_use_compound_file = ID2SYM(rb_intern("use_compound_file"));
3235
3320
 
3236
3321
  cIndexWriter = rb_define_class_under(mIndex, "IndexWriter", rb_cObject);
3237
- rb_define_alloc_func(cIndexWriter, frb_data_alloc);
3322
+ rb_define_alloc_func(cIndexWriter, frb_iw_alloc);
3238
3323
 
3239
3324
  rb_define_const(cIndexWriter, "WRITE_LOCK_TIMEOUT", INT2FIX(1));
3240
3325
  rb_define_const(cIndexWriter, "COMMIT_LOCK_TIMEOUT", INT2FIX(10));
3241
- rb_define_const(cIndexWriter, "WRITE_LOCK_NAME",
3242
- rb_str_new2(FRT_WRITE_LOCK_NAME));
3243
- rb_define_const(cIndexWriter, "COMMIT_LOCK_NAME",
3244
- rb_str_new2(FRT_COMMIT_LOCK_NAME));
3245
- rb_define_const(cIndexWriter, "DEFAULT_CHUNK_SIZE",
3246
- INT2FIX(frt_default_config.chunk_size));
3247
- rb_define_const(cIndexWriter, "DEFAULT_MAX_BUFFER_MEMORY",
3248
- INT2FIX(frt_default_config.max_buffer_memory));
3249
- rb_define_const(cIndexWriter, "DEFAULT_TERM_INDEX_INTERVAL",
3250
- INT2FIX(frt_default_config.index_interval));
3251
- rb_define_const(cIndexWriter, "DEFAULT_DOC_SKIP_INTERVAL",
3252
- INT2FIX(frt_default_config.skip_interval));
3253
- rb_define_const(cIndexWriter, "DEFAULT_MERGE_FACTOR",
3254
- INT2FIX(frt_default_config.merge_factor));
3255
- rb_define_const(cIndexWriter, "DEFAULT_MAX_BUFFERED_DOCS",
3256
- INT2FIX(frt_default_config.max_buffered_docs));
3257
- rb_define_const(cIndexWriter, "DEFAULT_MAX_MERGE_DOCS",
3258
- INT2FIX(frt_default_config.max_merge_docs));
3259
- rb_define_const(cIndexWriter, "DEFAULT_MAX_FIELD_LENGTH",
3260
- INT2FIX(frt_default_config.max_field_length));
3261
- rb_define_const(cIndexWriter, "DEFAULT_USE_COMPOUND_FILE",
3262
- frt_default_config.use_compound_file ? Qtrue : Qfalse);
3263
-
3264
- rb_define_method(cIndexWriter, "initialize", frb_iw_init, -1);
3265
- rb_define_method(cIndexWriter, "doc_count", frb_iw_get_doc_count, 0);
3266
- rb_define_method(cIndexWriter, "close", frb_iw_close, 0);
3267
- rb_define_method(cIndexWriter, "add_document", frb_iw_add_doc, 1);
3268
- rb_define_method(cIndexWriter, "<<", frb_iw_add_doc, 1);
3269
- rb_define_method(cIndexWriter, "optimize", frb_iw_optimize, 0);
3270
- rb_define_method(cIndexWriter, "commit", frb_iw_commit, 0);
3271
- rb_define_method(cIndexWriter, "add_readers", frb_iw_add_readers, 1);
3272
- rb_define_method(cIndexWriter, "delete", frb_iw_delete, 2);
3273
- rb_define_method(cIndexWriter, "field_infos", frb_iw_field_infos, 0);
3274
- rb_define_method(cIndexWriter, "analyzer", frb_iw_get_analyzer, 0);
3275
- rb_define_method(cIndexWriter, "analyzer=", frb_iw_set_analyzer, 1);
3276
- rb_define_method(cIndexWriter, "version", frb_iw_version, 0);
3277
-
3278
- rb_define_method(cIndexWriter, "chunk_size",
3279
- frb_iw_get_chunk_size, 0);
3280
- rb_define_method(cIndexWriter, "chunk_size=",
3281
- frb_iw_set_chunk_size, 1);
3282
-
3283
- rb_define_method(cIndexWriter, "max_buffer_memory",
3284
- frb_iw_get_max_buffer_memory, 0);
3285
- rb_define_method(cIndexWriter, "max_buffer_memory=",
3286
- frb_iw_set_max_buffer_memory, 1);
3287
-
3288
- rb_define_method(cIndexWriter, "term_index_interval",
3289
- frb_iw_get_index_interval, 0);
3290
- rb_define_method(cIndexWriter, "term_index_interval=",
3291
- frb_iw_set_index_interval, 1);
3292
-
3293
- rb_define_method(cIndexWriter, "doc_skip_interval",
3294
- frb_iw_get_skip_interval, 0);
3295
- rb_define_method(cIndexWriter, "doc_skip_interval=",
3296
- frb_iw_set_skip_interval, 1);
3297
-
3298
- rb_define_method(cIndexWriter, "merge_factor",
3299
- frb_iw_get_merge_factor, 0);
3300
- rb_define_method(cIndexWriter, "merge_factor=",
3301
- frb_iw_set_merge_factor, 1);
3302
-
3303
- rb_define_method(cIndexWriter, "max_buffered_docs",
3304
- frb_iw_get_max_buffered_docs, 0);
3305
- rb_define_method(cIndexWriter, "max_buffered_docs=",
3306
- frb_iw_set_max_buffered_docs, 1);
3307
-
3308
- rb_define_method(cIndexWriter, "max_merge_docs",
3309
- frb_iw_get_max_merge_docs, 0);
3310
- rb_define_method(cIndexWriter, "max_merge_docs=",
3311
- frb_iw_set_max_merge_docs, 1);
3312
-
3313
- rb_define_method(cIndexWriter, "max_field_length",
3314
- frb_iw_get_max_field_length, 0);
3315
- rb_define_method(cIndexWriter, "max_field_length=",
3316
- frb_iw_set_max_field_length, 1);
3317
-
3318
- rb_define_method(cIndexWriter, "use_compound_file",
3319
- frb_iw_get_use_compound_file, 0);
3320
- rb_define_method(cIndexWriter, "use_compound_file=",
3321
- frb_iw_set_use_compound_file, 1);
3326
+ rb_define_const(cIndexWriter, "WRITE_LOCK_NAME", rb_str_new2(FRT_WRITE_LOCK_NAME));
3327
+ rb_define_const(cIndexWriter, "COMMIT_LOCK_NAME", rb_str_new2(FRT_COMMIT_LOCK_NAME));
3328
+ rb_define_const(cIndexWriter, "DEFAULT_CHUNK_SIZE", INT2FIX(frt_default_config.chunk_size));
3329
+ rb_define_const(cIndexWriter, "DEFAULT_MAX_BUFFER_MEMORY", INT2FIX(frt_default_config.max_buffer_memory));
3330
+ rb_define_const(cIndexWriter, "DEFAULT_TERM_INDEX_INTERVAL", INT2FIX(frt_default_config.index_interval));
3331
+ rb_define_const(cIndexWriter, "DEFAULT_DOC_SKIP_INTERVAL", INT2FIX(frt_default_config.skip_interval));
3332
+ rb_define_const(cIndexWriter, "DEFAULT_MERGE_FACTOR", INT2FIX(frt_default_config.merge_factor));
3333
+ rb_define_const(cIndexWriter, "DEFAULT_MAX_BUFFERED_DOCS", INT2FIX(frt_default_config.max_buffered_docs));
3334
+ rb_define_const(cIndexWriter, "DEFAULT_MAX_MERGE_DOCS", INT2FIX(frt_default_config.max_merge_docs));
3335
+ rb_define_const(cIndexWriter, "DEFAULT_MAX_FIELD_LENGTH", INT2FIX(frt_default_config.max_field_length));
3336
+ rb_define_const(cIndexWriter, "DEFAULT_USE_COMPOUND_FILE", frt_default_config.use_compound_file ? Qtrue : Qfalse);
3337
+
3338
+ rb_define_method(cIndexWriter, "initialize", frb_iw_init, -1);
3339
+ rb_define_method(cIndexWriter, "doc_count", frb_iw_get_doc_count, 0);
3340
+ rb_define_method(cIndexWriter, "close", frb_iw_close, 0);
3341
+ rb_define_method(cIndexWriter, "add_document", frb_iw_add_doc, 1);
3342
+ rb_define_method(cIndexWriter, "<<", frb_iw_add_doc, 1);
3343
+ rb_define_method(cIndexWriter, "optimize", frb_iw_optimize, 0);
3344
+ rb_define_method(cIndexWriter, "commit", frb_iw_commit, 0);
3345
+ rb_define_method(cIndexWriter, "add_readers", frb_iw_add_readers, 1);
3346
+ rb_define_method(cIndexWriter, "delete", frb_iw_delete, 2);
3347
+ rb_define_method(cIndexWriter, "field_infos", frb_iw_field_infos, 0);
3348
+ rb_define_method(cIndexWriter, "analyzer", frb_iw_get_analyzer, 0);
3349
+ rb_define_method(cIndexWriter, "analyzer=", frb_iw_set_analyzer, 1);
3350
+ rb_define_method(cIndexWriter, "version", frb_iw_version, 0);
3351
+
3352
+ rb_define_method(cIndexWriter, "chunk_size", frb_iw_get_chunk_size, 0);
3353
+ rb_define_method(cIndexWriter, "chunk_size=", frb_iw_set_chunk_size, 1);
3354
+
3355
+ rb_define_method(cIndexWriter, "max_buffer_memory", frb_iw_get_max_buffer_memory, 0);
3356
+ rb_define_method(cIndexWriter, "max_buffer_memory=", frb_iw_set_max_buffer_memory, 1);
3357
+
3358
+ rb_define_method(cIndexWriter, "term_index_interval", frb_iw_get_index_interval, 0);
3359
+ rb_define_method(cIndexWriter, "term_index_interval=", frb_iw_set_index_interval, 1);
3360
+
3361
+ rb_define_method(cIndexWriter, "doc_skip_interval", frb_iw_get_skip_interval, 0);
3362
+ rb_define_method(cIndexWriter, "doc_skip_interval=", frb_iw_set_skip_interval, 1);
3322
3363
 
3364
+ rb_define_method(cIndexWriter, "merge_factor", frb_iw_get_merge_factor, 0);
3365
+ rb_define_method(cIndexWriter, "merge_factor=", frb_iw_set_merge_factor, 1);
3366
+
3367
+ rb_define_method(cIndexWriter, "max_buffered_docs", frb_iw_get_max_buffered_docs, 0);
3368
+ rb_define_method(cIndexWriter, "max_buffered_docs=", frb_iw_set_max_buffered_docs, 1);
3369
+
3370
+ rb_define_method(cIndexWriter, "max_merge_docs", frb_iw_get_max_merge_docs, 0);
3371
+ rb_define_method(cIndexWriter, "max_merge_docs=", frb_iw_set_max_merge_docs, 1);
3372
+
3373
+ rb_define_method(cIndexWriter, "max_field_length", frb_iw_get_max_field_length, 0);
3374
+ rb_define_method(cIndexWriter, "max_field_length=", frb_iw_set_max_field_length, 1);
3375
+
3376
+ rb_define_method(cIndexWriter, "use_compound_file", frb_iw_get_use_compound_file, 0);
3377
+ rb_define_method(cIndexWriter, "use_compound_file=", frb_iw_set_use_compound_file, 1);
3323
3378
  }
3324
3379
 
3325
3380
  /*
@@ -3352,18 +3407,16 @@ Init_IndexWriter(void)
3352
3407
  * doc.values #=> ["the title", "the content"]
3353
3408
  * doc.fields #=> [:title, :content]
3354
3409
  */
3355
- void
3356
- Init_LazyDoc(void)
3357
- {
3410
+ void Init_LazyDoc(void) {
3358
3411
  id_fields = rb_intern("@fields");
3359
3412
 
3360
3413
  cLazyDoc = rb_define_class_under(mIndex, "LazyDoc", rb_cHash);
3361
- rb_define_method(cLazyDoc, "default", frb_lzd_default, 1);
3362
- rb_define_method(cLazyDoc, "load", frb_lzd_load, 0);
3363
- rb_define_method(cLazyDoc, "fields", frb_lzd_fields, 0);
3414
+ rb_define_method(cLazyDoc, "default", frb_lzd_default, 1);
3415
+ rb_define_method(cLazyDoc, "load", frb_lzd_load, 0);
3416
+ rb_define_method(cLazyDoc, "fields", frb_lzd_fields, 0);
3364
3417
 
3365
3418
  cLazyDocData = rb_define_class_under(cLazyDoc, "LazyDocData", rb_cObject);
3366
- rb_define_alloc_func(cLazyDocData, frb_data_alloc);
3419
+ rb_define_alloc_func(cLazyDocData, frb_lzd_alloc);
3367
3420
  }
3368
3421
 
3369
3422
  /*
@@ -3376,41 +3429,39 @@ Init_LazyDoc(void)
3376
3429
  * index, accessing term-vectors or deleting documents by document id. It is
3377
3430
  * also used internally by IndexSearcher.
3378
3431
  */
3379
- void
3380
- Init_IndexReader(void)
3381
- {
3432
+ void Init_IndexReader(void) {
3382
3433
  cIndexReader = rb_define_class_under(mIndex, "IndexReader", rb_cObject);
3383
- rb_define_alloc_func(cIndexReader, frb_data_alloc);
3384
- rb_define_method(cIndexReader, "initialize", frb_ir_init, 1);
3385
- rb_define_method(cIndexReader, "set_norm", frb_ir_set_norm, 3);
3386
- rb_define_method(cIndexReader, "norms", frb_ir_norms, 1);
3387
- rb_define_method(cIndexReader, "get_norms_into",frb_ir_get_norms_into, 3);
3388
- rb_define_method(cIndexReader, "commit", frb_ir_commit, 0);
3389
- rb_define_method(cIndexReader, "close", frb_ir_close, 0);
3390
- rb_define_method(cIndexReader, "has_deletions?",frb_ir_has_deletions, 0);
3391
- rb_define_method(cIndexReader, "delete", frb_ir_delete, 1);
3392
- rb_define_method(cIndexReader, "deleted?", frb_ir_is_deleted, 1);
3393
- rb_define_method(cIndexReader, "max_doc", frb_ir_max_doc, 0);
3394
- rb_define_method(cIndexReader, "num_docs", frb_ir_num_docs, 0);
3395
- rb_define_method(cIndexReader, "undelete_all", frb_ir_undelete_all, 0);
3396
- rb_define_method(cIndexReader, "latest?", frb_ir_is_latest, 0);
3397
- rb_define_method(cIndexReader, "get_document", frb_ir_get_doc, -1);
3398
- rb_define_method(cIndexReader, "[]", frb_ir_get_doc, -1);
3399
- rb_define_method(cIndexReader, "term_vector", frb_ir_term_vector, 2);
3400
- rb_define_method(cIndexReader, "term_vectors", frb_ir_term_vectors, 1);
3401
- rb_define_method(cIndexReader, "term_docs", frb_ir_term_docs, 0);
3402
- rb_define_method(cIndexReader, "term_positions",frb_ir_term_positions, 0);
3403
- rb_define_method(cIndexReader, "term_docs_for", frb_ir_term_docs_for, 2);
3434
+ rb_define_alloc_func(cIndexReader, frb_ir_alloc);
3435
+ rb_define_method(cIndexReader, "initialize", frb_ir_init, 1);
3436
+ rb_define_method(cIndexReader, "set_norm", frb_ir_set_norm, 3);
3437
+ rb_define_method(cIndexReader, "norms", frb_ir_norms, 1);
3438
+ rb_define_method(cIndexReader, "get_norms_into", frb_ir_get_norms_into, 3);
3439
+ rb_define_method(cIndexReader, "commit", frb_ir_commit, 0);
3440
+ rb_define_method(cIndexReader, "close", frb_ir_close, 0);
3441
+ rb_define_method(cIndexReader, "has_deletions?", frb_ir_has_deletions, 0);
3442
+ rb_define_method(cIndexReader, "delete", frb_ir_delete, 1);
3443
+ rb_define_method(cIndexReader, "deleted?", frb_ir_is_deleted, 1);
3444
+ rb_define_method(cIndexReader, "max_doc", frb_ir_max_doc, 0);
3445
+ rb_define_method(cIndexReader, "num_docs", frb_ir_num_docs, 0);
3446
+ rb_define_method(cIndexReader, "undelete_all", frb_ir_undelete_all, 0);
3447
+ rb_define_method(cIndexReader, "latest?", frb_ir_is_latest, 0);
3448
+ rb_define_method(cIndexReader, "get_document", frb_ir_get_doc, -1);
3449
+ rb_define_method(cIndexReader, "[]", frb_ir_get_doc, -1);
3450
+ rb_define_method(cIndexReader, "term_vector", frb_ir_term_vector, 2);
3451
+ rb_define_method(cIndexReader, "term_vectors", frb_ir_term_vectors, 1);
3452
+ rb_define_method(cIndexReader, "term_docs", frb_ir_term_docs, 0);
3453
+ rb_define_method(cIndexReader, "term_positions", frb_ir_term_positions, 0);
3454
+ rb_define_method(cIndexReader, "term_docs_for", frb_ir_term_docs_for, 2);
3404
3455
  rb_define_method(cIndexReader, "term_positions_for", frb_ir_t_pos_for, 2);
3405
- rb_define_method(cIndexReader, "doc_freq", frb_ir_doc_freq, 2);
3406
- rb_define_method(cIndexReader, "terms", frb_ir_terms, 1);
3407
- rb_define_method(cIndexReader, "terms_from", frb_ir_terms_from, 2);
3408
- rb_define_method(cIndexReader, "term_count", frb_ir_term_count, 1);
3409
- rb_define_method(cIndexReader, "fields", frb_ir_fields, 0);
3410
- rb_define_method(cIndexReader, "field_names", frb_ir_fields, 0);
3411
- rb_define_method(cIndexReader, "field_infos", frb_ir_field_infos, 0);
3456
+ rb_define_method(cIndexReader, "doc_freq", frb_ir_doc_freq, 2);
3457
+ rb_define_method(cIndexReader, "terms", frb_ir_terms, 1);
3458
+ rb_define_method(cIndexReader, "terms_from", frb_ir_terms_from, 2);
3459
+ rb_define_method(cIndexReader, "term_count", frb_ir_term_count, 1);
3460
+ rb_define_method(cIndexReader, "fields", frb_ir_fields, 0);
3461
+ rb_define_method(cIndexReader, "field_names", frb_ir_fields, 0);
3462
+ rb_define_method(cIndexReader, "field_infos", frb_ir_field_infos, 0);
3412
3463
  rb_define_method(cIndexReader, "tokenized_fields", frb_ir_tk_fields, 0);
3413
- rb_define_method(cIndexReader, "version", frb_ir_version, 0);
3464
+ rb_define_method(cIndexReader, "version", frb_ir_version, 0);
3414
3465
  }
3415
3466
 
3416
3467
  /* rdoc hack
@@ -3435,9 +3486,7 @@ extern VALUE mFerret = rb_define_module("Ferret");
3435
3486
  * building tag clouds, creating more-like-this queries, custom highlighting
3436
3487
  * etc. They are also useful for index browsers.
3437
3488
  */
3438
- void
3439
- Init_Index(void)
3440
- {
3489
+ void Init_Index(void) {
3441
3490
  mIndex = rb_define_module_under(mFerret, "Index");
3442
3491
 
3443
3492
  sym_boost = ID2SYM(rb_intern("boost"));