isomorfeus-ferret 0.12.5 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (247) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +101 -19
  3. data/README.md +54 -4
  4. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
  5. data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
  6. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
  7. data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
  8. data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
  9. data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
  10. data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
  11. data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
  12. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
  13. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
  14. data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
  15. data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
  16. data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
  17. data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
  18. data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
  19. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
  20. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
  21. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
  22. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
  23. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
  24. data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
  25. data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
  26. data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
  27. data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
  28. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
  29. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
  30. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
  31. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
  32. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
  33. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
  34. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
  35. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
  36. data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
  37. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
  38. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
  39. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
  40. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
  41. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
  42. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
  43. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
  44. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
  45. data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
  46. data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
  47. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
  48. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
  49. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
  50. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
  51. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
  52. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
  53. data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
  54. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
  55. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
  56. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
  57. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
  58. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
  59. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
  60. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
  61. data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
  62. data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
  63. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
  64. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
  65. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
  66. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
  67. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
  68. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
  69. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
  70. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
  71. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
  72. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
  73. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
  74. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
  75. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
  76. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
  77. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
  78. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
  79. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
  80. data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
  81. data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
  82. data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
  83. data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
  84. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
  85. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
  86. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
  87. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
  88. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
  89. data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
  90. data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
  91. data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
  92. data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
  93. data/ext/isomorfeus_ferret_ext/bzip_blocksort.c +1094 -0
  94. data/ext/isomorfeus_ferret_ext/bzip_huffman.c +205 -0
  95. data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
  96. data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
  97. data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
  98. data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
  99. data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
  100. data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
  101. data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
  102. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
  103. data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
  104. data/ext/isomorfeus_ferret_ext/frb_index.c +513 -464
  105. data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
  106. data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
  107. data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
  108. data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
  109. data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
  110. data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
  111. data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
  112. data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
  113. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
  114. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
  115. data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
  116. data/ext/isomorfeus_ferret_ext/frt_document.h +10 -9
  117. data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
  118. data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
  119. data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
  120. data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
  121. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +2 -0
  122. data/ext/isomorfeus_ferret_ext/frt_global.c +105 -63
  123. data/ext/isomorfeus_ferret_ext/frt_global.h +7 -3
  124. data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
  125. data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
  126. data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
  127. data/ext/isomorfeus_ferret_ext/frt_index.c +714 -384
  128. data/ext/isomorfeus_ferret_ext/frt_index.h +274 -290
  129. data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
  130. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
  131. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +67 -91
  132. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
  133. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
  134. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
  135. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
  136. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
  137. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
  138. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +26 -25
  139. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
  140. data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
  141. data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
  142. data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
  143. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
  144. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +46 -84
  145. data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
  146. data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
  147. data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
  148. data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
  149. data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
  150. data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
  151. data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
  152. data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
  153. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +21 -109
  154. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
  155. data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
  156. data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
  157. data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
  158. data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
  159. data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
  160. data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
  161. data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
  162. data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
  163. data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
  164. data/ext/isomorfeus_ferret_ext/test.c +1 -2
  165. data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
  166. data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
  167. data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
  168. data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
  169. data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
  170. data/ext/isomorfeus_ferret_ext/test_fields.c +111 -100
  171. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
  172. data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
  173. data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
  174. data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
  175. data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
  176. data/ext/isomorfeus_ferret_ext/test_index.c +373 -363
  177. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
  178. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
  179. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
  180. data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
  181. data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
  182. data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
  183. data/ext/isomorfeus_ferret_ext/test_search.c +60 -62
  184. data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
  185. data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
  186. data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
  187. data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
  188. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
  189. data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
  190. data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
  191. data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
  192. data/lib/isomorfeus/ferret/version.rb +1 -1
  193. metadata +113 -58
  194. data/ext/isomorfeus_ferret_ext/email.rl +0 -21
  195. data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
  196. data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
  197. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
  198. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
  199. data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
  200. data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
  201. data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
  202. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
  203. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
  204. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
  205. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
  206. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
  207. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
  208. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
  209. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
  210. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
  211. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
  212. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
  213. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
  214. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
  215. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
  216. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
  217. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
  218. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
  219. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
  220. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
  221. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
  222. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
  223. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
  224. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
  225. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
  226. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
  227. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
  228. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
  229. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
  230. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
  231. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
  232. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
  233. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
  234. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
  235. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
  236. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
  237. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
  238. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
  239. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
  240. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
  241. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
  242. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
  243. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
  244. data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
  245. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
  246. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
  247. data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -13,6 +13,7 @@
13
13
  #include "frt_priorityqueue.h"
14
14
 
15
15
  typedef struct FrtIndexReader FrtIndexReader;
16
+ typedef struct FrtSegmentReader FrtSegmentReader;
16
17
  typedef struct FrtMultiReader FrtMultiReader;
17
18
  typedef struct FrtDeleter FrtDeleter;
18
19
 
@@ -22,8 +23,7 @@ typedef struct FrtDeleter FrtDeleter;
22
23
  *
23
24
  ****************************************************************************/
24
25
 
25
- typedef struct FrtConfig
26
- {
26
+ typedef struct FrtConfig {
27
27
  int chunk_size;
28
28
  int max_buffer_memory;
29
29
  int index_interval;
@@ -52,8 +52,7 @@ typedef struct FrtCacheObject {
52
52
  void (*destroy)(void *p);
53
53
  } FrtCacheObject;
54
54
 
55
- extern FrtCacheObject *frt_co_create(FrtHash *ref_tab1,
56
- FrtHash *ref_tab2,
55
+ extern FrtCacheObject *frt_co_create(FrtHash *ref_tab1, FrtHash *ref_tab2,
57
56
  void *ref1, void *ref2, frt_free_ft destroy, void *obj);
58
57
  extern FrtHash *frt_co_hash_create();
59
58
 
@@ -63,14 +62,12 @@ extern FrtHash *frt_co_hash_create();
63
62
  *
64
63
  ****************************************************************************/
65
64
 
66
- typedef enum
67
- {
65
+ typedef enum {
68
66
  FRT_STORE_NO = 0,
69
- FRT_STORE_YES = 1
67
+ FRT_STORE_YES = 1,
70
68
  } FrtStoreValue;
71
69
 
72
- typedef enum
73
- {
70
+ typedef enum {
74
71
  FRT_INDEX_NO = 0,
75
72
  FRT_INDEX_UNTOKENIZED = 1,
76
73
  FRT_INDEX_YES = 3,
@@ -78,8 +75,7 @@ typedef enum
78
75
  FRT_INDEX_YES_OMIT_NORMS = 7
79
76
  } FrtIndexValue;
80
77
 
81
- typedef enum
82
- {
78
+ typedef enum {
83
79
  FRT_TERM_VECTOR_NO = 0,
84
80
  FRT_TERM_VECTOR_YES = 1,
85
81
  FRT_TERM_VECTOR_WITH_POSITIONS = 3,
@@ -87,37 +83,44 @@ typedef enum
87
83
  FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS = 7
88
84
  } FrtTermVectorValue;
89
85
 
90
- #define FRT_FI_IS_STORED_BM 0x001
91
- #define FRT_FI_IS_INDEXED_BM 0x004
92
- #define FRT_FI_IS_TOKENIZED_BM 0x008
93
- #define FRT_FI_OMIT_NORMS_BM 0x010
94
- #define FRT_FI_STORE_TERM_VECTOR_BM 0x020
95
- #define FRT_FI_STORE_POSITIONS_BM 0x040
96
- #define FRT_FI_STORE_OFFSETS_BM 0x080
97
-
98
- typedef struct FrtFieldInfo
99
- {
100
- FrtSymbol name;
101
- float boost;
86
+ #define FRT_FI_IS_STORED_BM 0x001
87
+ #define FRT_FI_IS_COMPRESSED_BM 0x002
88
+ #define FRT_FI_IS_INDEXED_BM 0x004
89
+ #define FRT_FI_IS_TOKENIZED_BM 0x008
90
+ #define FRT_FI_OMIT_NORMS_BM 0x010
91
+ #define FRT_FI_STORE_TERM_VECTOR_BM 0x020
92
+ #define FRT_FI_STORE_POSITIONS_BM 0x040
93
+ #define FRT_FI_STORE_OFFSETS_BM 0x080
94
+ #define FRT_FI_COMPRESSION_BROTLI_BM 0x100
95
+ #define FRT_FI_COMPRESSION_BZ2_BM 0x200
96
+ #define FRT_FI_COMPRESSION_LZ4_BM 0x400
97
+
98
+ typedef struct FrtFieldInfo {
99
+ ID name;
100
+ float boost;
102
101
  unsigned int bits;
103
- int number;
104
- int ref_cnt;
102
+ int number;
103
+ int ref_cnt;
104
+ VALUE rfi;
105
105
  } FrtFieldInfo;
106
106
 
107
- extern FrtFieldInfo *frt_fi_new(FrtSymbol name,
108
- FrtStoreValue store,
109
- FrtIndexValue index,
110
- FrtTermVectorValue term_vector);
107
+ extern FrtFieldInfo *frt_fi_alloc();
108
+ extern FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name, FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector);
109
+ extern FrtFieldInfo *frt_fi_new(ID name, FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector);
111
110
  extern char *frt_fi_to_s(FrtFieldInfo *fi);
112
111
  extern void frt_fi_deref(FrtFieldInfo *fi);
113
112
 
114
- #define fi_is_stored(fi) (((fi)->bits & FRT_FI_IS_STORED_BM) != 0)
115
- #define fi_is_indexed(fi) (((fi)->bits & FRT_FI_IS_INDEXED_BM) != 0)
116
- #define fi_is_tokenized(fi) (((fi)->bits & FRT_FI_IS_TOKENIZED_BM) != 0)
117
- #define fi_omit_norms(fi) (((fi)->bits & FRT_FI_OMIT_NORMS_BM) != 0)
118
- #define fi_store_term_vector(fi) (((fi)->bits & FRT_FI_STORE_TERM_VECTOR_BM) != 0)
119
- #define fi_store_positions(fi) (((fi)->bits & FRT_FI_STORE_POSITIONS_BM) != 0)
120
- #define fi_store_offsets(fi) (((fi)->bits & FRT_FI_STORE_OFFSETS_BM) != 0)
113
+ #define fi_is_stored(fi) (((fi)->bits & FRT_FI_IS_STORED_BM) != 0)
114
+ #define fi_is_compressed(fi) (((fi)->bits & FRT_FI_IS_COMPRESSED_BM) != 0)
115
+ #define fi_is_compressed_brotli(fi) (((fi)->bits & FRT_FI_COMPRESSION_BROTLI_BM) != 0)
116
+ #define fi_is_compressed_bz2(fi) (((fi)->bits & FRT_FI_COMPRESSION_BZ2_BM) != 0)
117
+ #define fi_is_compressed_lz4(fi) (((fi)->bits & FRT_FI_COMPRESSION_LZ4_BM) != 0)
118
+ #define fi_is_indexed(fi) (((fi)->bits & FRT_FI_IS_INDEXED_BM) != 0)
119
+ #define fi_is_tokenized(fi) (((fi)->bits & FRT_FI_IS_TOKENIZED_BM) != 0)
120
+ #define fi_omit_norms(fi) (((fi)->bits & FRT_FI_OMIT_NORMS_BM) != 0)
121
+ #define fi_store_term_vector(fi) (((fi)->bits & FRT_FI_STORE_TERM_VECTOR_BM) != 0)
122
+ #define fi_store_positions(fi) (((fi)->bits & FRT_FI_STORE_POSITIONS_BM) != 0)
123
+ #define fi_store_offsets(fi) (((fi)->bits & FRT_FI_STORE_OFFSETS_BM) != 0)
121
124
  #define fi_has_norms(fi)\
122
125
  (((fi)->bits & (FRT_FI_OMIT_NORMS_BM|FRT_FI_IS_INDEXED_BM)) == FRT_FI_IS_INDEXED_BM)
123
126
 
@@ -129,25 +132,26 @@ extern void frt_fi_deref(FrtFieldInfo *fi);
129
132
 
130
133
  #define FIELD_INFOS_INIT_CAPA 4
131
134
  /* carry changes over to dummy_fis in test/test_segments.c */
132
- typedef struct FrtFieldInfos
133
- {
134
- FrtStoreValue store;
135
- FrtIndexValue index;
135
+ typedef struct FrtFieldInfos {
136
+ FrtStoreValue store;
137
+ FrtCompressionType compression;
138
+ FrtIndexValue index;
136
139
  FrtTermVectorValue term_vector;
137
- int size;
138
- int capa;
139
- FrtFieldInfo **fields;
140
- FrtHash *field_dict;
141
- int ref_cnt;
140
+ int size;
141
+ int capa;
142
+ FrtFieldInfo **fields;
143
+ FrtHash *field_dict;
144
+ int ref_cnt;
145
+ VALUE rfis;
142
146
  } FrtFieldInfos;
143
147
 
144
- FrtFieldInfos *frt_fis_new(FrtStoreValue store, FrtIndexValue index,
145
- FrtTermVectorValue term_vector);
148
+ FrtFieldInfos *frt_fis_alloc();
149
+ FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis, FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector);
150
+ FrtFieldInfos *frt_fis_new(FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector);
146
151
  extern FrtFieldInfo *frt_fis_add_field(FrtFieldInfos *fis, FrtFieldInfo *fi);
147
- extern FrtFieldInfo *frt_fis_get_field(FrtFieldInfos *fis, FrtSymbol name);
148
- extern int frt_fis_get_field_num(FrtFieldInfos *fis, FrtSymbol name);
149
- extern FrtFieldInfo *frt_fis_get_or_add_field(FrtFieldInfos *fis,
150
- FrtSymbol name);
152
+ extern FrtFieldInfo *frt_fis_get_field(FrtFieldInfos *fis, ID name);
153
+ extern int frt_fis_get_field_num(FrtFieldInfos *fis, ID name);
154
+ extern FrtFieldInfo *frt_fis_get_or_add_field(FrtFieldInfos *fis, ID name);
151
155
  extern void frt_fis_write(FrtFieldInfos *fis, FrtOutStream *os);
152
156
  extern FrtFieldInfos *frt_fis_read(FrtInStream *is);
153
157
  extern char *frt_fis_to_s(FrtFieldInfos *fis);
@@ -162,8 +166,7 @@ extern void frt_fis_deref(FrtFieldInfos *fis);
162
166
  #define FRT_SEGMENT_NAME_MAX_LENGTH 100
163
167
  #define FRT_SEGMENTS_FILE_NAME "segments"
164
168
 
165
- typedef struct FrtSegmentInfo
166
- {
169
+ typedef struct FrtSegmentInfo {
167
170
  int ref_cnt;
168
171
  char *name;
169
172
  FrtStore *store;
@@ -186,8 +189,7 @@ extern void frt_si_advance_norm_gen(FrtSegmentInfo *si, int field_num);
186
189
  *
187
190
  ****************************************************************************/
188
191
 
189
- typedef struct FrtSegmentInfos
190
- {
192
+ typedef struct FrtSegmentInfos {
191
193
  FrtFieldInfos *fis;
192
194
  frt_u64 counter;
193
195
  frt_u64 version;
@@ -199,10 +201,7 @@ typedef struct FrtSegmentInfos
199
201
  int capa;
200
202
  } FrtSegmentInfos;
201
203
 
202
- extern char *frt_fn_for_generation(char *buf,
203
- const char *base,
204
- const char *ext,
205
- frt_i64 gen);
204
+ extern char *frt_fn_for_generation(char *buf, const char *base, const char *ext, frt_i64 gen);
206
205
 
207
206
  extern FrtSegmentInfos *frt_sis_new(FrtFieldInfos *fis);
208
207
  extern FrtSegmentInfo *frt_sis_new_segment(FrtSegmentInfos *sis, int dcnt, FrtStore *store);
@@ -223,9 +222,8 @@ extern void frt_sis_put(FrtSegmentInfos *sis, FILE *stream);
223
222
  *
224
223
  ****************************************************************************/
225
224
 
226
- typedef struct FrtTermInfo
227
- {
228
- int doc_freq;
225
+ typedef struct FrtTermInfo {
226
+ int doc_freq;
229
227
  off_t frq_ptr;
230
228
  off_t prx_ptr;
231
229
  off_t skip_offset;
@@ -239,24 +237,21 @@ typedef struct FrtTermInfo
239
237
  } while (0)
240
238
 
241
239
  /****************************************************************************
242
- *
243
240
  * FrtTermEnum
244
- *
245
241
  ****************************************************************************/
246
242
 
247
243
  typedef struct FrtTermEnum FrtTermEnum;
248
244
 
249
- struct FrtTermEnum
250
- {
245
+ struct FrtTermEnum {
251
246
  char curr_term[FRT_MAX_WORD_SIZE];
252
247
  char prev_term[FRT_MAX_WORD_SIZE];
253
- FrtTermInfo curr_ti;
248
+ FrtTermInfo curr_ti;
254
249
  int curr_term_len;
255
250
  int field_num;
256
251
  FrtTermEnum *(*set_field)(FrtTermEnum *te, int field_num);
257
- char *(*next)(FrtTermEnum *te);
258
- char *(*skip_to)(FrtTermEnum *te, const char *term);
259
- void (*close)(FrtTermEnum *te);
252
+ char *(*next)(FrtTermEnum *te);
253
+ char *(*skip_to)(FrtTermEnum *te, const char *term);
254
+ void (*close)(FrtTermEnum *te);
260
255
  FrtTermEnum *(*clone)(FrtTermEnum *te);
261
256
  };
262
257
 
@@ -264,59 +259,54 @@ char *frt_te_get_term(struct FrtTermEnum *te);
264
259
  FrtTermInfo *frt_te_get_ti(struct FrtTermEnum *te);
265
260
 
266
261
  /****************************************************************************
267
- *
268
262
  * FrtSegmentTermEnum
269
- *
270
263
  ****************************************************************************/
271
264
 
272
- /* * FrtSegmentTermIndex * */
265
+ /* FrtSegmentTermIndex */
273
266
 
274
- typedef struct FrtSegmentTermIndex
275
- {
267
+ typedef struct FrtSegmentTermIndex {
276
268
  off_t index_ptr;
277
269
  off_t ptr;
278
270
  int index_cnt;
279
271
  int size;
280
- char **index_terms;
281
- int *index_term_lens;
282
- FrtTermInfo *index_term_infos;
283
- off_t *index_ptrs;
272
+ char **index_terms;
273
+ int *index_term_lens;
274
+ FrtTermInfo *index_term_infos;
275
+ off_t *index_ptrs;
284
276
  } FrtSegmentTermIndex;
285
277
 
286
- /* * FrtSegmentFieldIndex * */
278
+ /* FrtSegmentFieldIndex */
287
279
 
288
- typedef struct FrtSegmentTermEnum FrtSegmentTermEnum;
289
-
290
- typedef struct FrtSegmentFieldIndex
291
- {
292
- frt_mutex_t mutex;
280
+ typedef struct FrtSegmentFieldIndex {
281
+ frt_mutex_t mutex;
293
282
  int skip_interval;
294
283
  int index_interval;
295
284
  off_t index_ptr;
296
- FrtTermEnum *index_te;
297
- FrtHash *field_dict;
285
+ FrtTermEnum *index_te;
286
+ FrtHash *field_dict;
298
287
  } FrtSegmentFieldIndex;
299
288
 
300
- extern FrtSegmentFieldIndex *frt_sfi_open(FrtStore *store, const char *segment);
301
- extern void frt_sfi_close(FrtSegmentFieldIndex *sfi);
289
+ /* FrtSegmentTermEnum */
302
290
 
291
+ typedef struct FrtSegmentTermEnum FrtSegmentTermEnum;
303
292
 
304
- /* * FrtSegmentTermEnum * */
305
- struct FrtSegmentTermEnum
306
- {
307
- FrtTermEnum te;
308
- FrtInStream *is;
293
+ struct FrtSegmentTermEnum {
294
+ FrtTermEnum te;
295
+ FrtInStream *is;
309
296
  int size;
310
297
  int pos;
311
298
  int skip_interval;
312
299
  FrtSegmentFieldIndex *sfi;
313
300
  };
314
301
 
302
+ extern FrtSegmentFieldIndex *frt_sfi_open(FrtStore *store, const char *segment);
303
+ extern void frt_sfi_close(FrtSegmentFieldIndex *sfi);
304
+
315
305
  extern void frt_ste_close(FrtTermEnum *te);
316
306
  extern FrtTermEnum *frt_ste_clone(FrtTermEnum *te);
317
307
  extern FrtTermEnum *frt_ste_new(FrtInStream *is, FrtSegmentFieldIndex *sfi);
318
308
 
319
- /* * MultiTermEnum * */
309
+ /* MultiTermEnum */
320
310
 
321
311
  extern FrtTermEnum *frt_mte_new(FrtMultiReader *mr, int field_num, const char *term);
322
312
 
@@ -326,17 +316,14 @@ extern FrtTermEnum *frt_mte_new(FrtMultiReader *mr, int field_num, const char *t
326
316
  *
327
317
  ****************************************************************************/
328
318
 
329
- typedef struct FrtTermInfosReader
330
- {
319
+ typedef struct FrtTermInfosReader {
331
320
  frt_thread_key_t thread_te;
332
- void **te_bucket;
333
- FrtTermEnum *orig_te;
334
- int field_num;
321
+ void **te_bucket;
322
+ FrtTermEnum *orig_te;
323
+ int field_num;
335
324
  } FrtTermInfosReader;
336
325
 
337
- extern FrtTermInfosReader *frt_tir_open(FrtStore *store,
338
- FrtSegmentFieldIndex *sfi,
339
- const char *segment);
326
+ extern FrtTermInfosReader *frt_tir_open(FrtStore *store, FrtSegmentFieldIndex *sfi, const char *segment);
340
327
  extern FrtTermInfosReader *frt_tir_set_field(FrtTermInfosReader *tir, int field_num);
341
328
  extern FrtTermInfo *frt_tir_get_ti(FrtTermInfosReader *tir, const char *term);
342
329
  extern char *frt_tir_get_term(FrtTermInfosReader *tir, int pos);
@@ -351,34 +338,26 @@ extern void frt_tir_close(FrtTermInfosReader *tir);
351
338
  #define FRT_INDEX_INTERVAL 128
352
339
  #define FRT_SKIP_INTERVAL 16
353
340
 
354
- typedef struct FrtTermWriter
355
- {
356
- int counter;
357
- const char *last_term;
358
- FrtTermInfo last_term_info;
341
+ typedef struct FrtTermWriter {
342
+ int counter;
343
+ const char *last_term;
344
+ FrtTermInfo last_term_info;
359
345
  FrtOutStream *os;
360
346
  } FrtTermWriter;
361
347
 
362
- typedef struct FrtTermInfosWriter
363
- {
364
- int field_count;
365
- int index_interval;
366
- int skip_interval;
367
- off_t last_index_ptr;
368
- FrtOutStream *tfx_out;
348
+ typedef struct FrtTermInfosWriter {
349
+ int field_count;
350
+ int index_interval;
351
+ int skip_interval;
352
+ off_t last_index_ptr;
353
+ FrtOutStream *tfx_out;
369
354
  FrtTermWriter *tix_writer;
370
355
  FrtTermWriter *tis_writer;
371
356
  } FrtTermInfosWriter;
372
357
 
373
- extern FrtTermInfosWriter *frt_tiw_open(FrtStore *store,
374
- const char *segment,
375
- int index_interval,
376
- int skip_interval);
358
+ extern FrtTermInfosWriter *frt_tiw_open(FrtStore *store, const char *segment, int index_interval, int skip_interval);
377
359
  extern void frt_tiw_start_field(FrtTermInfosWriter *tiw, int field_num);
378
- extern void frt_tiw_add(FrtTermInfosWriter *tiw,
379
- const char *term,
380
- int t_len,
381
- FrtTermInfo *ti);
360
+ extern void frt_tiw_add(FrtTermInfosWriter *tiw, const char *term, int t_len, FrtTermInfo *ti);
382
361
  extern void frt_tiw_close(FrtTermInfosWriter *tiw);
383
362
 
384
363
  /****************************************************************************
@@ -388,8 +367,7 @@ extern void frt_tiw_close(FrtTermInfosWriter *tiw);
388
367
  ****************************************************************************/
389
368
 
390
369
  typedef struct FrtTermDocEnum FrtTermDocEnum;
391
- struct FrtTermDocEnum
392
- {
370
+ struct FrtTermDocEnum {
393
371
  void (*seek)(FrtTermDocEnum *tde, int field_num, const char *term);
394
372
  void (*seek_te)(FrtTermDocEnum *tde, FrtTermEnum *te);
395
373
  void (*seek_ti)(FrtTermDocEnum *tde, FrtTermInfo *ti);
@@ -405,8 +383,7 @@ struct FrtTermDocEnum
405
383
  /* * FrtSegmentTermDocEnum * */
406
384
 
407
385
  typedef struct FrtSegmentTermDocEnum FrtSegmentTermDocEnum;
408
- struct FrtSegmentTermDocEnum
409
- {
386
+ struct FrtSegmentTermDocEnum {
410
387
  FrtTermDocEnum tde;
411
388
  void (*seek_prox)(FrtSegmentTermDocEnum *stde, off_t prx_ptr);
412
389
  void (*skip_prox)(FrtSegmentTermDocEnum *stde);
@@ -443,8 +420,7 @@ extern FrtTermDocEnum *frt_stpe_new(FrtTermInfosReader *tir, FrtInStream *frq_in
443
420
  * MultipleTermDocPosEnum
444
421
  ****************************************************************************/
445
422
 
446
- extern FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **terms,
447
- int t_cnt);
423
+ extern FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **terms, int t_cnt);
448
424
 
449
425
  /****************************************************************************
450
426
  *
@@ -452,8 +428,7 @@ extern FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **t
452
428
  *
453
429
  ****************************************************************************/
454
430
 
455
- typedef struct FrtOffset
456
- {
431
+ typedef struct FrtOffset {
457
432
  off_t start;
458
433
  off_t end;
459
434
  } FrtOffset;
@@ -464,8 +439,7 @@ typedef struct FrtOffset
464
439
  *
465
440
  ****************************************************************************/
466
441
 
467
- typedef struct FrtOccurence
468
- {
442
+ typedef struct FrtOccurence {
469
443
  struct FrtOccurence *next;
470
444
  int pos;
471
445
  } FrtOccurence;
@@ -476,8 +450,7 @@ typedef struct FrtOccurence
476
450
  *
477
451
  ****************************************************************************/
478
452
 
479
- typedef struct FrtPosting
480
- {
453
+ typedef struct FrtPosting {
481
454
  int freq;
482
455
  int doc_num;
483
456
  FrtOccurence *first_occ;
@@ -492,17 +465,15 @@ extern FrtPosting *frt_p_new(FrtMemoryPool *mp, int doc_num, int pos);
492
465
  *
493
466
  ****************************************************************************/
494
467
 
495
- typedef struct FrtPostingList
496
- {
497
- const char *term;
498
- int term_len;
499
- FrtPosting *first;
500
- FrtPosting *last;
468
+ typedef struct FrtPostingList {
469
+ const char *term;
470
+ int term_len;
471
+ FrtPosting *first;
472
+ FrtPosting *last;
501
473
  FrtOccurence *last_occ;
502
474
  } FrtPostingList;
503
475
 
504
- extern FrtPostingList *frt_pl_new(FrtMemoryPool *mp, const char *term,
505
- int term_len, FrtPosting *p);
476
+ extern FrtPostingList *frt_pl_new(FrtMemoryPool *mp, const char *term, int term_len, FrtPosting *p);
506
477
  extern void frt_pl_add_occ(FrtMemoryPool *mp, FrtPostingList *pl, int pos);
507
478
  extern int frt_pl_cmp(const FrtPostingList **pl1, const FrtPostingList **pl2);
508
479
 
@@ -512,8 +483,7 @@ extern int frt_pl_cmp(const FrtPostingList **pl1, const FrtPostingList **pl2);
512
483
  *
513
484
  ****************************************************************************/
514
485
 
515
- typedef struct FrtTVField
516
- {
486
+ typedef struct FrtTVField {
517
487
  int field_num;
518
488
  int size;
519
489
  } FrtTVField;
@@ -524,11 +494,10 @@ typedef struct FrtTVField
524
494
  *
525
495
  ****************************************************************************/
526
496
 
527
- typedef struct FrtTVTerm
528
- {
529
- char *text;
530
- int freq;
531
- int *positions;
497
+ typedef struct FrtTVTerm {
498
+ char *text;
499
+ int freq;
500
+ int *positions;
532
501
  } FrtTVTerm;
533
502
 
534
503
  /****************************************************************************
@@ -538,10 +507,9 @@ typedef struct FrtTVTerm
538
507
  ****************************************************************************/
539
508
 
540
509
  #define FRT_TV_FIELD_INIT_CAPA 8
541
- typedef struct FrtTermVector
542
- {
510
+ typedef struct FrtTermVector {
543
511
  int field_num;
544
- FrtSymbol field;
512
+ ID field;
545
513
  int term_cnt;
546
514
  FrtTVTerm *terms;
547
515
  int offset_cnt;
@@ -560,38 +528,38 @@ extern FrtTVTerm *frt_tv_get_tv_term(FrtTermVector *tv, const char *term);
560
528
  ****************************************************************************/
561
529
 
562
530
  /* * * FrtLazyDocField * * */
563
- typedef struct FrtLazyDocFieldData
564
- {
565
- off_t start;
566
- int length;
567
- char *text;
531
+ typedef struct FrtLazyDocFieldData {
532
+ off_t start;
533
+ int length;
534
+ rb_encoding *encoding;
535
+ FrtCompressionType compression; /* as stored */
536
+ char *text;
568
537
  } FrtLazyDocFieldData;
569
538
 
570
539
  typedef struct FrtLazyDoc FrtLazyDoc;
571
- typedef struct FrtLazyDocField
572
- {
573
- FrtSymbol name;
540
+ typedef struct FrtLazyDocField {
541
+ ID name;
574
542
  FrtLazyDocFieldData *data;
575
543
  FrtLazyDoc *doc;
576
544
  int size; /* number of data elements */
577
545
  int len; /* length of data elements concatenated */
546
+ FrtCompressionType compression; /* as configured */
547
+ bool decompressed;
578
548
  } FrtLazyDocField;
579
549
 
580
550
  extern char *frt_lazy_df_get_data(FrtLazyDocField *self, int i);
581
- extern void frt_lazy_df_get_bytes(FrtLazyDocField *self, char *buf,
582
- int start, int len);
551
+ extern void frt_lazy_df_get_bytes(FrtLazyDocField *self, char *buf, int start, int len);
583
552
 
584
553
  /* * * FrtLazyDoc * * */
585
- struct FrtLazyDoc
586
- {
587
- FrtHash *field_dictionary;
588
- int size;
554
+ struct FrtLazyDoc {
555
+ FrtHash *field_dictionary;
556
+ int size;
589
557
  FrtLazyDocField **fields;
590
- FrtInStream *fields_in;
558
+ FrtInStream *fields_in;
591
559
  };
592
560
 
593
561
  extern void frt_lazy_doc_close(FrtLazyDoc *self);
594
- extern FrtLazyDocField *frt_lazy_doc_get(FrtLazyDoc *self, FrtSymbol field);
562
+ extern FrtLazyDocField *frt_lazy_doc_get(FrtLazyDoc *self, ID field);
595
563
 
596
564
  /****************************************************************************
597
565
  *
@@ -599,8 +567,7 @@ extern FrtLazyDocField *frt_lazy_doc_get(FrtLazyDoc *self, FrtSymbol field);
599
567
  *
600
568
  ****************************************************************************/
601
569
 
602
- typedef struct FrtFieldsReader
603
- {
570
+ typedef struct FrtFieldsReader {
604
571
  int size;
605
572
  FrtFieldInfos *fis;
606
573
  FrtStore *store;
@@ -608,15 +575,13 @@ typedef struct FrtFieldsReader
608
575
  FrtInStream *fdt_in;
609
576
  } FrtFieldsReader;
610
577
 
611
- extern FrtFieldsReader *frt_fr_open(FrtStore *store,
612
- const char *segment, FrtFieldInfos *fis);
578
+ extern FrtFieldsReader *frt_fr_open(FrtStore *store, const char *segment, FrtFieldInfos *fis);
613
579
  extern FrtFieldsReader *frt_fr_clone(FrtFieldsReader *orig);
614
580
  extern void frt_fr_close(FrtFieldsReader *fr);
615
581
  extern FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num);
616
582
  extern FrtLazyDoc *frt_fr_get_lazy_doc(FrtFieldsReader *fr, int doc_num);
617
583
  extern FrtHash *frt_fr_get_tv(FrtFieldsReader *fr, int doc_num);
618
- extern FrtTermVector *frt_fr_get_field_tv(FrtFieldsReader *fr, int doc_num,
619
- int field_num);
584
+ extern FrtTermVector *frt_fr_get_field_tv(FrtFieldsReader *fr, int doc_num, int field_num);
620
585
 
621
586
  /****************************************************************************
622
587
  *
@@ -624,18 +589,16 @@ extern FrtTermVector *frt_fr_get_field_tv(FrtFieldsReader *fr, int doc_num,
624
589
  *
625
590
  ****************************************************************************/
626
591
 
627
- typedef struct FrtFieldsWriter
628
- {
592
+ typedef struct FrtFieldsWriter {
629
593
  FrtFieldInfos *fis;
630
594
  FrtOutStream *fdt_out;
631
595
  FrtOutStream *fdx_out;
632
596
  FrtOutStream *buffer;
633
597
  FrtTVField *tv_fields;
634
- off_t start_ptr;
598
+ off_t start_ptr;
635
599
  } FrtFieldsWriter;
636
600
 
637
- extern FrtFieldsWriter *frt_fw_open(FrtStore *store,
638
- const char *segment, FrtFieldInfos *fis);
601
+ extern FrtFieldsWriter *frt_fw_open(FrtStore *store, const char *segment, FrtFieldInfos *fis);
639
602
  extern void frt_fw_close(FrtFieldsWriter *fw);
640
603
  extern void frt_fw_add_doc(FrtFieldsWriter *fw, FrtDocument *doc);
641
604
  extern void frt_fw_add_postings(FrtFieldsWriter *fw,
@@ -656,11 +619,10 @@ extern void frt_fw_write_tv_index(FrtFieldsWriter *fw);
656
619
  *
657
620
  ****************************************************************************/
658
621
 
659
- struct FrtDeleter
660
- {
661
- FrtStore *store;
662
- FrtSegmentInfos *sis;
663
- FrtHashSet *pending;
622
+ struct FrtDeleter {
623
+ FrtStore *store;
624
+ FrtSegmentInfos *sis;
625
+ FrtHashSet *pending;
664
626
  };
665
627
 
666
628
  extern FrtDeleter *frt_deleter_new(FrtSegmentInfos *sis, FrtStore *store);
@@ -678,88 +640,115 @@ extern void frt_deleter_delete_files(FrtDeleter *dlr, char **files, int file_cnt
678
640
  #define FRT_WRITE_LOCK_NAME "write"
679
641
  #define FRT_COMMIT_LOCK_NAME "commit"
680
642
 
681
- struct FrtIndexReader
682
- {
683
- int (*num_docs)(FrtIndexReader *ir);
684
- int (*max_doc)(FrtIndexReader *ir);
685
- FrtDocument *(*get_doc)(FrtIndexReader *ir, int doc_num);
686
- FrtLazyDoc *(*get_lazy_doc)(FrtIndexReader *ir, int doc_num);
687
- frt_uchar *(*get_norms)(FrtIndexReader *ir, int field_num);
688
- frt_uchar *(*get_norms_into)(FrtIndexReader *ir, int field_num,
689
- frt_uchar *buf);
690
- FrtTermEnum *(*terms)(FrtIndexReader *ir, int field_num);
691
- FrtTermEnum *(*terms_from)(FrtIndexReader *ir, int field_num,
692
- const char *term);
693
- int (*doc_freq)(FrtIndexReader *ir, int field_num,
694
- const char *term);
695
- FrtTermDocEnum *(*term_docs)(FrtIndexReader *ir);
696
- FrtTermDocEnum *(*term_positions)(FrtIndexReader *ir);
697
- FrtTermVector *(*term_vector)(FrtIndexReader *ir, int doc_num,
698
- FrtSymbol field);
699
- FrtHash *(*term_vectors)(FrtIndexReader *ir, int doc_num);
700
- bool (*is_deleted)(FrtIndexReader *ir, int doc_num);
701
- bool (*has_deletions)(FrtIndexReader *ir);
702
- void (*acquire_write_lock)(FrtIndexReader *ir);
703
- void (*set_norm_i)(FrtIndexReader *ir, int doc_num,
704
- int field_num, frt_uchar val);
705
- void (*delete_doc_i)(FrtIndexReader *ir, int doc_num);
706
- void (*undelete_all_i)(FrtIndexReader *ir);
707
- void (*set_deleter_i)(FrtIndexReader *ir, FrtDeleter *dlr);
708
- bool (*is_latest_i)(FrtIndexReader *ir);
709
- void (*commit_i)(FrtIndexReader *ir);
710
- void (*close_i)(FrtIndexReader *ir);
711
- int ref_cnt;
712
- FrtDeleter *deleter;
713
- FrtStore *store;
714
- FrtLock *write_lock;
715
- FrtSegmentInfos *sis;
716
- FrtFieldInfos *fis;
717
- FrtHash *cache;
718
- FrtHash *field_index_cache;
719
- frt_mutex_t field_index_mutex;
720
- frt_uchar *fake_norms;
721
- frt_mutex_t mutex;
722
- bool has_changes : 1;
723
- bool is_stale : 1;
724
- bool is_owner : 1;
643
+ typedef enum {
644
+ FRT_INDEX_READER,
645
+ FRT_SEGMENT_READER,
646
+ FRT_MULTI_READER
647
+ } frt_index_reader_t;
648
+
649
+ struct FrtIndexReader {
650
+ int type;
651
+ int (*num_docs)(FrtIndexReader *ir);
652
+ int (*max_doc)(FrtIndexReader *ir);
653
+ FrtDocument *(*get_doc)(FrtIndexReader *ir, int doc_num);
654
+ FrtLazyDoc *(*get_lazy_doc)(FrtIndexReader *ir, int doc_num);
655
+ frt_uchar *(*get_norms)(FrtIndexReader *ir, int field_num);
656
+ frt_uchar *(*get_norms_into)(FrtIndexReader *ir, int field_num, frt_uchar *buf);
657
+ FrtTermEnum *(*terms)(FrtIndexReader *ir, int field_num);
658
+ FrtTermEnum *(*terms_from)(FrtIndexReader *ir, int field_num, const char *term);
659
+ int (*doc_freq)(FrtIndexReader *ir, int field_num, const char *term);
660
+ FrtTermDocEnum *(*term_docs)(FrtIndexReader *ir);
661
+ FrtTermDocEnum *(*term_positions)(FrtIndexReader *ir);
662
+ FrtTermVector *(*term_vector)(FrtIndexReader *ir, int doc_num, ID field);
663
+ FrtHash *(*term_vectors)(FrtIndexReader *ir, int doc_num);
664
+ bool (*is_deleted)(FrtIndexReader *ir, int doc_num);
665
+ bool (*has_deletions)(FrtIndexReader *ir);
666
+ void (*acquire_write_lock)(FrtIndexReader *ir);
667
+ void (*set_norm_i)(FrtIndexReader *ir, int doc_num, int field_num, frt_uchar val);
668
+ void (*delete_doc_i)(FrtIndexReader *ir, int doc_num);
669
+ void (*undelete_all_i)(FrtIndexReader *ir);
670
+ void (*set_deleter_i)(FrtIndexReader *ir, FrtDeleter *dlr);
671
+ bool (*is_latest_i)(FrtIndexReader *ir);
672
+ void (*commit_i)(FrtIndexReader *ir);
673
+ void (*close_i)(FrtIndexReader *ir);
674
+ int ref_cnt;
675
+ FrtDeleter *deleter;
676
+ FrtStore *store;
677
+ FrtLock *write_lock;
678
+ FrtSegmentInfos *sis;
679
+ FrtFieldInfos *fis;
680
+ FrtHash *cache;
681
+ FrtHash *field_index_cache;
682
+ frt_mutex_t field_index_mutex;
683
+ frt_uchar *fake_norms;
684
+ frt_mutex_t mutex;
685
+ bool has_changes : 1;
686
+ bool is_stale : 1;
687
+ bool is_owner : 1;
688
+ VALUE rir;
725
689
  };
726
690
 
727
- extern FrtIndexReader *frt_ir_open(FrtStore *store);
691
+ extern FrtIndexReader *frt_ir_open(FrtIndexReader *ir, FrtStore *store);
728
692
  extern void frt_ir_close(FrtIndexReader *ir);
729
693
  extern void frt_ir_commit(FrtIndexReader *ir);
730
694
  extern void frt_ir_delete_doc(FrtIndexReader *ir, int doc_num);
731
695
  extern void frt_ir_undelete_all(FrtIndexReader *ir);
732
- extern int frt_ir_doc_freq(FrtIndexReader *ir, FrtSymbol field, const char *term);
733
- extern void frt_ir_set_norm(FrtIndexReader *ir, int doc_num, FrtSymbol field, frt_uchar val);
696
+ extern int frt_ir_doc_freq(FrtIndexReader *ir, ID field, const char *term);
697
+ extern void frt_ir_set_norm(FrtIndexReader *ir, int doc_num, ID field, frt_uchar val);
734
698
  extern frt_uchar *frt_ir_get_norms_i(FrtIndexReader *ir, int field_num);
735
- extern frt_uchar *frt_ir_get_norms(FrtIndexReader *ir, FrtSymbol field);
736
- extern frt_uchar *frt_ir_get_norms_into(FrtIndexReader *ir, FrtSymbol field, frt_uchar *buf);
737
- extern FrtDocument *frt_ir_get_doc_with_term(FrtIndexReader *ir, FrtSymbol field, const char *term);
738
- extern FrtTermEnum *frt_ir_terms(FrtIndexReader *ir, FrtSymbol field);
739
- extern FrtTermEnum *frt_ir_terms_from(FrtIndexReader *ir, FrtSymbol field, const char *t);
740
- extern FrtTermDocEnum *ir_term_docs_for(FrtIndexReader *ir, FrtSymbol field, const char *term);
741
- extern FrtTermDocEnum *frt_ir_term_positions_for(FrtIndexReader *ir, FrtSymbol field, const char *t);
699
+ extern frt_uchar *frt_ir_get_norms(FrtIndexReader *ir, ID field);
700
+ extern frt_uchar *frt_ir_get_norms_into(FrtIndexReader *ir, ID field, frt_uchar *buf);
701
+ extern FrtDocument *frt_ir_get_doc_with_term(FrtIndexReader *ir, ID field, const char *term);
702
+ extern FrtTermEnum *frt_ir_terms(FrtIndexReader *ir, ID field);
703
+ extern FrtTermEnum *frt_ir_terms_from(FrtIndexReader *ir, ID field, const char *t);
704
+ extern FrtTermDocEnum *ir_term_docs_for(FrtIndexReader *ir, ID field, const char *term);
705
+ extern FrtTermDocEnum *frt_ir_term_positions_for(FrtIndexReader *ir, ID field, const char *t);
742
706
  extern void frt_ir_add_cache(FrtIndexReader *ir);
743
707
  extern bool frt_ir_is_latest(FrtIndexReader *ir);
744
708
 
709
+ /****************************************************************************
710
+ * FrtSegmentReader
711
+ ****************************************************************************/
712
+
713
+ struct FrtSegmentReader {
714
+ FrtIndexReader ir;
715
+ FrtSegmentInfo *si;
716
+ char *segment;
717
+ FrtFieldsReader *fr;
718
+ FrtBitVector *deleted_docs;
719
+ FrtInStream *frq_in;
720
+ FrtInStream *prx_in;
721
+ FrtSegmentFieldIndex *sfi;
722
+ FrtTermInfosReader *tir;
723
+ frt_thread_key_t thread_fr;
724
+ void **fr_bucket;
725
+ FrtHash *norms;
726
+ FrtStore *cfs_store;
727
+ bool deleted_docs_dirty : 1;
728
+ bool undelete_all : 1;
729
+ bool norms_dirty : 1;
730
+ };
731
+
732
+ extern FrtSegmentReader *frt_sr_alloc();
733
+
745
734
  /****************************************************************************
746
735
  * FrtMultiReader
747
736
  ****************************************************************************/
748
737
 
749
738
  struct FrtMultiReader {
750
739
  FrtIndexReader ir;
751
- int max_doc;
752
- int num_docs_cache;
753
- int r_cnt;
754
- int *starts;
740
+ int max_doc;
741
+ int num_docs_cache;
742
+ int r_cnt;
743
+ int *starts;
755
744
  FrtIndexReader **sub_readers;
756
- FrtHash *norms_cache;
757
- bool has_deletions : 1;
758
- int **field_num_map;
745
+ FrtHash *norms_cache;
746
+ bool has_deletions : 1;
747
+ int **field_num_map;
759
748
  };
760
749
 
761
750
  extern int frt_mr_get_field_num(FrtMultiReader *mr, int ir_num, int f_num);
762
- extern FrtIndexReader *frt_mr_open(FrtIndexReader **sub_readers, const int r_cnt);
751
+ extern FrtIndexReader *frt_mr_open(FrtIndexReader *ir, FrtIndexReader **sub_readers, const int r_cnt);
763
752
 
764
753
  /****************************************************************************
765
754
  *
@@ -767,16 +756,15 @@ extern FrtIndexReader *frt_mr_open(FrtIndexReader **sub_readers, const int r_cnt
767
756
  *
768
757
  ****************************************************************************/
769
758
 
770
- typedef struct FrtFieldInverter
771
- {
772
- FrtHash *plists;
773
- frt_uchar *norms;
759
+ typedef struct FrtFieldInverter {
760
+ FrtHash *plists;
761
+ frt_uchar *norms;
774
762
  FrtFieldInfo *fi;
775
- int length;
776
- bool is_tokenized : 1;
777
- bool store_term_vector : 1;
778
- bool store_offsets : 1;
779
- bool has_norms : 1;
763
+ int length;
764
+ bool is_tokenized : 1;
765
+ bool store_term_vector : 1;
766
+ bool store_offsets : 1;
767
+ bool has_norms : 1;
780
768
  } FrtFieldInverter;
781
769
 
782
770
  /****************************************************************************
@@ -788,18 +776,17 @@ typedef struct FrtFieldInverter
788
776
  #define DW_OFFSET_INIT_CAPA 512
789
777
  typedef struct FrtIndexWriter FrtIndexWriter;
790
778
 
791
- typedef struct FrtDocWriter
792
- {
793
- FrtStore *store;
794
- FrtSegmentInfo *si;
795
- FrtFieldInfos *fis;
779
+ typedef struct FrtDocWriter {
780
+ FrtStore *store;
781
+ FrtSegmentInfo *si;
782
+ FrtFieldInfos *fis;
796
783
  FrtFieldsWriter *fw;
797
- FrtMemoryPool *mp;
798
- FrtAnalyzer *analyzer;
799
- FrtHash *curr_plists;
800
- FrtHash *fields;
801
- FrtSimilarity *similarity;
802
- FrtOffset *offsets;
784
+ FrtMemoryPool *mp;
785
+ FrtAnalyzer *analyzer;
786
+ FrtHash *curr_plists;
787
+ FrtHash *fields;
788
+ FrtSimilarity *similarity;
789
+ FrtOffset *offsets;
803
790
  int offsets_size;
804
791
  int offsets_capa;
805
792
  int doc_num;
@@ -814,9 +801,7 @@ extern void frt_dw_close(FrtDocWriter *dw);
814
801
  extern void frt_dw_add_doc(FrtDocWriter *dw, FrtDocument *doc);
815
802
  extern void frt_dw_new_segment(FrtDocWriter *dw, FrtSegmentInfo *si);
816
803
  /* For testing. need to remove somehow. FIXME */
817
- extern FrtHash *frt_dw_invert_field(FrtDocWriter *dw,
818
- FrtFieldInverter *fld_inv,
819
- FrtDocField *df);
804
+ extern FrtHash *frt_dw_invert_field(FrtDocWriter *dw, FrtFieldInverter *fld_inv, FrtDocField *df);
820
805
  extern FrtFieldInverter *frt_dw_get_fld_inv(FrtDocWriter *dw, FrtFieldInfo *fi);
821
806
  extern void frt_dw_reset_postings(FrtHash *postings);
822
807
 
@@ -826,25 +811,25 @@ extern void frt_dw_reset_postings(FrtHash *postings);
826
811
  *
827
812
  ****************************************************************************/
828
813
 
829
- struct FrtIndexWriter
830
- {
831
- FrtConfig config;
832
- frt_mutex_t mutex;
833
- FrtStore *store;
834
- FrtAnalyzer *analyzer;
814
+ struct FrtIndexWriter {
815
+ FrtConfig config;
816
+ frt_mutex_t mutex;
817
+ FrtStore *store;
818
+ FrtAnalyzer *analyzer;
835
819
  FrtSegmentInfos *sis;
836
- FrtFieldInfos *fis;
837
- FrtDocWriter *dw;
838
- FrtSimilarity *similarity;
839
- FrtLock *write_lock;
840
- FrtDeleter *deleter;
820
+ FrtFieldInfos *fis;
821
+ FrtDocWriter *dw;
822
+ FrtSimilarity *similarity;
823
+ FrtLock *write_lock;
824
+ FrtDeleter *deleter;
841
825
  };
842
826
 
843
827
  extern void frt_index_create(FrtStore *store, FrtFieldInfos *fis);
844
828
  extern bool frt_index_is_locked(FrtStore *store);
845
- extern FrtIndexWriter *frt_iw_open(FrtStore *store, FrtAnalyzer *analyzer, const FrtConfig *config);
846
- extern void frt_iw_delete_term(FrtIndexWriter *iw, FrtSymbol field, const char *term);
847
- extern void frt_iw_delete_terms(FrtIndexWriter *iw, FrtSymbol field, char **terms, const int term_cnt);
829
+ extern FrtIndexWriter *frt_iw_alloc();
830
+ extern FrtIndexWriter *frt_iw_open(FrtIndexWriter *, FrtStore *store, FrtAnalyzer *analyzer, const FrtConfig *config);
831
+ extern void frt_iw_delete_term(FrtIndexWriter *iw, ID field, const char *term);
832
+ extern void frt_iw_delete_terms(FrtIndexWriter *iw, ID field, char **terms, const int term_cnt);
848
833
  extern void frt_iw_close(FrtIndexWriter *iw);
849
834
  extern void frt_iw_add_doc(FrtIndexWriter *iw, FrtDocument *doc);
850
835
  extern int frt_iw_doc_count(FrtIndexWriter *iw);
@@ -859,17 +844,16 @@ extern void frt_iw_add_readers(FrtIndexWriter *iw, FrtIndexReader **readers, con
859
844
  ****************************************************************************/
860
845
 
861
846
  #define FRT_CW_INIT_CAPA 16
862
- typedef struct FrtCWFileEntry
863
- {
864
- char *name;
847
+ typedef struct FrtCWFileEntry {
848
+ char *name;
865
849
  off_t dir_offset;
866
850
  off_t data_offset;
867
851
  } FrtCWFileEntry;
868
852
 
869
853
  typedef struct FrtCompoundWriter {
870
- FrtStore *store;
871
- const char *name;
872
- FrtHashSet *ids;
854
+ FrtStore *store;
855
+ const char *name;
856
+ FrtHashSet *ids;
873
857
  FrtCWFileEntry *file_entries;
874
858
  } FrtCompoundWriter;
875
859