isomorfeus-ferret 0.12.7 → 0.13.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (166) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +101 -19
  3. data/README.md +85 -13
  4. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
  5. data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
  6. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
  7. data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
  8. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +1 -1
  9. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +1 -1
  10. data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
  11. data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
  12. data/ext/isomorfeus_ferret_ext/bzlib_blocksort.c +1094 -0
  13. data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
  14. data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
  15. data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
  16. data/ext/isomorfeus_ferret_ext/bzlib_huffman.c +205 -0
  17. data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
  18. data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
  19. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
  20. data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
  21. data/ext/isomorfeus_ferret_ext/frb_index.c +497 -495
  22. data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
  23. data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
  24. data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
  25. data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
  26. data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
  27. data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
  28. data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
  29. data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
  30. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
  31. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
  32. data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
  33. data/ext/isomorfeus_ferret_ext/frt_document.h +10 -10
  34. data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
  35. data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
  36. data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
  37. data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
  38. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
  39. data/ext/isomorfeus_ferret_ext/frt_global.c +91 -200
  40. data/ext/isomorfeus_ferret_ext/frt_global.h +7 -18
  41. data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
  42. data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
  43. data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
  44. data/ext/isomorfeus_ferret_ext/frt_index.c +603 -410
  45. data/ext/isomorfeus_ferret_ext/frt_index.h +272 -291
  46. data/ext/isomorfeus_ferret_ext/frt_lang.c +0 -2
  47. data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
  48. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
  49. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +68 -91
  50. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
  51. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
  52. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
  53. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
  54. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
  55. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
  56. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +20 -16
  57. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
  58. data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
  59. data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
  60. data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
  61. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
  62. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +45 -84
  63. data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
  64. data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
  65. data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
  66. data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
  67. data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
  68. data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
  69. data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
  70. data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
  71. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +22 -112
  72. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
  73. data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
  74. data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
  75. data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
  76. data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
  77. data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
  78. data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
  79. data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
  80. data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
  81. data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
  82. data/ext/isomorfeus_ferret_ext/test.c +0 -17
  83. data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
  84. data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
  85. data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
  86. data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
  87. data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
  88. data/ext/isomorfeus_ferret_ext/test_fields.c +59 -60
  89. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
  90. data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
  91. data/ext/isomorfeus_ferret_ext/test_global.c +0 -46
  92. data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
  93. data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
  94. data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
  95. data/ext/isomorfeus_ferret_ext/test_index.c +372 -365
  96. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
  97. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
  98. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
  99. data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
  100. data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
  101. data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
  102. data/ext/isomorfeus_ferret_ext/test_search.c +60 -64
  103. data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
  104. data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
  105. data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
  106. data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
  107. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
  108. data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
  109. data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
  110. data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
  111. data/lib/isomorfeus/ferret/version.rb +1 -1
  112. metadata +27 -57
  113. data/ext/isomorfeus_ferret_ext/email.rl +0 -21
  114. data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
  115. data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
  116. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
  117. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
  118. data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
  119. data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
  120. data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
  121. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
  122. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
  128. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
  129. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
  130. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
  131. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
  132. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
  133. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
  134. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
  135. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
  136. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
  137. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
  138. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
  139. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
  140. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
  141. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
  142. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
  143. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
  144. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
  145. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
  146. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
  147. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
  148. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
  149. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
  150. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
  151. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
  152. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
  153. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
  154. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
  155. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
  156. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
  157. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
  158. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
  159. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
  160. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
  161. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
  162. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
  163. data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
  164. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
  165. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
  166. data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -13,6 +13,7 @@
13
13
  #include "frt_priorityqueue.h"
14
14
 
15
15
  typedef struct FrtIndexReader FrtIndexReader;
16
+ typedef struct FrtSegmentReader FrtSegmentReader;
16
17
  typedef struct FrtMultiReader FrtMultiReader;
17
18
  typedef struct FrtDeleter FrtDeleter;
18
19
 
@@ -22,8 +23,7 @@ typedef struct FrtDeleter FrtDeleter;
22
23
  *
23
24
  ****************************************************************************/
24
25
 
25
- typedef struct FrtConfig
26
- {
26
+ typedef struct FrtConfig {
27
27
  int chunk_size;
28
28
  int max_buffer_memory;
29
29
  int index_interval;
@@ -52,8 +52,7 @@ typedef struct FrtCacheObject {
52
52
  void (*destroy)(void *p);
53
53
  } FrtCacheObject;
54
54
 
55
- extern FrtCacheObject *frt_co_create(FrtHash *ref_tab1,
56
- FrtHash *ref_tab2,
55
+ extern FrtCacheObject *frt_co_create(FrtHash *ref_tab1, FrtHash *ref_tab2,
57
56
  void *ref1, void *ref2, frt_free_ft destroy, void *obj);
58
57
  extern FrtHash *frt_co_hash_create();
59
58
 
@@ -63,15 +62,12 @@ extern FrtHash *frt_co_hash_create();
63
62
  *
64
63
  ****************************************************************************/
65
64
 
66
- typedef enum
67
- {
65
+ typedef enum {
68
66
  FRT_STORE_NO = 0,
69
67
  FRT_STORE_YES = 1,
70
- FRT_STORE_COMPRESS = 2
71
68
  } FrtStoreValue;
72
69
 
73
- typedef enum
74
- {
70
+ typedef enum {
75
71
  FRT_INDEX_NO = 0,
76
72
  FRT_INDEX_UNTOKENIZED = 1,
77
73
  FRT_INDEX_YES = 3,
@@ -79,8 +75,7 @@ typedef enum
79
75
  FRT_INDEX_YES_OMIT_NORMS = 7
80
76
  } FrtIndexValue;
81
77
 
82
- typedef enum
83
- {
78
+ typedef enum {
84
79
  FRT_TERM_VECTOR_NO = 0,
85
80
  FRT_TERM_VECTOR_YES = 1,
86
81
  FRT_TERM_VECTOR_WITH_POSITIONS = 3,
@@ -88,39 +83,44 @@ typedef enum
88
83
  FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS = 7
89
84
  } FrtTermVectorValue;
90
85
 
91
- #define FRT_FI_IS_STORED_BM 0x001
92
- #define FRT_FI_IS_COMPRESSED_BM 0x002
93
- #define FRT_FI_IS_INDEXED_BM 0x004
94
- #define FRT_FI_IS_TOKENIZED_BM 0x008
95
- #define FRT_FI_OMIT_NORMS_BM 0x010
96
- #define FRT_FI_STORE_TERM_VECTOR_BM 0x020
97
- #define FRT_FI_STORE_POSITIONS_BM 0x040
98
- #define FRT_FI_STORE_OFFSETS_BM 0x080
99
-
100
- typedef struct FrtFieldInfo
101
- {
102
- FrtSymbol name;
103
- float boost;
86
+ #define FRT_FI_IS_STORED_BM 0x001
87
+ #define FRT_FI_IS_COMPRESSED_BM 0x002
88
+ #define FRT_FI_IS_INDEXED_BM 0x004
89
+ #define FRT_FI_IS_TOKENIZED_BM 0x008
90
+ #define FRT_FI_OMIT_NORMS_BM 0x010
91
+ #define FRT_FI_STORE_TERM_VECTOR_BM 0x020
92
+ #define FRT_FI_STORE_POSITIONS_BM 0x040
93
+ #define FRT_FI_STORE_OFFSETS_BM 0x080
94
+ #define FRT_FI_COMPRESSION_BROTLI_BM 0x100
95
+ #define FRT_FI_COMPRESSION_BZ2_BM 0x200
96
+ #define FRT_FI_COMPRESSION_LZ4_BM 0x400
97
+
98
+ typedef struct FrtFieldInfo {
99
+ ID name;
100
+ float boost;
104
101
  unsigned int bits;
105
- int number;
106
- int ref_cnt;
102
+ int number;
103
+ int ref_cnt;
104
+ VALUE rfi;
107
105
  } FrtFieldInfo;
108
106
 
109
- extern FrtFieldInfo *frt_fi_new(FrtSymbol name,
110
- FrtStoreValue store,
111
- FrtIndexValue index,
112
- FrtTermVectorValue term_vector);
107
+ extern FrtFieldInfo *frt_fi_alloc();
108
+ extern FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name, FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector);
109
+ extern FrtFieldInfo *frt_fi_new(ID name, FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector);
113
110
  extern char *frt_fi_to_s(FrtFieldInfo *fi);
114
111
  extern void frt_fi_deref(FrtFieldInfo *fi);
115
112
 
116
- #define fi_is_stored(fi) (((fi)->bits & FRT_FI_IS_STORED_BM) != 0)
117
- #define fi_is_compressed(fi) (((fi)->bits & FRT_FI_IS_COMPRESSED_BM) != 0)
118
- #define fi_is_indexed(fi) (((fi)->bits & FRT_FI_IS_INDEXED_BM) != 0)
119
- #define fi_is_tokenized(fi) (((fi)->bits & FRT_FI_IS_TOKENIZED_BM) != 0)
120
- #define fi_omit_norms(fi) (((fi)->bits & FRT_FI_OMIT_NORMS_BM) != 0)
121
- #define fi_store_term_vector(fi) (((fi)->bits & FRT_FI_STORE_TERM_VECTOR_BM) != 0)
122
- #define fi_store_positions(fi) (((fi)->bits & FRT_FI_STORE_POSITIONS_BM) != 0)
123
- #define fi_store_offsets(fi) (((fi)->bits & FRT_FI_STORE_OFFSETS_BM) != 0)
113
+ #define fi_is_stored(fi) (((fi)->bits & FRT_FI_IS_STORED_BM) != 0)
114
+ #define fi_is_compressed(fi) (((fi)->bits & FRT_FI_IS_COMPRESSED_BM) != 0)
115
+ #define fi_is_compressed_brotli(fi) (((fi)->bits & FRT_FI_COMPRESSION_BROTLI_BM) != 0)
116
+ #define fi_is_compressed_bz2(fi) (((fi)->bits & FRT_FI_COMPRESSION_BZ2_BM) != 0)
117
+ #define fi_is_compressed_lz4(fi) (((fi)->bits & FRT_FI_COMPRESSION_LZ4_BM) != 0)
118
+ #define fi_is_indexed(fi) (((fi)->bits & FRT_FI_IS_INDEXED_BM) != 0)
119
+ #define fi_is_tokenized(fi) (((fi)->bits & FRT_FI_IS_TOKENIZED_BM) != 0)
120
+ #define fi_omit_norms(fi) (((fi)->bits & FRT_FI_OMIT_NORMS_BM) != 0)
121
+ #define fi_store_term_vector(fi) (((fi)->bits & FRT_FI_STORE_TERM_VECTOR_BM) != 0)
122
+ #define fi_store_positions(fi) (((fi)->bits & FRT_FI_STORE_POSITIONS_BM) != 0)
123
+ #define fi_store_offsets(fi) (((fi)->bits & FRT_FI_STORE_OFFSETS_BM) != 0)
124
124
  #define fi_has_norms(fi)\
125
125
  (((fi)->bits & (FRT_FI_OMIT_NORMS_BM|FRT_FI_IS_INDEXED_BM)) == FRT_FI_IS_INDEXED_BM)
126
126
 
@@ -132,25 +132,26 @@ extern void frt_fi_deref(FrtFieldInfo *fi);
132
132
 
133
133
  #define FIELD_INFOS_INIT_CAPA 4
134
134
  /* carry changes over to dummy_fis in test/test_segments.c */
135
- typedef struct FrtFieldInfos
136
- {
137
- FrtStoreValue store;
138
- FrtIndexValue index;
135
+ typedef struct FrtFieldInfos {
136
+ FrtStoreValue store;
137
+ FrtCompressionType compression;
138
+ FrtIndexValue index;
139
139
  FrtTermVectorValue term_vector;
140
- int size;
141
- int capa;
142
- FrtFieldInfo **fields;
143
- FrtHash *field_dict;
144
- int ref_cnt;
140
+ int size;
141
+ int capa;
142
+ FrtFieldInfo **fields;
143
+ FrtHash *field_dict;
144
+ int ref_cnt;
145
+ VALUE rfis;
145
146
  } FrtFieldInfos;
146
147
 
147
- FrtFieldInfos *frt_fis_new(FrtStoreValue store, FrtIndexValue index,
148
- FrtTermVectorValue term_vector);
148
+ FrtFieldInfos *frt_fis_alloc();
149
+ FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis, FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector);
150
+ FrtFieldInfos *frt_fis_new(FrtStoreValue store, FrtCompressionType compression, FrtIndexValue index, FrtTermVectorValue term_vector);
149
151
  extern FrtFieldInfo *frt_fis_add_field(FrtFieldInfos *fis, FrtFieldInfo *fi);
150
- extern FrtFieldInfo *frt_fis_get_field(FrtFieldInfos *fis, FrtSymbol name);
151
- extern int frt_fis_get_field_num(FrtFieldInfos *fis, FrtSymbol name);
152
- extern FrtFieldInfo *frt_fis_get_or_add_field(FrtFieldInfos *fis,
153
- FrtSymbol name);
152
+ extern FrtFieldInfo *frt_fis_get_field(FrtFieldInfos *fis, ID name);
153
+ extern int frt_fis_get_field_num(FrtFieldInfos *fis, ID name);
154
+ extern FrtFieldInfo *frt_fis_get_or_add_field(FrtFieldInfos *fis, ID name);
154
155
  extern void frt_fis_write(FrtFieldInfos *fis, FrtOutStream *os);
155
156
  extern FrtFieldInfos *frt_fis_read(FrtInStream *is);
156
157
  extern char *frt_fis_to_s(FrtFieldInfos *fis);
@@ -165,8 +166,7 @@ extern void frt_fis_deref(FrtFieldInfos *fis);
165
166
  #define FRT_SEGMENT_NAME_MAX_LENGTH 100
166
167
  #define FRT_SEGMENTS_FILE_NAME "segments"
167
168
 
168
- typedef struct FrtSegmentInfo
169
- {
169
+ typedef struct FrtSegmentInfo {
170
170
  int ref_cnt;
171
171
  char *name;
172
172
  FrtStore *store;
@@ -189,8 +189,7 @@ extern void frt_si_advance_norm_gen(FrtSegmentInfo *si, int field_num);
189
189
  *
190
190
  ****************************************************************************/
191
191
 
192
- typedef struct FrtSegmentInfos
193
- {
192
+ typedef struct FrtSegmentInfos {
194
193
  FrtFieldInfos *fis;
195
194
  frt_u64 counter;
196
195
  frt_u64 version;
@@ -202,10 +201,7 @@ typedef struct FrtSegmentInfos
202
201
  int capa;
203
202
  } FrtSegmentInfos;
204
203
 
205
- extern char *frt_fn_for_generation(char *buf,
206
- const char *base,
207
- const char *ext,
208
- frt_i64 gen);
204
+ extern char *frt_fn_for_generation(char *buf, const char *base, const char *ext, frt_i64 gen);
209
205
 
210
206
  extern FrtSegmentInfos *frt_sis_new(FrtFieldInfos *fis);
211
207
  extern FrtSegmentInfo *frt_sis_new_segment(FrtSegmentInfos *sis, int dcnt, FrtStore *store);
@@ -226,9 +222,8 @@ extern void frt_sis_put(FrtSegmentInfos *sis, FILE *stream);
226
222
  *
227
223
  ****************************************************************************/
228
224
 
229
- typedef struct FrtTermInfo
230
- {
231
- int doc_freq;
225
+ typedef struct FrtTermInfo {
226
+ int doc_freq;
232
227
  off_t frq_ptr;
233
228
  off_t prx_ptr;
234
229
  off_t skip_offset;
@@ -242,24 +237,21 @@ typedef struct FrtTermInfo
242
237
  } while (0)
243
238
 
244
239
  /****************************************************************************
245
- *
246
240
  * FrtTermEnum
247
- *
248
241
  ****************************************************************************/
249
242
 
250
243
  typedef struct FrtTermEnum FrtTermEnum;
251
244
 
252
- struct FrtTermEnum
253
- {
245
+ struct FrtTermEnum {
254
246
  char curr_term[FRT_MAX_WORD_SIZE];
255
247
  char prev_term[FRT_MAX_WORD_SIZE];
256
- FrtTermInfo curr_ti;
248
+ FrtTermInfo curr_ti;
257
249
  int curr_term_len;
258
250
  int field_num;
259
251
  FrtTermEnum *(*set_field)(FrtTermEnum *te, int field_num);
260
- char *(*next)(FrtTermEnum *te);
261
- char *(*skip_to)(FrtTermEnum *te, const char *term);
262
- void (*close)(FrtTermEnum *te);
252
+ char *(*next)(FrtTermEnum *te);
253
+ char *(*skip_to)(FrtTermEnum *te, const char *term);
254
+ void (*close)(FrtTermEnum *te);
263
255
  FrtTermEnum *(*clone)(FrtTermEnum *te);
264
256
  };
265
257
 
@@ -267,59 +259,54 @@ char *frt_te_get_term(struct FrtTermEnum *te);
267
259
  FrtTermInfo *frt_te_get_ti(struct FrtTermEnum *te);
268
260
 
269
261
  /****************************************************************************
270
- *
271
262
  * FrtSegmentTermEnum
272
- *
273
263
  ****************************************************************************/
274
264
 
275
- /* * FrtSegmentTermIndex * */
265
+ /* FrtSegmentTermIndex */
276
266
 
277
- typedef struct FrtSegmentTermIndex
278
- {
267
+ typedef struct FrtSegmentTermIndex {
279
268
  off_t index_ptr;
280
269
  off_t ptr;
281
270
  int index_cnt;
282
271
  int size;
283
- char **index_terms;
284
- int *index_term_lens;
285
- FrtTermInfo *index_term_infos;
286
- off_t *index_ptrs;
272
+ char **index_terms;
273
+ int *index_term_lens;
274
+ FrtTermInfo *index_term_infos;
275
+ off_t *index_ptrs;
287
276
  } FrtSegmentTermIndex;
288
277
 
289
- /* * FrtSegmentFieldIndex * */
278
+ /* FrtSegmentFieldIndex */
290
279
 
291
- typedef struct FrtSegmentTermEnum FrtSegmentTermEnum;
292
-
293
- typedef struct FrtSegmentFieldIndex
294
- {
295
- frt_mutex_t mutex;
280
+ typedef struct FrtSegmentFieldIndex {
281
+ frt_mutex_t mutex;
296
282
  int skip_interval;
297
283
  int index_interval;
298
284
  off_t index_ptr;
299
- FrtTermEnum *index_te;
300
- FrtHash *field_dict;
285
+ FrtTermEnum *index_te;
286
+ FrtHash *field_dict;
301
287
  } FrtSegmentFieldIndex;
302
288
 
303
- extern FrtSegmentFieldIndex *frt_sfi_open(FrtStore *store, const char *segment);
304
- extern void frt_sfi_close(FrtSegmentFieldIndex *sfi);
289
+ /* FrtSegmentTermEnum */
305
290
 
291
+ typedef struct FrtSegmentTermEnum FrtSegmentTermEnum;
306
292
 
307
- /* * FrtSegmentTermEnum * */
308
- struct FrtSegmentTermEnum
309
- {
310
- FrtTermEnum te;
311
- FrtInStream *is;
293
+ struct FrtSegmentTermEnum {
294
+ FrtTermEnum te;
295
+ FrtInStream *is;
312
296
  int size;
313
297
  int pos;
314
298
  int skip_interval;
315
299
  FrtSegmentFieldIndex *sfi;
316
300
  };
317
301
 
302
+ extern FrtSegmentFieldIndex *frt_sfi_open(FrtStore *store, const char *segment);
303
+ extern void frt_sfi_close(FrtSegmentFieldIndex *sfi);
304
+
318
305
  extern void frt_ste_close(FrtTermEnum *te);
319
306
  extern FrtTermEnum *frt_ste_clone(FrtTermEnum *te);
320
307
  extern FrtTermEnum *frt_ste_new(FrtInStream *is, FrtSegmentFieldIndex *sfi);
321
308
 
322
- /* * MultiTermEnum * */
309
+ /* MultiTermEnum */
323
310
 
324
311
  extern FrtTermEnum *frt_mte_new(FrtMultiReader *mr, int field_num, const char *term);
325
312
 
@@ -329,17 +316,14 @@ extern FrtTermEnum *frt_mte_new(FrtMultiReader *mr, int field_num, const char *t
329
316
  *
330
317
  ****************************************************************************/
331
318
 
332
- typedef struct FrtTermInfosReader
333
- {
319
+ typedef struct FrtTermInfosReader {
334
320
  frt_thread_key_t thread_te;
335
- void **te_bucket;
336
- FrtTermEnum *orig_te;
337
- int field_num;
321
+ void **te_bucket;
322
+ FrtTermEnum *orig_te;
323
+ int field_num;
338
324
  } FrtTermInfosReader;
339
325
 
340
- extern FrtTermInfosReader *frt_tir_open(FrtStore *store,
341
- FrtSegmentFieldIndex *sfi,
342
- const char *segment);
326
+ extern FrtTermInfosReader *frt_tir_open(FrtStore *store, FrtSegmentFieldIndex *sfi, const char *segment);
343
327
  extern FrtTermInfosReader *frt_tir_set_field(FrtTermInfosReader *tir, int field_num);
344
328
  extern FrtTermInfo *frt_tir_get_ti(FrtTermInfosReader *tir, const char *term);
345
329
  extern char *frt_tir_get_term(FrtTermInfosReader *tir, int pos);
@@ -354,34 +338,26 @@ extern void frt_tir_close(FrtTermInfosReader *tir);
354
338
  #define FRT_INDEX_INTERVAL 128
355
339
  #define FRT_SKIP_INTERVAL 16
356
340
 
357
- typedef struct FrtTermWriter
358
- {
359
- int counter;
360
- const char *last_term;
361
- FrtTermInfo last_term_info;
341
+ typedef struct FrtTermWriter {
342
+ int counter;
343
+ const char *last_term;
344
+ FrtTermInfo last_term_info;
362
345
  FrtOutStream *os;
363
346
  } FrtTermWriter;
364
347
 
365
- typedef struct FrtTermInfosWriter
366
- {
367
- int field_count;
368
- int index_interval;
369
- int skip_interval;
370
- off_t last_index_ptr;
371
- FrtOutStream *tfx_out;
348
+ typedef struct FrtTermInfosWriter {
349
+ int field_count;
350
+ int index_interval;
351
+ int skip_interval;
352
+ off_t last_index_ptr;
353
+ FrtOutStream *tfx_out;
372
354
  FrtTermWriter *tix_writer;
373
355
  FrtTermWriter *tis_writer;
374
356
  } FrtTermInfosWriter;
375
357
 
376
- extern FrtTermInfosWriter *frt_tiw_open(FrtStore *store,
377
- const char *segment,
378
- int index_interval,
379
- int skip_interval);
358
+ extern FrtTermInfosWriter *frt_tiw_open(FrtStore *store, const char *segment, int index_interval, int skip_interval);
380
359
  extern void frt_tiw_start_field(FrtTermInfosWriter *tiw, int field_num);
381
- extern void frt_tiw_add(FrtTermInfosWriter *tiw,
382
- const char *term,
383
- int t_len,
384
- FrtTermInfo *ti);
360
+ extern void frt_tiw_add(FrtTermInfosWriter *tiw, const char *term, int t_len, FrtTermInfo *ti);
385
361
  extern void frt_tiw_close(FrtTermInfosWriter *tiw);
386
362
 
387
363
  /****************************************************************************
@@ -391,8 +367,7 @@ extern void frt_tiw_close(FrtTermInfosWriter *tiw);
391
367
  ****************************************************************************/
392
368
 
393
369
  typedef struct FrtTermDocEnum FrtTermDocEnum;
394
- struct FrtTermDocEnum
395
- {
370
+ struct FrtTermDocEnum {
396
371
  void (*seek)(FrtTermDocEnum *tde, int field_num, const char *term);
397
372
  void (*seek_te)(FrtTermDocEnum *tde, FrtTermEnum *te);
398
373
  void (*seek_ti)(FrtTermDocEnum *tde, FrtTermInfo *ti);
@@ -408,8 +383,7 @@ struct FrtTermDocEnum
408
383
  /* * FrtSegmentTermDocEnum * */
409
384
 
410
385
  typedef struct FrtSegmentTermDocEnum FrtSegmentTermDocEnum;
411
- struct FrtSegmentTermDocEnum
412
- {
386
+ struct FrtSegmentTermDocEnum {
413
387
  FrtTermDocEnum tde;
414
388
  void (*seek_prox)(FrtSegmentTermDocEnum *stde, off_t prx_ptr);
415
389
  void (*skip_prox)(FrtSegmentTermDocEnum *stde);
@@ -446,8 +420,7 @@ extern FrtTermDocEnum *frt_stpe_new(FrtTermInfosReader *tir, FrtInStream *frq_in
446
420
  * MultipleTermDocPosEnum
447
421
  ****************************************************************************/
448
422
 
449
- extern FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **terms,
450
- int t_cnt);
423
+ extern FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **terms, int t_cnt);
451
424
 
452
425
  /****************************************************************************
453
426
  *
@@ -455,8 +428,7 @@ extern FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **t
455
428
  *
456
429
  ****************************************************************************/
457
430
 
458
- typedef struct FrtOffset
459
- {
431
+ typedef struct FrtOffset {
460
432
  off_t start;
461
433
  off_t end;
462
434
  } FrtOffset;
@@ -467,8 +439,7 @@ typedef struct FrtOffset
467
439
  *
468
440
  ****************************************************************************/
469
441
 
470
- typedef struct FrtOccurence
471
- {
442
+ typedef struct FrtOccurence {
472
443
  struct FrtOccurence *next;
473
444
  int pos;
474
445
  } FrtOccurence;
@@ -479,8 +450,7 @@ typedef struct FrtOccurence
479
450
  *
480
451
  ****************************************************************************/
481
452
 
482
- typedef struct FrtPosting
483
- {
453
+ typedef struct FrtPosting {
484
454
  int freq;
485
455
  int doc_num;
486
456
  FrtOccurence *first_occ;
@@ -495,17 +465,15 @@ extern FrtPosting *frt_p_new(FrtMemoryPool *mp, int doc_num, int pos);
495
465
  *
496
466
  ****************************************************************************/
497
467
 
498
- typedef struct FrtPostingList
499
- {
500
- const char *term;
501
- int term_len;
502
- FrtPosting *first;
503
- FrtPosting *last;
468
+ typedef struct FrtPostingList {
469
+ const char *term;
470
+ int term_len;
471
+ FrtPosting *first;
472
+ FrtPosting *last;
504
473
  FrtOccurence *last_occ;
505
474
  } FrtPostingList;
506
475
 
507
- extern FrtPostingList *frt_pl_new(FrtMemoryPool *mp, const char *term,
508
- int term_len, FrtPosting *p);
476
+ extern FrtPostingList *frt_pl_new(FrtMemoryPool *mp, const char *term, int term_len, FrtPosting *p);
509
477
  extern void frt_pl_add_occ(FrtMemoryPool *mp, FrtPostingList *pl, int pos);
510
478
  extern int frt_pl_cmp(const FrtPostingList **pl1, const FrtPostingList **pl2);
511
479
 
@@ -515,8 +483,7 @@ extern int frt_pl_cmp(const FrtPostingList **pl1, const FrtPostingList **pl2);
515
483
  *
516
484
  ****************************************************************************/
517
485
 
518
- typedef struct FrtTVField
519
- {
486
+ typedef struct FrtTVField {
520
487
  int field_num;
521
488
  int size;
522
489
  } FrtTVField;
@@ -527,11 +494,10 @@ typedef struct FrtTVField
527
494
  *
528
495
  ****************************************************************************/
529
496
 
530
- typedef struct FrtTVTerm
531
- {
532
- char *text;
533
- int freq;
534
- int *positions;
497
+ typedef struct FrtTVTerm {
498
+ char *text;
499
+ int freq;
500
+ int *positions;
535
501
  } FrtTVTerm;
536
502
 
537
503
  /****************************************************************************
@@ -541,10 +507,9 @@ typedef struct FrtTVTerm
541
507
  ****************************************************************************/
542
508
 
543
509
  #define FRT_TV_FIELD_INIT_CAPA 8
544
- typedef struct FrtTermVector
545
- {
510
+ typedef struct FrtTermVector {
546
511
  int field_num;
547
- FrtSymbol field;
512
+ ID field;
548
513
  int term_cnt;
549
514
  FrtTVTerm *terms;
550
515
  int offset_cnt;
@@ -563,38 +528,38 @@ extern FrtTVTerm *frt_tv_get_tv_term(FrtTermVector *tv, const char *term);
563
528
  ****************************************************************************/
564
529
 
565
530
  /* * * FrtLazyDocField * * */
566
- typedef struct FrtLazyDocFieldData
567
- {
568
- off_t start;
569
- int length;
570
- char *text;
531
+ typedef struct FrtLazyDocFieldData {
532
+ off_t start;
533
+ int length;
534
+ rb_encoding *encoding;
535
+ FrtCompressionType compression; /* as stored */
536
+ char *text;
571
537
  } FrtLazyDocFieldData;
572
538
 
573
539
  typedef struct FrtLazyDoc FrtLazyDoc;
574
- typedef struct FrtLazyDocField
575
- {
576
- FrtSymbol name;
540
+ typedef struct FrtLazyDocField {
541
+ ID name;
577
542
  FrtLazyDocFieldData *data;
578
543
  FrtLazyDoc *doc;
579
544
  int size; /* number of data elements */
580
545
  int len; /* length of data elements concatenated */
581
- int is_compressed : 2; /* set to 2 after all data is loaded */
546
+ FrtCompressionType compression; /* as configured */
547
+ bool decompressed;
582
548
  } FrtLazyDocField;
583
549
 
584
550
  extern char *frt_lazy_df_get_data(FrtLazyDocField *self, int i);
585
551
  extern void frt_lazy_df_get_bytes(FrtLazyDocField *self, char *buf, int start, int len);
586
552
 
587
553
  /* * * FrtLazyDoc * * */
588
- struct FrtLazyDoc
589
- {
590
- FrtHash *field_dictionary;
591
- int size;
554
+ struct FrtLazyDoc {
555
+ FrtHash *field_dictionary;
556
+ int size;
592
557
  FrtLazyDocField **fields;
593
- FrtInStream *fields_in;
558
+ FrtInStream *fields_in;
594
559
  };
595
560
 
596
561
  extern void frt_lazy_doc_close(FrtLazyDoc *self);
597
- extern FrtLazyDocField *frt_lazy_doc_get(FrtLazyDoc *self, FrtSymbol field);
562
+ extern FrtLazyDocField *frt_lazy_doc_get(FrtLazyDoc *self, ID field);
598
563
 
599
564
  /****************************************************************************
600
565
  *
@@ -602,8 +567,7 @@ extern FrtLazyDocField *frt_lazy_doc_get(FrtLazyDoc *self, FrtSymbol field);
602
567
  *
603
568
  ****************************************************************************/
604
569
 
605
- typedef struct FrtFieldsReader
606
- {
570
+ typedef struct FrtFieldsReader {
607
571
  int size;
608
572
  FrtFieldInfos *fis;
609
573
  FrtStore *store;
@@ -611,15 +575,13 @@ typedef struct FrtFieldsReader
611
575
  FrtInStream *fdt_in;
612
576
  } FrtFieldsReader;
613
577
 
614
- extern FrtFieldsReader *frt_fr_open(FrtStore *store,
615
- const char *segment, FrtFieldInfos *fis);
578
+ extern FrtFieldsReader *frt_fr_open(FrtStore *store, const char *segment, FrtFieldInfos *fis);
616
579
  extern FrtFieldsReader *frt_fr_clone(FrtFieldsReader *orig);
617
580
  extern void frt_fr_close(FrtFieldsReader *fr);
618
581
  extern FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num);
619
582
  extern FrtLazyDoc *frt_fr_get_lazy_doc(FrtFieldsReader *fr, int doc_num);
620
583
  extern FrtHash *frt_fr_get_tv(FrtFieldsReader *fr, int doc_num);
621
- extern FrtTermVector *frt_fr_get_field_tv(FrtFieldsReader *fr, int doc_num,
622
- int field_num);
584
+ extern FrtTermVector *frt_fr_get_field_tv(FrtFieldsReader *fr, int doc_num, int field_num);
623
585
 
624
586
  /****************************************************************************
625
587
  *
@@ -627,18 +589,16 @@ extern FrtTermVector *frt_fr_get_field_tv(FrtFieldsReader *fr, int doc_num,
627
589
  *
628
590
  ****************************************************************************/
629
591
 
630
- typedef struct FrtFieldsWriter
631
- {
592
+ typedef struct FrtFieldsWriter {
632
593
  FrtFieldInfos *fis;
633
594
  FrtOutStream *fdt_out;
634
595
  FrtOutStream *fdx_out;
635
596
  FrtOutStream *buffer;
636
597
  FrtTVField *tv_fields;
637
- off_t start_ptr;
598
+ off_t start_ptr;
638
599
  } FrtFieldsWriter;
639
600
 
640
- extern FrtFieldsWriter *frt_fw_open(FrtStore *store,
641
- const char *segment, FrtFieldInfos *fis);
601
+ extern FrtFieldsWriter *frt_fw_open(FrtStore *store, const char *segment, FrtFieldInfos *fis);
642
602
  extern void frt_fw_close(FrtFieldsWriter *fw);
643
603
  extern void frt_fw_add_doc(FrtFieldsWriter *fw, FrtDocument *doc);
644
604
  extern void frt_fw_add_postings(FrtFieldsWriter *fw,
@@ -659,11 +619,10 @@ extern void frt_fw_write_tv_index(FrtFieldsWriter *fw);
659
619
  *
660
620
  ****************************************************************************/
661
621
 
662
- struct FrtDeleter
663
- {
664
- FrtStore *store;
665
- FrtSegmentInfos *sis;
666
- FrtHashSet *pending;
622
+ struct FrtDeleter {
623
+ FrtStore *store;
624
+ FrtSegmentInfos *sis;
625
+ FrtHashSet *pending;
667
626
  };
668
627
 
669
628
  extern FrtDeleter *frt_deleter_new(FrtSegmentInfos *sis, FrtStore *store);
@@ -681,88 +640,115 @@ extern void frt_deleter_delete_files(FrtDeleter *dlr, char **files, int file_cnt
681
640
  #define FRT_WRITE_LOCK_NAME "write"
682
641
  #define FRT_COMMIT_LOCK_NAME "commit"
683
642
 
684
- struct FrtIndexReader
685
- {
686
- int (*num_docs)(FrtIndexReader *ir);
687
- int (*max_doc)(FrtIndexReader *ir);
688
- FrtDocument *(*get_doc)(FrtIndexReader *ir, int doc_num);
689
- FrtLazyDoc *(*get_lazy_doc)(FrtIndexReader *ir, int doc_num);
690
- frt_uchar *(*get_norms)(FrtIndexReader *ir, int field_num);
691
- frt_uchar *(*get_norms_into)(FrtIndexReader *ir, int field_num,
692
- frt_uchar *buf);
693
- FrtTermEnum *(*terms)(FrtIndexReader *ir, int field_num);
694
- FrtTermEnum *(*terms_from)(FrtIndexReader *ir, int field_num,
695
- const char *term);
696
- int (*doc_freq)(FrtIndexReader *ir, int field_num,
697
- const char *term);
698
- FrtTermDocEnum *(*term_docs)(FrtIndexReader *ir);
699
- FrtTermDocEnum *(*term_positions)(FrtIndexReader *ir);
700
- FrtTermVector *(*term_vector)(FrtIndexReader *ir, int doc_num,
701
- FrtSymbol field);
702
- FrtHash *(*term_vectors)(FrtIndexReader *ir, int doc_num);
703
- bool (*is_deleted)(FrtIndexReader *ir, int doc_num);
704
- bool (*has_deletions)(FrtIndexReader *ir);
705
- void (*acquire_write_lock)(FrtIndexReader *ir);
706
- void (*set_norm_i)(FrtIndexReader *ir, int doc_num,
707
- int field_num, frt_uchar val);
708
- void (*delete_doc_i)(FrtIndexReader *ir, int doc_num);
709
- void (*undelete_all_i)(FrtIndexReader *ir);
710
- void (*set_deleter_i)(FrtIndexReader *ir, FrtDeleter *dlr);
711
- bool (*is_latest_i)(FrtIndexReader *ir);
712
- void (*commit_i)(FrtIndexReader *ir);
713
- void (*close_i)(FrtIndexReader *ir);
714
- int ref_cnt;
715
- FrtDeleter *deleter;
716
- FrtStore *store;
717
- FrtLock *write_lock;
718
- FrtSegmentInfos *sis;
719
- FrtFieldInfos *fis;
720
- FrtHash *cache;
721
- FrtHash *field_index_cache;
722
- frt_mutex_t field_index_mutex;
723
- frt_uchar *fake_norms;
724
- frt_mutex_t mutex;
725
- bool has_changes : 1;
726
- bool is_stale : 1;
727
- bool is_owner : 1;
643
+ typedef enum {
644
+ FRT_INDEX_READER,
645
+ FRT_SEGMENT_READER,
646
+ FRT_MULTI_READER
647
+ } frt_index_reader_t;
648
+
649
+ struct FrtIndexReader {
650
+ int type;
651
+ int (*num_docs)(FrtIndexReader *ir);
652
+ int (*max_doc)(FrtIndexReader *ir);
653
+ FrtDocument *(*get_doc)(FrtIndexReader *ir, int doc_num);
654
+ FrtLazyDoc *(*get_lazy_doc)(FrtIndexReader *ir, int doc_num);
655
+ frt_uchar *(*get_norms)(FrtIndexReader *ir, int field_num);
656
+ frt_uchar *(*get_norms_into)(FrtIndexReader *ir, int field_num, frt_uchar *buf);
657
+ FrtTermEnum *(*terms)(FrtIndexReader *ir, int field_num);
658
+ FrtTermEnum *(*terms_from)(FrtIndexReader *ir, int field_num, const char *term);
659
+ int (*doc_freq)(FrtIndexReader *ir, int field_num, const char *term);
660
+ FrtTermDocEnum *(*term_docs)(FrtIndexReader *ir);
661
+ FrtTermDocEnum *(*term_positions)(FrtIndexReader *ir);
662
+ FrtTermVector *(*term_vector)(FrtIndexReader *ir, int doc_num, ID field);
663
+ FrtHash *(*term_vectors)(FrtIndexReader *ir, int doc_num);
664
+ bool (*is_deleted)(FrtIndexReader *ir, int doc_num);
665
+ bool (*has_deletions)(FrtIndexReader *ir);
666
+ void (*acquire_write_lock)(FrtIndexReader *ir);
667
+ void (*set_norm_i)(FrtIndexReader *ir, int doc_num, int field_num, frt_uchar val);
668
+ void (*delete_doc_i)(FrtIndexReader *ir, int doc_num);
669
+ void (*undelete_all_i)(FrtIndexReader *ir);
670
+ void (*set_deleter_i)(FrtIndexReader *ir, FrtDeleter *dlr);
671
+ bool (*is_latest_i)(FrtIndexReader *ir);
672
+ void (*commit_i)(FrtIndexReader *ir);
673
+ void (*close_i)(FrtIndexReader *ir);
674
+ int ref_cnt;
675
+ FrtDeleter *deleter;
676
+ FrtStore *store;
677
+ FrtLock *write_lock;
678
+ FrtSegmentInfos *sis;
679
+ FrtFieldInfos *fis;
680
+ FrtHash *cache;
681
+ FrtHash *field_index_cache;
682
+ frt_mutex_t field_index_mutex;
683
+ frt_uchar *fake_norms;
684
+ frt_mutex_t mutex;
685
+ bool has_changes : 1;
686
+ bool is_stale : 1;
687
+ bool is_owner : 1;
688
+ VALUE rir;
728
689
  };
729
690
 
730
- extern FrtIndexReader *frt_ir_open(FrtStore *store);
691
+ extern FrtIndexReader *frt_ir_open(FrtIndexReader *ir, FrtStore *store);
731
692
  extern void frt_ir_close(FrtIndexReader *ir);
732
693
  extern void frt_ir_commit(FrtIndexReader *ir);
733
694
  extern void frt_ir_delete_doc(FrtIndexReader *ir, int doc_num);
734
695
  extern void frt_ir_undelete_all(FrtIndexReader *ir);
735
- extern int frt_ir_doc_freq(FrtIndexReader *ir, FrtSymbol field, const char *term);
736
- extern void frt_ir_set_norm(FrtIndexReader *ir, int doc_num, FrtSymbol field, frt_uchar val);
696
+ extern int frt_ir_doc_freq(FrtIndexReader *ir, ID field, const char *term);
697
+ extern void frt_ir_set_norm(FrtIndexReader *ir, int doc_num, ID field, frt_uchar val);
737
698
  extern frt_uchar *frt_ir_get_norms_i(FrtIndexReader *ir, int field_num);
738
- extern frt_uchar *frt_ir_get_norms(FrtIndexReader *ir, FrtSymbol field);
739
- extern frt_uchar *frt_ir_get_norms_into(FrtIndexReader *ir, FrtSymbol field, frt_uchar *buf);
740
- extern FrtDocument *frt_ir_get_doc_with_term(FrtIndexReader *ir, FrtSymbol field, const char *term);
741
- extern FrtTermEnum *frt_ir_terms(FrtIndexReader *ir, FrtSymbol field);
742
- extern FrtTermEnum *frt_ir_terms_from(FrtIndexReader *ir, FrtSymbol field, const char *t);
743
- extern FrtTermDocEnum *ir_term_docs_for(FrtIndexReader *ir, FrtSymbol field, const char *term);
744
- extern FrtTermDocEnum *frt_ir_term_positions_for(FrtIndexReader *ir, FrtSymbol field, const char *t);
699
+ extern frt_uchar *frt_ir_get_norms(FrtIndexReader *ir, ID field);
700
+ extern frt_uchar *frt_ir_get_norms_into(FrtIndexReader *ir, ID field, frt_uchar *buf);
701
+ extern FrtDocument *frt_ir_get_doc_with_term(FrtIndexReader *ir, ID field, const char *term);
702
+ extern FrtTermEnum *frt_ir_terms(FrtIndexReader *ir, ID field);
703
+ extern FrtTermEnum *frt_ir_terms_from(FrtIndexReader *ir, ID field, const char *t);
704
+ extern FrtTermDocEnum *ir_term_docs_for(FrtIndexReader *ir, ID field, const char *term);
705
+ extern FrtTermDocEnum *frt_ir_term_positions_for(FrtIndexReader *ir, ID field, const char *t);
745
706
  extern void frt_ir_add_cache(FrtIndexReader *ir);
746
707
  extern bool frt_ir_is_latest(FrtIndexReader *ir);
747
708
 
709
+ /****************************************************************************
710
+ * FrtSegmentReader
711
+ ****************************************************************************/
712
+
713
+ struct FrtSegmentReader {
714
+ FrtIndexReader ir;
715
+ FrtSegmentInfo *si;
716
+ char *segment;
717
+ FrtFieldsReader *fr;
718
+ FrtBitVector *deleted_docs;
719
+ FrtInStream *frq_in;
720
+ FrtInStream *prx_in;
721
+ FrtSegmentFieldIndex *sfi;
722
+ FrtTermInfosReader *tir;
723
+ frt_thread_key_t thread_fr;
724
+ void **fr_bucket;
725
+ FrtHash *norms;
726
+ FrtStore *cfs_store;
727
+ bool deleted_docs_dirty : 1;
728
+ bool undelete_all : 1;
729
+ bool norms_dirty : 1;
730
+ };
731
+
732
+ extern FrtSegmentReader *frt_sr_alloc();
733
+
748
734
  /****************************************************************************
749
735
  * FrtMultiReader
750
736
  ****************************************************************************/
751
737
 
752
738
  struct FrtMultiReader {
753
739
  FrtIndexReader ir;
754
- int max_doc;
755
- int num_docs_cache;
756
- int r_cnt;
757
- int *starts;
740
+ int max_doc;
741
+ int num_docs_cache;
742
+ int r_cnt;
743
+ int *starts;
758
744
  FrtIndexReader **sub_readers;
759
- FrtHash *norms_cache;
760
- bool has_deletions : 1;
761
- int **field_num_map;
745
+ FrtHash *norms_cache;
746
+ bool has_deletions : 1;
747
+ int **field_num_map;
762
748
  };
763
749
 
764
750
  extern int frt_mr_get_field_num(FrtMultiReader *mr, int ir_num, int f_num);
765
- extern FrtIndexReader *frt_mr_open(FrtIndexReader **sub_readers, const int r_cnt);
751
+ extern FrtIndexReader *frt_mr_open(FrtIndexReader *ir, FrtIndexReader **sub_readers, const int r_cnt);
766
752
 
767
753
  /****************************************************************************
768
754
  *
@@ -770,16 +756,15 @@ extern FrtIndexReader *frt_mr_open(FrtIndexReader **sub_readers, const int r_cnt
770
756
  *
771
757
  ****************************************************************************/
772
758
 
773
- typedef struct FrtFieldInverter
774
- {
775
- FrtHash *plists;
776
- frt_uchar *norms;
759
+ typedef struct FrtFieldInverter {
760
+ FrtHash *plists;
761
+ frt_uchar *norms;
777
762
  FrtFieldInfo *fi;
778
- int length;
779
- bool is_tokenized : 1;
780
- bool store_term_vector : 1;
781
- bool store_offsets : 1;
782
- bool has_norms : 1;
763
+ int length;
764
+ bool is_tokenized : 1;
765
+ bool store_term_vector : 1;
766
+ bool store_offsets : 1;
767
+ bool has_norms : 1;
783
768
  } FrtFieldInverter;
784
769
 
785
770
  /****************************************************************************
@@ -791,18 +776,17 @@ typedef struct FrtFieldInverter
791
776
  #define DW_OFFSET_INIT_CAPA 512
792
777
  typedef struct FrtIndexWriter FrtIndexWriter;
793
778
 
794
- typedef struct FrtDocWriter
795
- {
796
- FrtStore *store;
797
- FrtSegmentInfo *si;
798
- FrtFieldInfos *fis;
779
+ typedef struct FrtDocWriter {
780
+ FrtStore *store;
781
+ FrtSegmentInfo *si;
782
+ FrtFieldInfos *fis;
799
783
  FrtFieldsWriter *fw;
800
- FrtMemoryPool *mp;
801
- FrtAnalyzer *analyzer;
802
- FrtHash *curr_plists;
803
- FrtHash *fields;
804
- FrtSimilarity *similarity;
805
- FrtOffset *offsets;
784
+ FrtMemoryPool *mp;
785
+ FrtAnalyzer *analyzer;
786
+ FrtHash *curr_plists;
787
+ FrtHash *fields;
788
+ FrtSimilarity *similarity;
789
+ FrtOffset *offsets;
806
790
  int offsets_size;
807
791
  int offsets_capa;
808
792
  int doc_num;
@@ -817,9 +801,7 @@ extern void frt_dw_close(FrtDocWriter *dw);
817
801
  extern void frt_dw_add_doc(FrtDocWriter *dw, FrtDocument *doc);
818
802
  extern void frt_dw_new_segment(FrtDocWriter *dw, FrtSegmentInfo *si);
819
803
  /* For testing. need to remove somehow. FIXME */
820
- extern FrtHash *frt_dw_invert_field(FrtDocWriter *dw,
821
- FrtFieldInverter *fld_inv,
822
- FrtDocField *df);
804
+ extern FrtHash *frt_dw_invert_field(FrtDocWriter *dw, FrtFieldInverter *fld_inv, FrtDocField *df);
823
805
  extern FrtFieldInverter *frt_dw_get_fld_inv(FrtDocWriter *dw, FrtFieldInfo *fi);
824
806
  extern void frt_dw_reset_postings(FrtHash *postings);
825
807
 
@@ -829,25 +811,25 @@ extern void frt_dw_reset_postings(FrtHash *postings);
829
811
  *
830
812
  ****************************************************************************/
831
813
 
832
- struct FrtIndexWriter
833
- {
834
- FrtConfig config;
835
- frt_mutex_t mutex;
836
- FrtStore *store;
837
- FrtAnalyzer *analyzer;
814
+ struct FrtIndexWriter {
815
+ FrtConfig config;
816
+ frt_mutex_t mutex;
817
+ FrtStore *store;
818
+ FrtAnalyzer *analyzer;
838
819
  FrtSegmentInfos *sis;
839
- FrtFieldInfos *fis;
840
- FrtDocWriter *dw;
841
- FrtSimilarity *similarity;
842
- FrtLock *write_lock;
843
- FrtDeleter *deleter;
820
+ FrtFieldInfos *fis;
821
+ FrtDocWriter *dw;
822
+ FrtSimilarity *similarity;
823
+ FrtLock *write_lock;
824
+ FrtDeleter *deleter;
844
825
  };
845
826
 
846
827
  extern void frt_index_create(FrtStore *store, FrtFieldInfos *fis);
847
828
  extern bool frt_index_is_locked(FrtStore *store);
848
- extern FrtIndexWriter *frt_iw_open(FrtStore *store, FrtAnalyzer *analyzer, const FrtConfig *config);
849
- extern void frt_iw_delete_term(FrtIndexWriter *iw, FrtSymbol field, const char *term);
850
- extern void frt_iw_delete_terms(FrtIndexWriter *iw, FrtSymbol field, char **terms, const int term_cnt);
829
+ extern FrtIndexWriter *frt_iw_alloc();
830
+ extern FrtIndexWriter *frt_iw_open(FrtIndexWriter *, FrtStore *store, FrtAnalyzer *analyzer, const FrtConfig *config);
831
+ extern void frt_iw_delete_term(FrtIndexWriter *iw, ID field, const char *term);
832
+ extern void frt_iw_delete_terms(FrtIndexWriter *iw, ID field, char **terms, const int term_cnt);
851
833
  extern void frt_iw_close(FrtIndexWriter *iw);
852
834
  extern void frt_iw_add_doc(FrtIndexWriter *iw, FrtDocument *doc);
853
835
  extern int frt_iw_doc_count(FrtIndexWriter *iw);
@@ -862,17 +844,16 @@ extern void frt_iw_add_readers(FrtIndexWriter *iw, FrtIndexReader **readers, con
862
844
  ****************************************************************************/
863
845
 
864
846
  #define FRT_CW_INIT_CAPA 16
865
- typedef struct FrtCWFileEntry
866
- {
867
- char *name;
847
+ typedef struct FrtCWFileEntry {
848
+ char *name;
868
849
  off_t dir_offset;
869
850
  off_t data_offset;
870
851
  } FrtCWFileEntry;
871
852
 
872
853
  typedef struct FrtCompoundWriter {
873
- FrtStore *store;
874
- const char *name;
875
- FrtHashSet *ids;
854
+ FrtStore *store;
855
+ const char *name;
856
+ FrtHashSet *ids;
876
857
  FrtCWFileEntry *file_entries;
877
858
  } FrtCompoundWriter;
878
859