isomorfeus-ferret 0.13.11 → 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,8 @@
1
1
  #include "frt_index.h"
2
2
  #include "isomorfeus_ferret.h"
3
- #include <ruby.h>
4
-
5
- // #undef close
6
3
 
7
4
  VALUE mIndex;
8
5
 
9
- VALUE cFieldInfo;
10
6
  VALUE cFieldInfos;
11
7
 
12
8
  VALUE cTVOffsets;
@@ -16,16 +12,15 @@ VALUE cTermVector;
16
12
  VALUE cTermEnum;
17
13
  VALUE cTermDocEnum;
18
14
 
19
- VALUE cLazyDoc;
20
- VALUE cLazyDocData;
21
15
  VALUE cIndexWriter;
22
16
  VALUE cIndexReader;
23
17
 
24
18
  VALUE sym_analyzer;
19
+ VALUE sym_boost;
20
+
25
21
  static VALUE sym_close_dir;
26
22
  static VALUE sym_create;
27
23
  static VALUE sym_create_if_missing;
28
-
29
24
  static VALUE sym_chunk_size;
30
25
  static VALUE sym_max_buffer_memory;
31
26
  static VALUE sym_index_interval;
@@ -35,338 +30,25 @@ static VALUE sym_max_buffered_docs;
35
30
  static VALUE sym_max_merge_docs;
36
31
  static VALUE sym_max_field_length;
37
32
  static VALUE sym_use_compound_file;
38
-
39
- static VALUE sym_boost;
40
33
  static VALUE sym_field_infos;
41
34
 
42
- static VALUE sym_store;
43
- static VALUE sym_index;
44
- static VALUE sym_term_vector;
45
-
46
- static VALUE sym_brotli;
47
- static VALUE sym_bz2;
48
- static VALUE sym_lz4;
49
- static VALUE sym_compression;
50
-
51
- static VALUE sym_untokenized;
52
- static VALUE sym_omit_norms;
53
- static VALUE sym_untokenized_omit_norms;
54
-
55
- static VALUE sym_with_positions;
56
- static VALUE sym_with_offsets;
57
- static VALUE sym_with_positions_offsets;
58
-
59
35
  static ID fsym_content;
60
-
61
36
  static ID id_term;
62
- static ID id_fields;
63
37
  static ID id_fld_num_map;
64
38
  static ID id_field_num;
65
39
  static ID id_boost;
66
40
 
41
+ extern VALUE sym_each;
67
42
  extern rb_encoding *utf8_encoding;
68
- extern void frb_set_term(VALUE rterm, FrtTerm *t);
43
+ extern void frb_fi_get_params(VALUE roptions, FrtStoreValue *store, FrtCompressionType *compression, FrtIndexValue *index, FrtTermVectorValue *term_vector, float *boost);
69
44
  extern FrtAnalyzer *frb_get_cwrapped_analyzer(VALUE ranalyzer);
70
45
  extern VALUE frb_get_analyzer(FrtAnalyzer *a);
46
+ extern VALUE frb_get_field_info(FrtFieldInfo *fi);
47
+ extern VALUE frb_get_lazy_doc(FrtLazyDoc *lazy_doc);
48
+ extern void frb_set_term(VALUE rterm, FrtTerm *t);
71
49
 
72
- /****************************************************************************
73
- *
74
- * FieldInfo Methods
75
- *
76
- ****************************************************************************/
77
-
78
- static void frb_fi_free(void *p) {
79
- frt_fi_deref((FrtFieldInfo *)p);
80
- }
81
-
82
- static void frb_fi_get_params(VALUE roptions, FrtStoreValue *store, FrtCompressionType *compression, FrtIndexValue *index, FrtTermVectorValue *term_vector, float *boost) {
83
- VALUE v;
84
- Check_Type(roptions, T_HASH);
85
- v = rb_hash_aref(roptions, sym_boost);
86
- if (Qnil != v) {
87
- *boost = (float)NUM2DBL(v);
88
- } else {
89
- *boost = 1.0f;
90
- }
91
- v = rb_hash_aref(roptions, sym_store);
92
- if (Qnil != v) Check_Type(v, T_SYMBOL);
93
- if (v == sym_no || v == sym_false || v == Qfalse) {
94
- *store = FRT_STORE_NO;
95
- } else if (v == sym_yes || v == sym_true || v == Qtrue) {
96
- *store = FRT_STORE_YES;
97
- } else if (v == Qnil) {
98
- /* leave as default */
99
- } else {
100
- rb_raise(rb_eArgError, ":%s isn't a valid argument for :store. Please choose from [:yes, :no]",
101
- rb_id2name(SYM2ID(v)));
102
- }
103
-
104
- v = rb_hash_aref(roptions, sym_compression);
105
- if (Qnil != v) Check_Type(v, T_SYMBOL);
106
- if (v == sym_no || v == sym_false || v == Qfalse) {
107
- *compression = FRT_COMPRESSION_NONE;
108
- } else if (v == sym_yes || v == sym_true || v == Qtrue || v == sym_brotli) {
109
- *compression = FRT_COMPRESSION_BROTLI;
110
- } else if (v == sym_bz2) {
111
- *compression = FRT_COMPRESSION_BZ2;
112
- } else if (v == sym_lz4) {
113
- *compression = FRT_COMPRESSION_LZ4;
114
- } else if (v == Qnil) {
115
- /* leave as default */
116
- } else {
117
- rb_raise(rb_eArgError, ":%s isn't a valid argument for :compression. Please choose from [:yes, :no, :brotli, :bz2, :lz4]",
118
- rb_id2name(SYM2ID(v)));
119
- }
120
-
121
- v = rb_hash_aref(roptions, sym_index);
122
- if (Qnil != v) Check_Type(v, T_SYMBOL);
123
- if (v == sym_no || v == sym_false || v == Qfalse) {
124
- *index = FRT_INDEX_NO;
125
- } else if (v == sym_yes || v == sym_true || v == Qtrue) {
126
- *index = FRT_INDEX_YES;
127
- } else if (v == sym_untokenized) {
128
- *index = FRT_INDEX_UNTOKENIZED;
129
- } else if (v == sym_omit_norms) {
130
- *index = FRT_INDEX_YES_OMIT_NORMS;
131
- } else if (v == sym_untokenized_omit_norms) {
132
- *index = FRT_INDEX_UNTOKENIZED_OMIT_NORMS;
133
- } else if (v == Qnil) {
134
- /* leave as default */
135
- } else {
136
- rb_raise(rb_eArgError, ":%s isn't a valid argument for :index. Please choose from [:no, :yes, :untokenized, "
137
- ":omit_norms, :untokenized_omit_norms]", rb_id2name(SYM2ID(v)));
138
- }
139
-
140
- v = rb_hash_aref(roptions, sym_term_vector);
141
- if (Qnil != v) Check_Type(v, T_SYMBOL);
142
- if (v == sym_no || v == sym_false || v == Qfalse) {
143
- *term_vector = FRT_TERM_VECTOR_NO;
144
- } else if (v == sym_yes || v == sym_true || v == Qtrue) {
145
- *term_vector = FRT_TERM_VECTOR_YES;
146
- } else if (v == sym_with_positions) {
147
- *term_vector = FRT_TERM_VECTOR_WITH_POSITIONS;
148
- } else if (v == sym_with_offsets) {
149
- *term_vector = FRT_TERM_VECTOR_WITH_OFFSETS;
150
- } else if (v == sym_with_positions_offsets) {
151
- *term_vector = FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS;
152
- } else if (v == Qnil) {
153
- /* leave as default */
154
- } else {
155
- rb_raise(rb_eArgError, ":%s isn't a valid argument for :term_vector. Please choose from [:no, :yes, "
156
- ":with_positions, :with_offsets, :with_positions_offsets]", rb_id2name(SYM2ID(v)));
157
- }
158
- }
159
-
160
- static size_t frb_fi_size(const void *p) {
161
- return sizeof(FrtFieldInfo);
162
- (void)p;
163
- }
164
-
165
- const rb_data_type_t frb_field_info_t = {
166
- .wrap_struct_name = "FrbFieldInfo",
167
- .function = {
168
- .dmark = NULL,
169
- .dfree = frb_fi_free,
170
- .dsize = frb_fi_size,
171
- .dcompact = NULL,
172
- .reserved = {0},
173
- },
174
- .parent = NULL,
175
- .data = NULL,
176
- .flags = RUBY_TYPED_FREE_IMMEDIATELY
177
- };
178
-
179
- static VALUE frb_get_field_info(FrtFieldInfo *fi) {
180
- if (fi) {
181
- if (fi->rfi == 0 || fi->rfi == Qnil) {
182
- fi->rfi = TypedData_Wrap_Struct(cFieldInfo, &frb_field_info_t, fi);
183
- FRT_REF(fi);
184
- }
185
- return fi->rfi;
186
- }
187
- return Qnil;
188
- }
189
-
190
- /*
191
- * call-seq:
192
- * FieldInfo.new(name, options = {}) -> field_info
193
- *
194
- * Create a new FieldInfo object with the name +name+ and the properties
195
- * specified in +options+. The available options are [:store, :compression,
196
- * :index, :term_vector, :boost]. See the description of FieldInfo for more
197
- * information on these properties.
198
- */
199
- static VALUE frb_fi_alloc(VALUE rclass) {
200
- FrtFieldInfo *fi = frt_fi_alloc();
201
- return TypedData_Wrap_Struct(rclass, &frb_field_info_t, fi);
202
- }
203
-
204
- static VALUE frb_fi_init(int argc, VALUE *argv, VALUE self) {
205
- VALUE roptions, rname;
206
- FrtFieldInfo *fi;
207
- TypedData_Get_Struct(self, FrtFieldInfo, &frb_field_info_t, fi);
208
- FrtStoreValue store = FRT_STORE_YES;
209
- FrtCompressionType compression = FRT_COMPRESSION_NONE;
210
- FrtIndexValue index = FRT_INDEX_YES;
211
- FrtTermVectorValue term_vector = FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS;
212
- float boost = 1.0f;
213
-
214
- rb_scan_args(argc, argv, "11", &rname, &roptions);
215
- if (argc > 1) {
216
- frb_fi_get_params(roptions, &store, &compression, &index, &term_vector, &boost);
217
- }
218
- fi = frt_fi_init(fi, frb_field(rname), store, compression, index, term_vector);
219
- fi->boost = boost;
220
- fi->rfi = self;
221
- return self;
222
- }
223
-
224
- /*
225
- * call-seq:
226
- * fi.name -> symbol
227
- *
228
- * Return the name of the field
229
- */
230
- static VALUE frb_fi_name(VALUE self) {
231
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
232
- return ID2SYM(fi->name);
233
- }
234
-
235
- /*
236
- * call-seq:
237
- * fi.stored? -> bool
238
- *
239
- * Return true if the field is stored in the index.
240
- */
241
- static VALUE frb_fi_is_stored(VALUE self) {
242
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
243
- return fi_is_stored(fi) ? Qtrue : Qfalse;
244
- }
245
-
246
- /*
247
- * call-seq:
248
- * fi.compressed? -> bool
249
- *
250
- * Return true if the field is stored in the index in compressed format.
251
- */
252
- static VALUE frb_fi_is_compressed(VALUE self) {
253
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
254
- return fi_is_compressed(fi) ? Qtrue : Qfalse;
255
- }
256
-
257
- /*
258
- * call-seq:
259
- * fi.indexed? -> bool
260
- *
261
- * Return true if the field is indexed, ie searchable in the index.
262
- */
263
- static VALUE frb_fi_is_indexed(VALUE self) {
264
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
265
- return fi_is_indexed(fi) ? Qtrue : Qfalse;
266
- }
267
-
268
- /*
269
- * call-seq:
270
- * fi.tokenized? -> bool
271
- *
272
- * Return true if the field is tokenized. Tokenizing is the process of
273
- * breaking the field up into tokens. That is "the quick brown fox" becomes:
274
- *
275
- * ["the", "quick", "brown", "fox"]
276
- *
277
- * A field can only be tokenized if it is indexed.
278
- */
279
- static VALUE frb_fi_is_tokenized(VALUE self) {
280
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
281
- return fi_is_tokenized(fi) ? Qtrue : Qfalse;
282
- }
283
-
284
- /*
285
- * call-seq:
286
- * fi.omit_norms? -> bool
287
- *
288
- * Return true if the field omits the norm file. The norm file is the file
289
- * used to store the field boosts for an indexed field. If you do not boost
290
- * any fields, and you can live without scoring based on field length then
291
- * you can omit the norms file. This will give the index a slight performance
292
- * boost and it will use less memory, especially for indexes which have a
293
- * large number of documents.
294
- */
295
- static VALUE frb_fi_omit_norms(VALUE self) {
296
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
297
- return fi_omit_norms(fi) ? Qtrue : Qfalse;
298
- }
299
-
300
- /*
301
- * call-seq:
302
- * fi.store_term_vector? -> bool
303
- *
304
- * Return true if the term-vectors are stored for this field.
305
- */
306
- static VALUE frb_fi_store_term_vector(VALUE self) {
307
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
308
- return fi_store_term_vector(fi) ? Qtrue : Qfalse;
309
- }
310
-
311
- /*
312
- * call-seq:
313
- * fi.store_positions? -> bool
314
- *
315
- * Return true if positions are stored with the term-vectors for this field.
316
- */
317
- static VALUE frb_fi_store_positions(VALUE self) {
318
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
319
- return fi_store_positions(fi) ? Qtrue : Qfalse;
320
- }
321
-
322
- /*
323
- * call-seq:
324
- * fi.store_offsets? -> bool
325
- *
326
- * Return true if offsets are stored with the term-vectors for this field.
327
- */
328
- static VALUE frb_fi_store_offsets(VALUE self) {
329
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
330
- return fi_store_offsets(fi) ? Qtrue : Qfalse;
331
- }
332
-
333
- /*
334
- * call-seq:
335
- * fi.has_norms? -> bool
336
- *
337
- * Return true if this field has a norms file. This is the same as calling;
338
- *
339
- * fi.indexed? and not fi.omit_norms?
340
- */
341
- static VALUE frb_fi_has_norms(VALUE self) {
342
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
343
- return fi_has_norms(fi) ? Qtrue : Qfalse;
344
- }
345
-
346
- /*
347
- * call-seq:
348
- * fi.boost -> boost
349
- *
350
- * Return the default boost for this field
351
- */
352
- static VALUE frb_fi_boost(VALUE self) {
353
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
354
- return rb_float_new((double)fi->boost);
355
- }
356
-
357
- /*
358
- * call-seq:
359
- * fi.to_s -> string
360
- *
361
- * Return a string representation of the FieldInfo object.
362
- */
363
- static VALUE frb_fi_to_s(VALUE self) {
364
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
365
- char *fi_s = frt_fi_to_s(fi);
366
- VALUE rfi_s = rb_str_new2(fi_s);
367
- free(fi_s);
368
- return rfi_s;
369
- }
50
+ extern void Init_FieldInfo(void);
51
+ extern void Init_LazyDoc(void);
370
52
 
371
53
  /****************************************************************************
372
54
  *
@@ -1987,125 +1669,6 @@ frb_iw_set_use_compound_file(VALUE self, VALUE rval)
1987
1669
  return rval;
1988
1670
  }
1989
1671
 
1990
- /****************************************************************************
1991
- *
1992
- * LazyDoc Methods
1993
- *
1994
- ****************************************************************************/
1995
-
1996
- static void frb_lzd_data_free(void *p) {
1997
- frt_lazy_doc_close((FrtLazyDoc *)p);
1998
- }
1999
-
2000
- static size_t frb_lazy_doc_size(const void *p) {
2001
- return sizeof(FrtLazyDoc);
2002
- (void)p;
2003
- }
2004
-
2005
- const rb_data_type_t frb_lazy_doc_t = {
2006
- .wrap_struct_name = "FrbLazyDoc",
2007
- .function = {
2008
- .dmark = NULL,
2009
- .dfree = frb_lzd_data_free,
2010
- .dsize = frb_lazy_doc_size,
2011
- .dcompact = NULL,
2012
- .reserved = {0},
2013
- },
2014
- .parent = NULL,
2015
- .data = NULL,
2016
- .flags = RUBY_TYPED_FREE_IMMEDIATELY
2017
- };
2018
-
2019
- static VALUE frb_lzd_alloc(VALUE klass) {
2020
- FrtLazyDoc *ld = FRT_ALLOC(FrtLazyDoc);
2021
- return TypedData_Wrap_Struct(klass, &frb_lazy_doc_t, ld);
2022
- }
2023
-
2024
- static VALUE frb_lazy_df_load(VALUE self, VALUE rkey, FrtLazyDocField *lazy_df) {
2025
- VALUE rdata = Qnil;
2026
- if (lazy_df) {
2027
- if (lazy_df->size == 1) {
2028
- char *data = frt_lazy_df_get_data(lazy_df, 0);
2029
- rdata = rb_str_new(data, lazy_df->data[0].length);
2030
- rb_enc_associate(rdata, lazy_df->data[0].encoding);
2031
- } else {
2032
- int i;
2033
- VALUE rstr;
2034
- rdata = rb_ary_new2(lazy_df->size);
2035
- for (i = 0; i < lazy_df->size; i++) {
2036
- char *data = frt_lazy_df_get_data(lazy_df, i);
2037
- rstr = rb_str_new(data, lazy_df->data[i].length);
2038
- rb_enc_associate(rstr, lazy_df->data[i].encoding);
2039
- rb_ary_store(rdata, i, rstr);
2040
- }
2041
- }
2042
- rb_hash_aset(self, rkey, rdata);
2043
- }
2044
- return rdata;
2045
- }
2046
-
2047
- /*
2048
- * call-seq:
2049
- * lazy_doc.default(key) -> string
2050
- *
2051
- * This method is used internally to lazily load fields. You should never
2052
- * really need to call it yourself.
2053
- */
2054
- static VALUE frb_lzd_default(VALUE self, VALUE rkey) {
2055
- FrtLazyDoc *lazy_doc = (FrtLazyDoc *)DATA_PTR(rb_ivar_get(self, id_data));
2056
- ID field = frb_field(rkey);
2057
- VALUE rfield = ID2SYM(field);
2058
-
2059
- return frb_lazy_df_load(self, rfield, frt_lazy_doc_get(lazy_doc, field));
2060
- }
2061
-
2062
- /*
2063
- * call-seq:
2064
- * lazy_doc.fields -> array of available fields
2065
- *
2066
- * Returns the list of fields stored for this particular document. If you try
2067
- * to access any of these fields in the document the field will be loaded.
2068
- * Try to access any other field and nil will be returned.
2069
- */
2070
- static VALUE frb_lzd_fields(VALUE self) {
2071
- return rb_ivar_get(self, id_fields);
2072
- }
2073
-
2074
- /*
2075
- * call-seq:
2076
- * lazy_doc.load -> lazy_doc
2077
- *
2078
- * Load all unloaded fields in the document from the index.
2079
- */
2080
- static VALUE frb_lzd_load(VALUE self) {
2081
- FrtLazyDoc *lazy_doc = (FrtLazyDoc *)DATA_PTR(rb_ivar_get(self, id_data));
2082
- int i;
2083
- for (i = 0; i < lazy_doc->size; i++) {
2084
- FrtLazyDocField *lazy_df = lazy_doc->fields[i];
2085
- frb_lazy_df_load(self, ID2SYM(lazy_df->name), lazy_df);
2086
- }
2087
- return self;
2088
- }
2089
-
2090
- VALUE frb_get_lazy_doc(FrtLazyDoc *lazy_doc) {
2091
- int i;
2092
- VALUE rfields = rb_ary_new2(lazy_doc->size);
2093
-
2094
- VALUE self, rdata;
2095
- self = rb_hash_new();
2096
- OBJSETUP(self, cLazyDoc, T_HASH);
2097
-
2098
- rdata = TypedData_Wrap_Struct(cLazyDocData, &frb_lazy_doc_t, lazy_doc);
2099
- rb_ivar_set(self, id_data, rdata);
2100
-
2101
- for (i = 0; i < lazy_doc->size; i++) {
2102
- rb_ary_store(rfields, i, ID2SYM(lazy_doc->fields[i]->name));
2103
- }
2104
- rb_ivar_set(self, id_fields, rfields);
2105
-
2106
- return self;
2107
- }
2108
-
2109
1672
  /****************************************************************************
2110
1673
  *
2111
1674
  * IndexReader Methods
@@ -2743,190 +2306,39 @@ frb_ir_tk_fields(VALUE self)
2743
2306
  * Returns the current version of the index reader.
2744
2307
  */
2745
2308
  static VALUE
2746
- frb_ir_version(VALUE self)
2747
- {
2309
+ frb_ir_version(VALUE self) {
2748
2310
  FrtIndexReader *ir = (FrtIndexReader *)DATA_PTR(self);
2749
2311
  return ULL2NUM(ir->sis->version);
2750
2312
  }
2751
2313
 
2314
+ static VALUE frb_ir_to_enum(VALUE self) {
2315
+ return rb_enumeratorize(self, sym_each, 0, NULL);
2316
+ }
2317
+
2318
+ static VALUE frb_ir_each(VALUE self) {
2319
+ FrtIndexReader *ir = (FrtIndexReader *)DATA_PTR(self);
2320
+ if (rb_block_given_p()) {
2321
+ long i;
2322
+ long max_doc = ir->max_doc(ir);
2323
+ VALUE rld;
2324
+ for (i = 0; i < max_doc; i++) {
2325
+ if (ir->is_deleted(ir, i)) continue;
2326
+ rld = frb_get_lazy_doc(ir->get_lazy_doc(ir, i));
2327
+ rb_yield(rld);
2328
+ }
2329
+ return self;
2330
+ } else {
2331
+ return frb_ir_to_enum(self);
2332
+ }
2333
+
2334
+ }
2335
+
2752
2336
  /****************************************************************************
2753
2337
  *
2754
2338
  * Init Functions
2755
2339
  *
2756
2340
  ****************************************************************************/
2757
2341
 
2758
-
2759
- /*
2760
- * Document-class: Ferret::Index::FieldInfo
2761
- *
2762
- * == Summary
2763
- *
2764
- * The FieldInfo class is the field descriptor for the index. It specifies
2765
- * whether a field is compressed or not or whether it should be indexed and
2766
- * tokenized. Every field has a name which must be a symbol. There are three
2767
- * properties that you can set, +:store+, +:index+ and +:term_vector+. You
2768
- * can also set the default +:boost+ for a field as well.
2769
- *
2770
- * == Properties
2771
- *
2772
- * === :store
2773
- *
2774
- * The +:store+ property allows you to specify how a field is stored. You can
2775
- * leave a field unstored (+:no+), store it in its original format (+:yes+)
2776
- * or store it in compressed format (+:compressed+). By default the document
2777
- * is stored in its original format. If the field is large and it is stored
2778
- * elsewhere where it is easily accessible you might want to leave it
2779
- * unstored. This will keep the index size a lot smaller and make the
2780
- * indexing process a lot faster. For example, you should probably leave the
2781
- * +:content+ field unstored when indexing all the documents in your
2782
- * file-system.
2783
- *
2784
- * === :index
2785
- *
2786
- * The +:index+ property allows you to specify how a field is indexed. A
2787
- * field must be indexed to be searchable. However, a field doesn't need to
2788
- * be indexed to be store in the Ferret index. You may want to use the index
2789
- * as a simple database and store things like images or MP3s in the index. By
2790
- * default each field is indexed and tokenized (split into tokens) (+:yes+).
2791
- * If you don't want to index the field use +:no+. If you want the field
2792
- * indexed but not tokenized, use +:untokenized+. Do this for the fields you
2793
- * wish to sort by. There are two other values for +:index+; +:omit_norms+
2794
- * and +:untokenized_omit_norms+. These values correspond to +:yes+ and
2795
- * +:untokenized+ respectively and are useful if you are not boosting any
2796
- * fields and you'd like to speed up the index. The norms file is the file
2797
- * which contains the boost values for each document for a particular field.
2798
- *
2799
- * === :term_vector
2800
- *
2801
- * See TermVector for a description of term-vectors. You can specify whether
2802
- * or not you would like to store term-vectors. The available options are
2803
- * +:no+, +:yes+, +:with_positions+, +:with_offsets+ and
2804
- * +:with_positions_offsets+. Note that you need to store the positions to
2805
- * associate offsets with individual terms in the term_vector.
2806
- *
2807
- * == Property Table
2808
- *
2809
- * Property Value Description
2810
- * ------------------------------------------------------------------------
2811
- * :store | :no | Don't store field
2812
- * | |
2813
- * | :yes (default) | Store field in its original
2814
- * | | format. Use this value if you
2815
- * | | want to highlight matches.
2816
- * | | or print match excerpts a la
2817
- * | | Google search.
2818
- * -------------|-------------------------|------------------------------
2819
- * :compression | :no (default) | Don't compress stored field
2820
- * | |
2821
- * | :brotli | Compress field using Brotli
2822
- * | |
2823
- * | :bz2 | Compress field using BZip2
2824
- * | |
2825
- * | :lz4 | Compress field using LZ4
2826
- * -------------|-------------------------|------------------------------
2827
- * :index | :no | Do not make this field
2828
- * | | searchable.
2829
- * | |
2830
- * | :yes (default) | Make this field searchable and
2831
- * | | tokenize its contents.
2832
- * | |
2833
- * | :untokenized | Make this field searchable but
2834
- * | | do not tokenize its contents.
2835
- * | | use this value for fields you
2836
- * | | wish to sort by.
2837
- * | |
2838
- * | :omit_norms | Same as :yes except omit the
2839
- * | | norms file. The norms file can
2840
- * | | be omitted if you don't boost
2841
- * | | any fields and you don't need
2842
- * | | scoring based on field length.
2843
- * | |
2844
- * | :untokenized_omit_norms | Same as :untokenized except omit
2845
- * | | the norms file. Norms files can
2846
- * | | be omitted if you don't boost
2847
- * | | any fields and you don't need
2848
- * | | scoring based on field length.
2849
- * | |
2850
- * -------------|-------------------------|------------------------------
2851
- * :term_vector | :no | Don't store term-vectors
2852
- * | |
2853
- * | :yes | Store term-vectors without
2854
- * | | storing positions or offsets.
2855
- * | |
2856
- * | :with_positions | Store term-vectors with
2857
- * | | positions.
2858
- * | |
2859
- * | :with_offsets | Store term-vectors with
2860
- * | | offsets.
2861
- * | |
2862
- * | :with_positions_offsets | Store term-vectors with
2863
- * | (default) | positions and offsets.
2864
- * -------------|-------------------------|------------------------------
2865
- * :boost | Float | The boost property is used to
2866
- * | | set the default boost for a
2867
- * | | field. This boost value will
2868
- * | | used for all instances of the
2869
- * | | field in the index unless
2870
- * | | otherwise specified when you
2871
- * | | create the field. All values
2872
- * | | should be positive.
2873
- * | |
2874
- *
2875
- * == Examples
2876
- *
2877
- * fi = FieldInfo.new(:title, :index => :untokenized, :term_vector => :no,
2878
- * :boost => 10.0)
2879
- *
2880
- * fi = FieldInfo.new(:content)
2881
- *
2882
- * fi = FieldInfo.new(:created_on, :index => :untokenized_omit_norms,
2883
- * :term_vector => :no)
2884
- *
2885
- * fi = FieldInfo.new(:image, :store => :yes, :compression => :brotli, :index => :no,
2886
- * :term_vector => :no)
2887
- */
2888
- static void
2889
- Init_FieldInfo(void)
2890
- {
2891
- sym_store = ID2SYM(rb_intern("store"));
2892
- sym_index = ID2SYM(rb_intern("index"));
2893
- sym_term_vector = ID2SYM(rb_intern("term_vector"));
2894
-
2895
- sym_brotli = ID2SYM(rb_intern("brotli"));
2896
- sym_bz2 = ID2SYM(rb_intern("bz2"));
2897
- sym_lz4 = ID2SYM(rb_intern("lz4"));
2898
- // sym_level = ID2SYM(rb_intern("level"));
2899
- sym_compression = ID2SYM(rb_intern("compression"));
2900
-
2901
- sym_untokenized = ID2SYM(rb_intern("untokenized"));
2902
- sym_omit_norms = ID2SYM(rb_intern("omit_norms"));
2903
- sym_untokenized_omit_norms = ID2SYM(rb_intern("untokenized_omit_norms"));
2904
-
2905
- sym_with_positions = ID2SYM(rb_intern("with_positions"));
2906
- sym_with_offsets = ID2SYM(rb_intern("with_offsets"));
2907
- sym_with_positions_offsets = ID2SYM(rb_intern("with_positions_offsets"));
2908
-
2909
- cFieldInfo = rb_define_class_under(mIndex, "FieldInfo", rb_cObject);
2910
- rb_define_alloc_func(cFieldInfo, frb_fi_alloc);
2911
-
2912
- rb_define_method(cFieldInfo, "initialize", frb_fi_init, -1);
2913
- rb_define_method(cFieldInfo, "name", frb_fi_name, 0);
2914
- rb_define_method(cFieldInfo, "stored?", frb_fi_is_stored, 0);
2915
- rb_define_method(cFieldInfo, "compressed?", frb_fi_is_compressed, 0);
2916
- rb_define_method(cFieldInfo, "indexed?", frb_fi_is_indexed, 0);
2917
- rb_define_method(cFieldInfo, "tokenized?", frb_fi_is_tokenized, 0);
2918
- rb_define_method(cFieldInfo, "omit_norms?", frb_fi_omit_norms, 0);
2919
- rb_define_method(cFieldInfo, "store_term_vector?",
2920
- frb_fi_store_term_vector, 0);
2921
- rb_define_method(cFieldInfo, "store_positions?",
2922
- frb_fi_store_positions, 0);
2923
- rb_define_method(cFieldInfo, "store_offsets?",
2924
- frb_fi_store_offsets, 0);
2925
- rb_define_method(cFieldInfo, "has_norms?", frb_fi_has_norms, 0);
2926
- rb_define_method(cFieldInfo, "boost", frb_fi_boost, 0);
2927
- rb_define_method(cFieldInfo, "to_s", frb_fi_to_s, 0);
2928
- }
2929
-
2930
2342
  /*
2931
2343
  * Document-class: Ferret::Index::FieldInfos
2932
2344
  *
@@ -3350,48 +2762,6 @@ void Init_IndexWriter(void) {
3350
2762
  rb_define_method(cIndexWriter, "use_compound_file=", frb_iw_set_use_compound_file, 1);
3351
2763
  }
3352
2764
 
3353
- /*
3354
- * Document-class: Ferret::Index::LazyDoc
3355
- *
3356
- * == Summary
3357
- *
3358
- * When a document is retrieved from the index a LazyDoc is returned.
3359
- * Actually, LazyDoc is just a modified Hash object which lazily adds fields
3360
- * to itself when they are accessed. You should note that the keys method
3361
- * will return nothing until you actually access one of the fields. To see
3362
- * what fields are available use LazyDoc#fields rather than LazyDoc#keys. To
3363
- * load all fields use the LazyDoc#load method.
3364
- *
3365
- * == Example
3366
- *
3367
- * doc = index_reader[0]
3368
- *
3369
- * doc.keys #=> []
3370
- * doc.values #=> []
3371
- * doc.fields #=> [:title, :content]
3372
- *
3373
- * title = doc[:title] #=> "the title"
3374
- * doc.keys #=> [:title]
3375
- * doc.values #=> ["the title"]
3376
- * doc.fields #=> [:title, :content]
3377
- *
3378
- * doc.load
3379
- * doc.keys #=> [:title, :content]
3380
- * doc.values #=> ["the title", "the content"]
3381
- * doc.fields #=> [:title, :content]
3382
- */
3383
- void Init_LazyDoc(void) {
3384
- id_fields = rb_intern("@fields");
3385
-
3386
- cLazyDoc = rb_define_class_under(mIndex, "LazyDoc", rb_cHash);
3387
- rb_define_method(cLazyDoc, "default", frb_lzd_default, 1);
3388
- rb_define_method(cLazyDoc, "load", frb_lzd_load, 0);
3389
- rb_define_method(cLazyDoc, "fields", frb_lzd_fields, 0);
3390
-
3391
- cLazyDocData = rb_define_class_under(cLazyDoc, "LazyDocData", rb_cObject);
3392
- rb_define_alloc_func(cLazyDocData, frb_lzd_alloc);
3393
- }
3394
-
3395
2765
  /*
3396
2766
  * Document-class: Ferret::Index::IndexReader
3397
2767
  *
@@ -3405,36 +2775,38 @@ void Init_LazyDoc(void) {
3405
2775
  void Init_IndexReader(void) {
3406
2776
  cIndexReader = rb_define_class_under(mIndex, "IndexReader", rb_cObject);
3407
2777
  rb_define_alloc_func(cIndexReader, frb_ir_alloc);
3408
- rb_define_method(cIndexReader, "initialize", frb_ir_init, 1);
3409
- rb_define_method(cIndexReader, "set_norm", frb_ir_set_norm, 3);
3410
- rb_define_method(cIndexReader, "norms", frb_ir_norms, 1);
2778
+ rb_define_method(cIndexReader, "initialize", frb_ir_init, 1);
2779
+ rb_define_method(cIndexReader, "set_norm", frb_ir_set_norm, 3);
2780
+ rb_define_method(cIndexReader, "norms", frb_ir_norms, 1);
3411
2781
  rb_define_method(cIndexReader, "get_norms_into", frb_ir_get_norms_into, 3);
3412
- rb_define_method(cIndexReader, "commit", frb_ir_commit, 0);
3413
- rb_define_method(cIndexReader, "close", frb_ir_close, 0);
2782
+ rb_define_method(cIndexReader, "commit", frb_ir_commit, 0);
2783
+ rb_define_method(cIndexReader, "close", frb_ir_close, 0);
3414
2784
  rb_define_method(cIndexReader, "has_deletions?", frb_ir_has_deletions, 0);
3415
- rb_define_method(cIndexReader, "delete", frb_ir_delete, 1);
3416
- rb_define_method(cIndexReader, "deleted?", frb_ir_is_deleted, 1);
3417
- rb_define_method(cIndexReader, "max_doc", frb_ir_max_doc, 0);
3418
- rb_define_method(cIndexReader, "num_docs", frb_ir_num_docs, 0);
3419
- rb_define_method(cIndexReader, "undelete_all", frb_ir_undelete_all, 0);
3420
- rb_define_method(cIndexReader, "latest?", frb_ir_is_latest, 0);
3421
- rb_define_method(cIndexReader, "get_document", frb_ir_get_doc, -1);
3422
- rb_define_method(cIndexReader, "[]", frb_ir_get_doc, -1);
3423
- rb_define_method(cIndexReader, "term_vector", frb_ir_term_vector, 2);
3424
- rb_define_method(cIndexReader, "term_vectors", frb_ir_term_vectors, 1);
3425
- rb_define_method(cIndexReader, "term_docs", frb_ir_term_docs, 0);
2785
+ rb_define_method(cIndexReader, "delete", frb_ir_delete, 1);
2786
+ rb_define_method(cIndexReader, "deleted?", frb_ir_is_deleted, 1);
2787
+ rb_define_method(cIndexReader, "max_doc", frb_ir_max_doc, 0);
2788
+ rb_define_method(cIndexReader, "num_docs", frb_ir_num_docs, 0);
2789
+ rb_define_method(cIndexReader, "undelete_all", frb_ir_undelete_all, 0);
2790
+ rb_define_method(cIndexReader, "latest?", frb_ir_is_latest, 0);
2791
+ rb_define_method(cIndexReader, "get_document", frb_ir_get_doc, -1);
2792
+ rb_define_method(cIndexReader, "[]", frb_ir_get_doc, -1);
2793
+ rb_define_method(cIndexReader, "term_vector", frb_ir_term_vector, 2);
2794
+ rb_define_method(cIndexReader, "term_vectors", frb_ir_term_vectors, 1);
2795
+ rb_define_method(cIndexReader, "term_docs", frb_ir_term_docs, 0);
3426
2796
  rb_define_method(cIndexReader, "term_positions", frb_ir_term_positions, 0);
3427
2797
  rb_define_method(cIndexReader, "term_docs_for", frb_ir_term_docs_for, 2);
3428
2798
  rb_define_method(cIndexReader, "term_positions_for", frb_ir_t_pos_for, 2);
3429
- rb_define_method(cIndexReader, "doc_freq", frb_ir_doc_freq, 2);
3430
- rb_define_method(cIndexReader, "terms", frb_ir_terms, 1);
3431
- rb_define_method(cIndexReader, "terms_from", frb_ir_terms_from, 2);
3432
- rb_define_method(cIndexReader, "term_count", frb_ir_term_count, 1);
3433
- rb_define_method(cIndexReader, "fields", frb_ir_fields, 0);
3434
- rb_define_method(cIndexReader, "field_names", frb_ir_fields, 0);
3435
- rb_define_method(cIndexReader, "field_infos", frb_ir_field_infos, 0);
3436
- rb_define_method(cIndexReader, "tokenized_fields", frb_ir_tk_fields, 0);
3437
- rb_define_method(cIndexReader, "version", frb_ir_version, 0);
2799
+ rb_define_method(cIndexReader, "doc_freq", frb_ir_doc_freq, 2);
2800
+ rb_define_method(cIndexReader, "terms", frb_ir_terms, 1);
2801
+ rb_define_method(cIndexReader, "terms_from", frb_ir_terms_from, 2);
2802
+ rb_define_method(cIndexReader, "term_count", frb_ir_term_count, 1);
2803
+ rb_define_method(cIndexReader, "fields", frb_ir_fields, 0);
2804
+ rb_define_method(cIndexReader, "field_names", frb_ir_fields, 0);
2805
+ rb_define_method(cIndexReader, "field_infos", frb_ir_field_infos, 0);
2806
+ rb_define_method(cIndexReader, "tokenized_fields", frb_ir_tk_fields, 0);
2807
+ rb_define_method(cIndexReader, "version", frb_ir_version, 0);
2808
+ rb_define_method(cIndexReader, "each", frb_ir_each, 0);
2809
+ rb_define_method(cIndexReader, "to_enum", frb_ir_to_enum, 0);
3438
2810
  }
3439
2811
 
3440
2812
  /* rdoc hack