isomorfeus-ferret 0.13.11 → 0.14.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,12 +1,8 @@
1
1
  #include "frt_index.h"
2
2
  #include "isomorfeus_ferret.h"
3
- #include <ruby.h>
4
-
5
- // #undef close
6
3
 
7
4
  VALUE mIndex;
8
5
 
9
- VALUE cFieldInfo;
10
6
  VALUE cFieldInfos;
11
7
 
12
8
  VALUE cTVOffsets;
@@ -16,16 +12,15 @@ VALUE cTermVector;
16
12
  VALUE cTermEnum;
17
13
  VALUE cTermDocEnum;
18
14
 
19
- VALUE cLazyDoc;
20
- VALUE cLazyDocData;
21
15
  VALUE cIndexWriter;
22
16
  VALUE cIndexReader;
23
17
 
24
18
  VALUE sym_analyzer;
19
+ VALUE sym_boost;
20
+
25
21
  static VALUE sym_close_dir;
26
22
  static VALUE sym_create;
27
23
  static VALUE sym_create_if_missing;
28
-
29
24
  static VALUE sym_chunk_size;
30
25
  static VALUE sym_max_buffer_memory;
31
26
  static VALUE sym_index_interval;
@@ -35,338 +30,25 @@ static VALUE sym_max_buffered_docs;
35
30
  static VALUE sym_max_merge_docs;
36
31
  static VALUE sym_max_field_length;
37
32
  static VALUE sym_use_compound_file;
38
-
39
- static VALUE sym_boost;
40
33
  static VALUE sym_field_infos;
41
34
 
42
- static VALUE sym_store;
43
- static VALUE sym_index;
44
- static VALUE sym_term_vector;
45
-
46
- static VALUE sym_brotli;
47
- static VALUE sym_bz2;
48
- static VALUE sym_lz4;
49
- static VALUE sym_compression;
50
-
51
- static VALUE sym_untokenized;
52
- static VALUE sym_omit_norms;
53
- static VALUE sym_untokenized_omit_norms;
54
-
55
- static VALUE sym_with_positions;
56
- static VALUE sym_with_offsets;
57
- static VALUE sym_with_positions_offsets;
58
-
59
35
  static ID fsym_content;
60
-
61
36
  static ID id_term;
62
- static ID id_fields;
63
37
  static ID id_fld_num_map;
64
38
  static ID id_field_num;
65
39
  static ID id_boost;
66
40
 
41
+ extern VALUE sym_each;
67
42
  extern rb_encoding *utf8_encoding;
68
- extern void frb_set_term(VALUE rterm, FrtTerm *t);
43
+ extern void frb_fi_get_params(VALUE roptions, FrtStoreValue *store, FrtCompressionType *compression, FrtIndexValue *index, FrtTermVectorValue *term_vector, float *boost);
69
44
  extern FrtAnalyzer *frb_get_cwrapped_analyzer(VALUE ranalyzer);
70
45
  extern VALUE frb_get_analyzer(FrtAnalyzer *a);
46
+ extern VALUE frb_get_field_info(FrtFieldInfo *fi);
47
+ extern VALUE frb_get_lazy_doc(FrtLazyDoc *lazy_doc);
48
+ extern void frb_set_term(VALUE rterm, FrtTerm *t);
71
49
 
72
- /****************************************************************************
73
- *
74
- * FieldInfo Methods
75
- *
76
- ****************************************************************************/
77
-
78
- static void frb_fi_free(void *p) {
79
- frt_fi_deref((FrtFieldInfo *)p);
80
- }
81
-
82
- static void frb_fi_get_params(VALUE roptions, FrtStoreValue *store, FrtCompressionType *compression, FrtIndexValue *index, FrtTermVectorValue *term_vector, float *boost) {
83
- VALUE v;
84
- Check_Type(roptions, T_HASH);
85
- v = rb_hash_aref(roptions, sym_boost);
86
- if (Qnil != v) {
87
- *boost = (float)NUM2DBL(v);
88
- } else {
89
- *boost = 1.0f;
90
- }
91
- v = rb_hash_aref(roptions, sym_store);
92
- if (Qnil != v) Check_Type(v, T_SYMBOL);
93
- if (v == sym_no || v == sym_false || v == Qfalse) {
94
- *store = FRT_STORE_NO;
95
- } else if (v == sym_yes || v == sym_true || v == Qtrue) {
96
- *store = FRT_STORE_YES;
97
- } else if (v == Qnil) {
98
- /* leave as default */
99
- } else {
100
- rb_raise(rb_eArgError, ":%s isn't a valid argument for :store. Please choose from [:yes, :no]",
101
- rb_id2name(SYM2ID(v)));
102
- }
103
-
104
- v = rb_hash_aref(roptions, sym_compression);
105
- if (Qnil != v) Check_Type(v, T_SYMBOL);
106
- if (v == sym_no || v == sym_false || v == Qfalse) {
107
- *compression = FRT_COMPRESSION_NONE;
108
- } else if (v == sym_yes || v == sym_true || v == Qtrue || v == sym_brotli) {
109
- *compression = FRT_COMPRESSION_BROTLI;
110
- } else if (v == sym_bz2) {
111
- *compression = FRT_COMPRESSION_BZ2;
112
- } else if (v == sym_lz4) {
113
- *compression = FRT_COMPRESSION_LZ4;
114
- } else if (v == Qnil) {
115
- /* leave as default */
116
- } else {
117
- rb_raise(rb_eArgError, ":%s isn't a valid argument for :compression. Please choose from [:yes, :no, :brotli, :bz2, :lz4]",
118
- rb_id2name(SYM2ID(v)));
119
- }
120
-
121
- v = rb_hash_aref(roptions, sym_index);
122
- if (Qnil != v) Check_Type(v, T_SYMBOL);
123
- if (v == sym_no || v == sym_false || v == Qfalse) {
124
- *index = FRT_INDEX_NO;
125
- } else if (v == sym_yes || v == sym_true || v == Qtrue) {
126
- *index = FRT_INDEX_YES;
127
- } else if (v == sym_untokenized) {
128
- *index = FRT_INDEX_UNTOKENIZED;
129
- } else if (v == sym_omit_norms) {
130
- *index = FRT_INDEX_YES_OMIT_NORMS;
131
- } else if (v == sym_untokenized_omit_norms) {
132
- *index = FRT_INDEX_UNTOKENIZED_OMIT_NORMS;
133
- } else if (v == Qnil) {
134
- /* leave as default */
135
- } else {
136
- rb_raise(rb_eArgError, ":%s isn't a valid argument for :index. Please choose from [:no, :yes, :untokenized, "
137
- ":omit_norms, :untokenized_omit_norms]", rb_id2name(SYM2ID(v)));
138
- }
139
-
140
- v = rb_hash_aref(roptions, sym_term_vector);
141
- if (Qnil != v) Check_Type(v, T_SYMBOL);
142
- if (v == sym_no || v == sym_false || v == Qfalse) {
143
- *term_vector = FRT_TERM_VECTOR_NO;
144
- } else if (v == sym_yes || v == sym_true || v == Qtrue) {
145
- *term_vector = FRT_TERM_VECTOR_YES;
146
- } else if (v == sym_with_positions) {
147
- *term_vector = FRT_TERM_VECTOR_WITH_POSITIONS;
148
- } else if (v == sym_with_offsets) {
149
- *term_vector = FRT_TERM_VECTOR_WITH_OFFSETS;
150
- } else if (v == sym_with_positions_offsets) {
151
- *term_vector = FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS;
152
- } else if (v == Qnil) {
153
- /* leave as default */
154
- } else {
155
- rb_raise(rb_eArgError, ":%s isn't a valid argument for :term_vector. Please choose from [:no, :yes, "
156
- ":with_positions, :with_offsets, :with_positions_offsets]", rb_id2name(SYM2ID(v)));
157
- }
158
- }
159
-
160
- static size_t frb_fi_size(const void *p) {
161
- return sizeof(FrtFieldInfo);
162
- (void)p;
163
- }
164
-
165
- const rb_data_type_t frb_field_info_t = {
166
- .wrap_struct_name = "FrbFieldInfo",
167
- .function = {
168
- .dmark = NULL,
169
- .dfree = frb_fi_free,
170
- .dsize = frb_fi_size,
171
- .dcompact = NULL,
172
- .reserved = {0},
173
- },
174
- .parent = NULL,
175
- .data = NULL,
176
- .flags = RUBY_TYPED_FREE_IMMEDIATELY
177
- };
178
-
179
- static VALUE frb_get_field_info(FrtFieldInfo *fi) {
180
- if (fi) {
181
- if (fi->rfi == 0 || fi->rfi == Qnil) {
182
- fi->rfi = TypedData_Wrap_Struct(cFieldInfo, &frb_field_info_t, fi);
183
- FRT_REF(fi);
184
- }
185
- return fi->rfi;
186
- }
187
- return Qnil;
188
- }
189
-
190
- /*
191
- * call-seq:
192
- * FieldInfo.new(name, options = {}) -> field_info
193
- *
194
- * Create a new FieldInfo object with the name +name+ and the properties
195
- * specified in +options+. The available options are [:store, :compression,
196
- * :index, :term_vector, :boost]. See the description of FieldInfo for more
197
- * information on these properties.
198
- */
199
- static VALUE frb_fi_alloc(VALUE rclass) {
200
- FrtFieldInfo *fi = frt_fi_alloc();
201
- return TypedData_Wrap_Struct(rclass, &frb_field_info_t, fi);
202
- }
203
-
204
- static VALUE frb_fi_init(int argc, VALUE *argv, VALUE self) {
205
- VALUE roptions, rname;
206
- FrtFieldInfo *fi;
207
- TypedData_Get_Struct(self, FrtFieldInfo, &frb_field_info_t, fi);
208
- FrtStoreValue store = FRT_STORE_YES;
209
- FrtCompressionType compression = FRT_COMPRESSION_NONE;
210
- FrtIndexValue index = FRT_INDEX_YES;
211
- FrtTermVectorValue term_vector = FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS;
212
- float boost = 1.0f;
213
-
214
- rb_scan_args(argc, argv, "11", &rname, &roptions);
215
- if (argc > 1) {
216
- frb_fi_get_params(roptions, &store, &compression, &index, &term_vector, &boost);
217
- }
218
- fi = frt_fi_init(fi, frb_field(rname), store, compression, index, term_vector);
219
- fi->boost = boost;
220
- fi->rfi = self;
221
- return self;
222
- }
223
-
224
- /*
225
- * call-seq:
226
- * fi.name -> symbol
227
- *
228
- * Return the name of the field
229
- */
230
- static VALUE frb_fi_name(VALUE self) {
231
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
232
- return ID2SYM(fi->name);
233
- }
234
-
235
- /*
236
- * call-seq:
237
- * fi.stored? -> bool
238
- *
239
- * Return true if the field is stored in the index.
240
- */
241
- static VALUE frb_fi_is_stored(VALUE self) {
242
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
243
- return fi_is_stored(fi) ? Qtrue : Qfalse;
244
- }
245
-
246
- /*
247
- * call-seq:
248
- * fi.compressed? -> bool
249
- *
250
- * Return true if the field is stored in the index in compressed format.
251
- */
252
- static VALUE frb_fi_is_compressed(VALUE self) {
253
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
254
- return fi_is_compressed(fi) ? Qtrue : Qfalse;
255
- }
256
-
257
- /*
258
- * call-seq:
259
- * fi.indexed? -> bool
260
- *
261
- * Return true if the field is indexed, i.e. searchable in the index.
262
- */
263
- static VALUE frb_fi_is_indexed(VALUE self) {
264
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
265
- return fi_is_indexed(fi) ? Qtrue : Qfalse;
266
- }
267
-
268
- /*
269
- * call-seq:
270
- * fi.tokenized? -> bool
271
- *
272
- * Return true if the field is tokenized. Tokenizing is the process of
273
- * breaking the field up into tokens. That is "the quick brown fox" becomes:
274
- *
275
- * ["the", "quick", "brown", "fox"]
276
- *
277
- * A field can only be tokenized if it is indexed.
278
- */
279
- static VALUE frb_fi_is_tokenized(VALUE self) {
280
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
281
- return fi_is_tokenized(fi) ? Qtrue : Qfalse;
282
- }
283
-
284
- /*
285
- * call-seq:
286
- * fi.omit_norms? -> bool
287
- *
288
- * Return true if the field omits the norm file. The norm file is the file
289
- * used to store the field boosts for an indexed field. If you do not boost
290
- * any fields, and you can live without scoring based on field length then
291
- * you can omit the norms file. This will give the index a slight performance
292
- * boost and it will use less memory, especially for indexes which have a
293
- * large number of documents.
294
- */
295
- static VALUE frb_fi_omit_norms(VALUE self) {
296
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
297
- return fi_omit_norms(fi) ? Qtrue : Qfalse;
298
- }
299
-
300
- /*
301
- * call-seq:
302
- * fi.store_term_vector? -> bool
303
- *
304
- * Return true if the term-vectors are stored for this field.
305
- */
306
- static VALUE frb_fi_store_term_vector(VALUE self) {
307
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
308
- return fi_store_term_vector(fi) ? Qtrue : Qfalse;
309
- }
310
-
311
- /*
312
- * call-seq:
313
- * fi.store_positions? -> bool
314
- *
315
- * Return true if positions are stored with the term-vectors for this field.
316
- */
317
- static VALUE frb_fi_store_positions(VALUE self) {
318
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
319
- return fi_store_positions(fi) ? Qtrue : Qfalse;
320
- }
321
-
322
- /*
323
- * call-seq:
324
- * fi.store_offsets? -> bool
325
- *
326
- * Return true if offsets are stored with the term-vectors for this field.
327
- */
328
- static VALUE frb_fi_store_offsets(VALUE self) {
329
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
330
- return fi_store_offsets(fi) ? Qtrue : Qfalse;
331
- }
332
-
333
- /*
334
- * call-seq:
335
- * fi.has_norms? -> bool
336
- *
337
- * Return true if this field has a norms file. This is the same as calling:
338
- *
339
- * fi.indexed? and not fi.omit_norms?
340
- */
341
- static VALUE frb_fi_has_norms(VALUE self) {
342
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
343
- return fi_has_norms(fi) ? Qtrue : Qfalse;
344
- }
345
-
346
- /*
347
- * call-seq:
348
- * fi.boost -> boost
349
- *
350
- * Return the default boost for this field
351
- */
352
- static VALUE frb_fi_boost(VALUE self) {
353
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
354
- return rb_float_new((double)fi->boost);
355
- }
356
-
357
- /*
358
- * call-seq:
359
- * fi.to_s -> string
360
- *
361
- * Return a string representation of the FieldInfo object.
362
- */
363
- static VALUE frb_fi_to_s(VALUE self) {
364
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
365
- char *fi_s = frt_fi_to_s(fi);
366
- VALUE rfi_s = rb_str_new2(fi_s);
367
- free(fi_s);
368
- return rfi_s;
369
- }
50
+ extern void Init_FieldInfo(void);
51
+ extern void Init_LazyDoc(void);
370
52
 
371
53
  /****************************************************************************
372
54
  *
@@ -1987,125 +1669,6 @@ frb_iw_set_use_compound_file(VALUE self, VALUE rval)
1987
1669
  return rval;
1988
1670
  }
1989
1671
 
1990
- /****************************************************************************
1991
- *
1992
- * LazyDoc Methods
1993
- *
1994
- ****************************************************************************/
1995
-
1996
- static void frb_lzd_data_free(void *p) {
1997
- frt_lazy_doc_close((FrtLazyDoc *)p);
1998
- }
1999
-
2000
- static size_t frb_lazy_doc_size(const void *p) {
2001
- return sizeof(FrtLazyDoc);
2002
- (void)p;
2003
- }
2004
-
2005
- const rb_data_type_t frb_lazy_doc_t = {
2006
- .wrap_struct_name = "FrbLazyDoc",
2007
- .function = {
2008
- .dmark = NULL,
2009
- .dfree = frb_lzd_data_free,
2010
- .dsize = frb_lazy_doc_size,
2011
- .dcompact = NULL,
2012
- .reserved = {0},
2013
- },
2014
- .parent = NULL,
2015
- .data = NULL,
2016
- .flags = RUBY_TYPED_FREE_IMMEDIATELY
2017
- };
2018
-
2019
- static VALUE frb_lzd_alloc(VALUE klass) {
2020
- FrtLazyDoc *ld = FRT_ALLOC(FrtLazyDoc);
2021
- return TypedData_Wrap_Struct(klass, &frb_lazy_doc_t, ld);
2022
- }
2023
-
2024
- static VALUE frb_lazy_df_load(VALUE self, VALUE rkey, FrtLazyDocField *lazy_df) {
2025
- VALUE rdata = Qnil;
2026
- if (lazy_df) {
2027
- if (lazy_df->size == 1) {
2028
- char *data = frt_lazy_df_get_data(lazy_df, 0);
2029
- rdata = rb_str_new(data, lazy_df->data[0].length);
2030
- rb_enc_associate(rdata, lazy_df->data[0].encoding);
2031
- } else {
2032
- int i;
2033
- VALUE rstr;
2034
- rdata = rb_ary_new2(lazy_df->size);
2035
- for (i = 0; i < lazy_df->size; i++) {
2036
- char *data = frt_lazy_df_get_data(lazy_df, i);
2037
- rstr = rb_str_new(data, lazy_df->data[i].length);
2038
- rb_enc_associate(rstr, lazy_df->data[i].encoding);
2039
- rb_ary_store(rdata, i, rstr);
2040
- }
2041
- }
2042
- rb_hash_aset(self, rkey, rdata);
2043
- }
2044
- return rdata;
2045
- }
2046
-
2047
- /*
2048
- * call-seq:
2049
- * lazy_doc.default(key) -> string
2050
- *
2051
- * This method is used internally to lazily load fields. You should never
2052
- * really need to call it yourself.
2053
- */
2054
- static VALUE frb_lzd_default(VALUE self, VALUE rkey) {
2055
- FrtLazyDoc *lazy_doc = (FrtLazyDoc *)DATA_PTR(rb_ivar_get(self, id_data));
2056
- ID field = frb_field(rkey);
2057
- VALUE rfield = ID2SYM(field);
2058
-
2059
- return frb_lazy_df_load(self, rfield, frt_lazy_doc_get(lazy_doc, field));
2060
- }
2061
-
2062
- /*
2063
- * call-seq:
2064
- * lazy_doc.fields -> array of available fields
2065
- *
2066
- * Returns the list of fields stored for this particular document. If you try
2067
- * to access any of these fields in the document the field will be loaded.
2068
- * Try to access any other field and nil will be returned.
2069
- */
2070
- static VALUE frb_lzd_fields(VALUE self) {
2071
- return rb_ivar_get(self, id_fields);
2072
- }
2073
-
2074
- /*
2075
- * call-seq:
2076
- * lazy_doc.load -> lazy_doc
2077
- *
2078
- * Load all unloaded fields in the document from the index.
2079
- */
2080
- static VALUE frb_lzd_load(VALUE self) {
2081
- FrtLazyDoc *lazy_doc = (FrtLazyDoc *)DATA_PTR(rb_ivar_get(self, id_data));
2082
- int i;
2083
- for (i = 0; i < lazy_doc->size; i++) {
2084
- FrtLazyDocField *lazy_df = lazy_doc->fields[i];
2085
- frb_lazy_df_load(self, ID2SYM(lazy_df->name), lazy_df);
2086
- }
2087
- return self;
2088
- }
2089
-
2090
- VALUE frb_get_lazy_doc(FrtLazyDoc *lazy_doc) {
2091
- int i;
2092
- VALUE rfields = rb_ary_new2(lazy_doc->size);
2093
-
2094
- VALUE self, rdata;
2095
- self = rb_hash_new();
2096
- OBJSETUP(self, cLazyDoc, T_HASH);
2097
-
2098
- rdata = TypedData_Wrap_Struct(cLazyDocData, &frb_lazy_doc_t, lazy_doc);
2099
- rb_ivar_set(self, id_data, rdata);
2100
-
2101
- for (i = 0; i < lazy_doc->size; i++) {
2102
- rb_ary_store(rfields, i, ID2SYM(lazy_doc->fields[i]->name));
2103
- }
2104
- rb_ivar_set(self, id_fields, rfields);
2105
-
2106
- return self;
2107
- }
2108
-
2109
1672
  /****************************************************************************
2110
1673
  *
2111
1674
  * IndexReader Methods
@@ -2743,190 +2306,39 @@ frb_ir_tk_fields(VALUE self)
2743
2306
  * Returns the current version of the index reader.
2744
2307
  */
2745
2308
  static VALUE
2746
- frb_ir_version(VALUE self)
2747
- {
2309
+ frb_ir_version(VALUE self) {
2748
2310
  FrtIndexReader *ir = (FrtIndexReader *)DATA_PTR(self);
2749
2311
  return ULL2NUM(ir->sis->version);
2750
2312
  }
2751
2313
 
2314
+ static VALUE frb_ir_to_enum(VALUE self) {
2315
+ return rb_enumeratorize(self, sym_each, 0, NULL);
2316
+ }
2317
+
2318
+ static VALUE frb_ir_each(VALUE self) {
2319
+ FrtIndexReader *ir = (FrtIndexReader *)DATA_PTR(self);
2320
+ if (rb_block_given_p()) {
2321
+ long i;
2322
+ long max_doc = ir->max_doc(ir);
2323
+ VALUE rld;
2324
+ for (i = 0; i < max_doc; i++) {
2325
+ if (ir->is_deleted(ir, i)) continue;
2326
+ rld = frb_get_lazy_doc(ir->get_lazy_doc(ir, i));
2327
+ rb_yield(rld);
2328
+ }
2329
+ return self;
2330
+ } else {
2331
+ return frb_ir_to_enum(self);
2332
+ }
2333
+
2334
+ }
2335
+
2752
2336
  /****************************************************************************
2753
2337
  *
2754
2338
  * Init Functions
2755
2339
  *
2756
2340
  ****************************************************************************/
2757
2341
 
2758
-
2759
- /*
2760
- * Document-class: Ferret::Index::FieldInfo
2761
- *
2762
- * == Summary
2763
- *
2764
- * The FieldInfo class is the field descriptor for the index. It specifies
2765
- * whether a field is compressed or not or whether it should be indexed and
2766
- * tokenized. Every field has a name which must be a symbol. There are three
2767
- * properties that you can set, +:store+, +:index+ and +:term_vector+. You
2768
- * can also set the default +:boost+ for a field as well.
2769
- *
2770
- * == Properties
2771
- *
2772
- * === :store
2773
- *
2774
- * The +:store+ property allows you to specify how a field is stored. You can
2775
- * leave a field unstored (+:no+), store it in its original format (+:yes+)
2776
- * or store it in compressed format (+:compressed+). By default the document
2777
- * is stored in its original format. If the field is large and it is stored
2778
- * elsewhere where it is easily accessible you might want to leave it
2779
- * unstored. This will keep the index size a lot smaller and make the
2780
- * indexing process a lot faster. For example, you should probably leave the
2781
- * +:content+ field unstored when indexing all the documents in your
2782
- * file-system.
2783
- *
2784
- * === :index
2785
- *
2786
- * The +:index+ property allows you to specify how a field is indexed. A
2787
- * field must be indexed to be searchable. However, a field doesn't need to
2788
- * be indexed to be stored in the Ferret index. You may want to use the index
2789
- * as a simple database and store things like images or MP3s in the index. By
2790
- * default each field is indexed and tokenized (split into tokens) (+:yes+).
2791
- * If you don't want to index the field use +:no+. If you want the field
2792
- * indexed but not tokenized, use +:untokenized+. Do this for the fields you
2793
- * wish to sort by. There are two other values for +:index+; +:omit_norms+
2794
- * and +:untokenized_omit_norms+. These values correspond to +:yes+ and
2795
- * +:untokenized+ respectively and are useful if you are not boosting any
2796
- * fields and you'd like to speed up the index. The norms file is the file
2797
- * which contains the boost values for each document for a particular field.
2798
- *
2799
- * === :term_vector
2800
- *
2801
- * See TermVector for a description of term-vectors. You can specify whether
2802
- * or not you would like to store term-vectors. The available options are
2803
- * +:no+, +:yes+, +:with_positions+, +:with_offsets+ and
2804
- * +:with_positions_offsets+. Note that you need to store the positions to
2805
- * associate offsets with individual terms in the term_vector.
2806
- *
2807
- * == Property Table
2808
- *
2809
- * Property Value Description
2810
- * ------------------------------------------------------------------------
2811
- * :store | :no | Don't store field
2812
- * | |
2813
- * | :yes (default) | Store field in its original
2814
- * | | format. Use this value if you
2815
- * | | want to highlight matches.
2816
- * | | or print match excerpts a la
2817
- * | | Google search.
2818
- * -------------|-------------------------|------------------------------
2819
- * :compression | :no (default) | Don't compress stored field
2820
- * | |
2821
- * | :brotli | Compress field using Brotli
2822
- * | |
2823
- * | :bz2 | Compress field using BZip2
2824
- * | |
2825
- * | :lz4 | Compress field using LZ4
2826
- * -------------|-------------------------|------------------------------
2827
- * :index | :no | Do not make this field
2828
- * | | searchable.
2829
- * | |
2830
- * | :yes (default) | Make this field searchable and
2831
- * | | tokenize its contents.
2832
- * | |
2833
- * | :untokenized | Make this field searchable but
2834
- * | | do not tokenize its contents.
2835
- * | | use this value for fields you
2836
- * | | wish to sort by.
2837
- * | |
2838
- * | :omit_norms | Same as :yes except omit the
2839
- * | | norms file. The norms file can
2840
- * | | be omitted if you don't boost
2841
- * | | any fields and you don't need
2842
- * | | scoring based on field length.
2843
- * | |
2844
- * | :untokenized_omit_norms | Same as :untokenized except omit
2845
- * | | the norms file. Norms files can
2846
- * | | be omitted if you don't boost
2847
- * | | any fields and you don't need
2848
- * | | scoring based on field length.
2849
- * | |
2850
- * -------------|-------------------------|------------------------------
2851
- * :term_vector | :no | Don't store term-vectors
2852
- * | |
2853
- * | :yes | Store term-vectors without
2854
- * | | storing positions or offsets.
2855
- * | |
2856
- * | :with_positions | Store term-vectors with
2857
- * | | positions.
2858
- * | |
2859
- * | :with_offsets | Store term-vectors with
2860
- * | | offsets.
2861
- * | |
2862
- * | :with_positions_offsets | Store term-vectors with
2863
- * | (default) | positions and offsets.
2864
- * -------------|-------------------------|------------------------------
2865
- * :boost | Float | The boost property is used to
2866
- * | | set the default boost for a
2867
- * | | field. This boost value will be
2868
- * | | used for all instances of the
2869
- * | | field in the index unless
2870
- * | | otherwise specified when you
2871
- * | | create the field. All values
2872
- * | | should be positive.
2873
- * | |
2874
- *
2875
- * == Examples
2876
- *
2877
- * fi = FieldInfo.new(:title, :index => :untokenized, :term_vector => :no,
2878
- * :boost => 10.0)
2879
- *
2880
- * fi = FieldInfo.new(:content)
2881
- *
2882
- * fi = FieldInfo.new(:created_on, :index => :untokenized_omit_norms,
2883
- * :term_vector => :no)
2884
- *
2885
- * fi = FieldInfo.new(:image, :store => :yes, :compression => :brotli, :index => :no,
2886
- * :term_vector => :no)
2887
- */
2888
- static void
2889
- Init_FieldInfo(void)
2890
- {
2891
- sym_store = ID2SYM(rb_intern("store"));
2892
- sym_index = ID2SYM(rb_intern("index"));
2893
- sym_term_vector = ID2SYM(rb_intern("term_vector"));
2894
-
2895
- sym_brotli = ID2SYM(rb_intern("brotli"));
2896
- sym_bz2 = ID2SYM(rb_intern("bz2"));
2897
- sym_lz4 = ID2SYM(rb_intern("lz4"));
2898
- // sym_level = ID2SYM(rb_intern("level"));
2899
- sym_compression = ID2SYM(rb_intern("compression"));
2900
-
2901
- sym_untokenized = ID2SYM(rb_intern("untokenized"));
2902
- sym_omit_norms = ID2SYM(rb_intern("omit_norms"));
2903
- sym_untokenized_omit_norms = ID2SYM(rb_intern("untokenized_omit_norms"));
2904
-
2905
- sym_with_positions = ID2SYM(rb_intern("with_positions"));
2906
- sym_with_offsets = ID2SYM(rb_intern("with_offsets"));
2907
- sym_with_positions_offsets = ID2SYM(rb_intern("with_positions_offsets"));
2908
-
2909
- cFieldInfo = rb_define_class_under(mIndex, "FieldInfo", rb_cObject);
2910
- rb_define_alloc_func(cFieldInfo, frb_fi_alloc);
2911
-
2912
- rb_define_method(cFieldInfo, "initialize", frb_fi_init, -1);
2913
- rb_define_method(cFieldInfo, "name", frb_fi_name, 0);
2914
- rb_define_method(cFieldInfo, "stored?", frb_fi_is_stored, 0);
2915
- rb_define_method(cFieldInfo, "compressed?", frb_fi_is_compressed, 0);
2916
- rb_define_method(cFieldInfo, "indexed?", frb_fi_is_indexed, 0);
2917
- rb_define_method(cFieldInfo, "tokenized?", frb_fi_is_tokenized, 0);
2918
- rb_define_method(cFieldInfo, "omit_norms?", frb_fi_omit_norms, 0);
2919
- rb_define_method(cFieldInfo, "store_term_vector?",
2920
- frb_fi_store_term_vector, 0);
2921
- rb_define_method(cFieldInfo, "store_positions?",
2922
- frb_fi_store_positions, 0);
2923
- rb_define_method(cFieldInfo, "store_offsets?",
2924
- frb_fi_store_offsets, 0);
2925
- rb_define_method(cFieldInfo, "has_norms?", frb_fi_has_norms, 0);
2926
- rb_define_method(cFieldInfo, "boost", frb_fi_boost, 0);
2927
- rb_define_method(cFieldInfo, "to_s", frb_fi_to_s, 0);
2928
- }
2929
-
2930
2342
  /*
2931
2343
  * Document-class: Ferret::Index::FieldInfos
2932
2344
  *
@@ -3350,48 +2762,6 @@ void Init_IndexWriter(void) {
3350
2762
  rb_define_method(cIndexWriter, "use_compound_file=", frb_iw_set_use_compound_file, 1);
3351
2763
  }
3352
2764
 
3353
- /*
3354
- * Document-class: Ferret::Index::LazyDoc
3355
- *
3356
- * == Summary
3357
- *
3358
- * When a document is retrieved from the index a LazyDoc is returned.
3359
- * Actually, LazyDoc is just a modified Hash object which lazily adds fields
3360
- * to itself when they are accessed. You should note that the keys method
3361
- * will return nothing until you actually access one of the fields. To see
3362
- * what fields are available use LazyDoc#fields rather than LazyDoc#keys. To
3363
- * load all fields use the LazyDoc#load method.
3364
- *
3365
- * == Example
3366
- *
3367
- * doc = index_reader[0]
3368
- *
3369
- * doc.keys #=> []
3370
- * doc.values #=> []
3371
- * doc.fields #=> [:title, :content]
3372
- *
3373
- * title = doc[:title] #=> "the title"
3374
- * doc.keys #=> [:title]
3375
- * doc.values #=> ["the title"]
3376
- * doc.fields #=> [:title, :content]
3377
- *
3378
- * doc.load
3379
- * doc.keys #=> [:title, :content]
3380
- * doc.values #=> ["the title", "the content"]
3381
- * doc.fields #=> [:title, :content]
3382
- */
3383
- void Init_LazyDoc(void) {
3384
- id_fields = rb_intern("@fields");
3385
-
3386
- cLazyDoc = rb_define_class_under(mIndex, "LazyDoc", rb_cHash);
3387
- rb_define_method(cLazyDoc, "default", frb_lzd_default, 1);
3388
- rb_define_method(cLazyDoc, "load", frb_lzd_load, 0);
3389
- rb_define_method(cLazyDoc, "fields", frb_lzd_fields, 0);
3390
-
3391
- cLazyDocData = rb_define_class_under(cLazyDoc, "LazyDocData", rb_cObject);
3392
- rb_define_alloc_func(cLazyDocData, frb_lzd_alloc);
3393
- }
3394
-
3395
2765
  /*
3396
2766
  * Document-class: Ferret::Index::IndexReader
3397
2767
  *
@@ -3405,36 +2775,38 @@ void Init_LazyDoc(void) {
3405
2775
  void Init_IndexReader(void) {
3406
2776
  cIndexReader = rb_define_class_under(mIndex, "IndexReader", rb_cObject);
3407
2777
  rb_define_alloc_func(cIndexReader, frb_ir_alloc);
3408
- rb_define_method(cIndexReader, "initialize", frb_ir_init, 1);
3409
- rb_define_method(cIndexReader, "set_norm", frb_ir_set_norm, 3);
3410
- rb_define_method(cIndexReader, "norms", frb_ir_norms, 1);
2778
+ rb_define_method(cIndexReader, "initialize", frb_ir_init, 1);
2779
+ rb_define_method(cIndexReader, "set_norm", frb_ir_set_norm, 3);
2780
+ rb_define_method(cIndexReader, "norms", frb_ir_norms, 1);
3411
2781
  rb_define_method(cIndexReader, "get_norms_into", frb_ir_get_norms_into, 3);
3412
- rb_define_method(cIndexReader, "commit", frb_ir_commit, 0);
3413
- rb_define_method(cIndexReader, "close", frb_ir_close, 0);
2782
+ rb_define_method(cIndexReader, "commit", frb_ir_commit, 0);
2783
+ rb_define_method(cIndexReader, "close", frb_ir_close, 0);
3414
2784
  rb_define_method(cIndexReader, "has_deletions?", frb_ir_has_deletions, 0);
3415
- rb_define_method(cIndexReader, "delete", frb_ir_delete, 1);
3416
- rb_define_method(cIndexReader, "deleted?", frb_ir_is_deleted, 1);
3417
- rb_define_method(cIndexReader, "max_doc", frb_ir_max_doc, 0);
3418
- rb_define_method(cIndexReader, "num_docs", frb_ir_num_docs, 0);
3419
- rb_define_method(cIndexReader, "undelete_all", frb_ir_undelete_all, 0);
3420
- rb_define_method(cIndexReader, "latest?", frb_ir_is_latest, 0);
3421
- rb_define_method(cIndexReader, "get_document", frb_ir_get_doc, -1);
3422
- rb_define_method(cIndexReader, "[]", frb_ir_get_doc, -1);
3423
- rb_define_method(cIndexReader, "term_vector", frb_ir_term_vector, 2);
3424
- rb_define_method(cIndexReader, "term_vectors", frb_ir_term_vectors, 1);
3425
- rb_define_method(cIndexReader, "term_docs", frb_ir_term_docs, 0);
2785
+ rb_define_method(cIndexReader, "delete", frb_ir_delete, 1);
2786
+ rb_define_method(cIndexReader, "deleted?", frb_ir_is_deleted, 1);
2787
+ rb_define_method(cIndexReader, "max_doc", frb_ir_max_doc, 0);
2788
+ rb_define_method(cIndexReader, "num_docs", frb_ir_num_docs, 0);
2789
+ rb_define_method(cIndexReader, "undelete_all", frb_ir_undelete_all, 0);
2790
+ rb_define_method(cIndexReader, "latest?", frb_ir_is_latest, 0);
2791
+ rb_define_method(cIndexReader, "get_document", frb_ir_get_doc, -1);
2792
+ rb_define_method(cIndexReader, "[]", frb_ir_get_doc, -1);
2793
+ rb_define_method(cIndexReader, "term_vector", frb_ir_term_vector, 2);
2794
+ rb_define_method(cIndexReader, "term_vectors", frb_ir_term_vectors, 1);
2795
+ rb_define_method(cIndexReader, "term_docs", frb_ir_term_docs, 0);
3426
2796
  rb_define_method(cIndexReader, "term_positions", frb_ir_term_positions, 0);
3427
2797
  rb_define_method(cIndexReader, "term_docs_for", frb_ir_term_docs_for, 2);
3428
2798
  rb_define_method(cIndexReader, "term_positions_for", frb_ir_t_pos_for, 2);
3429
- rb_define_method(cIndexReader, "doc_freq", frb_ir_doc_freq, 2);
3430
- rb_define_method(cIndexReader, "terms", frb_ir_terms, 1);
3431
- rb_define_method(cIndexReader, "terms_from", frb_ir_terms_from, 2);
3432
- rb_define_method(cIndexReader, "term_count", frb_ir_term_count, 1);
3433
- rb_define_method(cIndexReader, "fields", frb_ir_fields, 0);
3434
- rb_define_method(cIndexReader, "field_names", frb_ir_fields, 0);
3435
- rb_define_method(cIndexReader, "field_infos", frb_ir_field_infos, 0);
3436
- rb_define_method(cIndexReader, "tokenized_fields", frb_ir_tk_fields, 0);
3437
- rb_define_method(cIndexReader, "version", frb_ir_version, 0);
2799
+ rb_define_method(cIndexReader, "doc_freq", frb_ir_doc_freq, 2);
2800
+ rb_define_method(cIndexReader, "terms", frb_ir_terms, 1);
2801
+ rb_define_method(cIndexReader, "terms_from", frb_ir_terms_from, 2);
2802
+ rb_define_method(cIndexReader, "term_count", frb_ir_term_count, 1);
2803
+ rb_define_method(cIndexReader, "fields", frb_ir_fields, 0);
2804
+ rb_define_method(cIndexReader, "field_names", frb_ir_fields, 0);
2805
+ rb_define_method(cIndexReader, "field_infos", frb_ir_field_infos, 0);
2806
+ rb_define_method(cIndexReader, "tokenized_fields", frb_ir_tk_fields, 0);
2807
+ rb_define_method(cIndexReader, "version", frb_ir_version, 0);
2808
+ rb_define_method(cIndexReader, "each", frb_ir_each, 0);
2809
+ rb_define_method(cIndexReader, "to_enum", frb_ir_to_enum, 0);
3438
2810
  }
3439
2811
 
3440
2812
  /* rdoc hack