isomorfeus-ferret 0.14.0 → 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e6893e7012cf75189d3ff378b6e869a831a5281472f84ea5ab4e354bd92bfcee
4
- data.tar.gz: 0a4cad49faae062c29e0bed8fd7f87c5e3875c548cbb6e168907719b52777306
3
+ metadata.gz: 5818fce6d84b9bd4814be3bbed270127e05297dcf85adeebc495c8f334430d88
4
+ data.tar.gz: 77c9c3246c7777947084b47620d3aeeeb9eb76d7b0a17a4d30a37a38547a54da
5
5
  SHA512:
6
- metadata.gz: 4e48ec64d99af7fe0440480f11f22fd79dd9fca0c6dd09ce58bc0953f7f556ee4f61bdf18810f5cb2f39c8f80c99b406c5319c93235d06974009cefb1c73fccb
7
- data.tar.gz: 254960eb7543fb59e1d12087f83feaeb4abd957314f76b56778baf7a2fef2e090922b4b99d653e8532e30690a14d90c661ec5f849588a7e384b2a65346f7f04d
6
+ metadata.gz: 59632a0b46b9bd247da0f8b3908654a8027fbcef2aadc897f7681d25b03d4404191d037be323f666ef9bae679c72b135318aa853158e6bf0205b754ec3b2b18f
7
+ data.tar.gz: 2a037003347c6bca0900bf80410e83f43d397400f37e22f112e6ef6893a568dba29561b12594f803f3b28baee9f5f1ae67595c244d91b7dffa9d06e4e493c891
data/LICENSE CHANGED
@@ -143,7 +143,7 @@ The following licenses apply to files, which are distributed within the repo
143
143
  but not distributed with the gem and not used at runtime:
144
144
 
145
145
 
146
- For the Reuter-21578 files in the misc/ferret_vs_lucene directory (corpus, etc.),
146
+ For the Reuter-21578 files in the misc/ferret_vs_others directory (corpus, etc.),
147
147
  used for research for developing search engine technology:
148
148
 
149
149
  The copyright for the text of newswire articles and Reuters
@@ -156,7 +156,7 @@ Distribution 1.0", and inform your readers of the current location of
156
156
  the data set (see "Availability & Questions").
157
157
 
158
158
 
159
- Apache Lucene jars in the misc/ferret_vs_lucene directory:
159
+ Apache Lucene jars in the misc/ferret_vs_others directory:
160
160
 
161
161
 
162
162
  Apache License
data/README.md CHANGED
@@ -69,7 +69,7 @@ Compression semantics have changed, now Brotli, BZip2 and LZ4 compression codecs
69
69
  - LZ4: fast compression, fast decompression, low compression ratio
70
70
 
71
71
  To see performance and compression ratios `rake ferret_compression_bench` can be run from the cloned repo.
72
- It uses data and code within the misc/ferret_vs_lucene directory.
72
+ It uses data and code within the misc/ferret_vs_others directory.
73
73
 
74
74
  To compress a stored field the :compression option can be used with one of: :no, :brotli, :bz2 or :lz4.
75
75
  Example:
@@ -96,7 +96,7 @@ https://github.com/isomorfeus/isomorfeus-ferret/blob/master/lib/isomorfeus/ferre
96
96
  The query language and parser are documented here:
97
97
  https://github.com/isomorfeus/isomorfeus-ferret/blob/master/ext/isomorfeus_ferret_ext/frb_qparser.c
98
98
 
99
- Examples can be found in the 'test' directory or in 'misc/ferret_vs_lucene'.
99
+ Examples can be found in the 'test' directory or in 'misc/ferret_vs_others'.
100
100
 
101
101
  ## Running Specs
102
102
 
@@ -111,24 +111,24 @@ Ensure your locale is set to C.UTF-8, because the internal c tests don't know ho
111
111
  ### Indexing and Searching
112
112
  - clone repo
113
113
  - bundle install
114
- - rake ferret_vs_lucene
114
+ - rake ferret_vs_others
115
115
 
116
116
  A recent Java JDK must be installed to compile and run lucene benchmarks.
117
117
 
118
- Results, Ferret 0.14.0 vs. Lucene 9.1.0, WhitespaceAnalyzer,
119
- Linux Ubuntu 20.04, FreeBSD 13.1 and Windows 10 on old Intel Core i5 from 2015,
118
+ Results, Ferret 0.14.0 vs. Lucene 9.2.0, WhitespaceAnalyzer,
119
+ Linux Ubuntu 22.04, FreeBSD 13.1 and Windows 10 on old Intel Core i5 from 2015,
120
120
  LinuxPi on RaspberryPi 400:
121
121
 
122
122
  | OS | Task | Ferret | Lucene* |
123
123
  |---------|------------|-----------------|----------------|
124
- | Linux | Indexing | 5125 docs/s | 4671 docs/s |
124
+ | Linux | Indexing | 5125 docs/s | 4959 docs/s |
125
125
  | FreeBSD | Indexing | 4537 docs/s | 3831 docs/s |
126
126
  | Windows | Indexing | 2488 docs/s | 2588 docs/s |
127
- | LinuxPi | Indexing | 1200 docs/s | 551 docs/s |
127
+ | LinuxPi | Indexing | 1200 docs/s | 755 docs/s |
128
128
  | Linux | Searching | 26610 queries/s | 7165 queries/s |
129
129
  | FreeBSD | Searching | 24167 queries/s | 4288 queries/s |
130
130
  | Windows | Searching | 3901 queries/s | 1033 queries/s |
131
- | LinuxPi | Searching | 6194 queries/s | 769 queries/s |
131
+ | LinuxPi | Searching | 6194 queries/s | 785 queries/s |
132
132
  | | Index Size | 28 MB | 35 MB |
133
133
 
134
134
  * JVM Versions:
@@ -0,0 +1,539 @@
1
+ #include "frt_index.h"
2
+ #include "isomorfeus_ferret.h"
3
+
4
+ VALUE cFieldInfo;
5
+
6
+ static VALUE sym_store;
7
+ static VALUE sym_index;
8
+ static VALUE sym_compression;
9
+ static VALUE sym_brotli;
10
+ static VALUE sym_bz2;
11
+ static VALUE sym_lz4;
12
+ static VALUE sym_term_vector;
13
+ static VALUE sym_omit_norms;
14
+ static VALUE sym_untokenized;
15
+ static VALUE sym_untokenized_omit_norms;
16
+ static VALUE sym_with_offsets;
17
+ static VALUE sym_with_positions;
18
+ static VALUE sym_with_positions_offsets;
19
+
20
+ extern VALUE sym_boost;
21
+
22
+ void frb_fi_get_params(VALUE roptions, FrtStoreValue *store, FrtCompressionType *compression, FrtIndexValue *index, FrtTermVectorValue *term_vector, float *boost) {
23
+ VALUE v;
24
+ Check_Type(roptions, T_HASH);
25
+ v = rb_hash_aref(roptions, sym_boost);
26
+ if (Qnil != v) {
27
+ *boost = (float)NUM2DBL(v);
28
+ } else {
29
+ *boost = 1.0f;
30
+ }
31
+ v = rb_hash_aref(roptions, sym_store);
32
+ if (Qnil != v) Check_Type(v, T_SYMBOL);
33
+ if (v == sym_no || v == sym_false || v == Qfalse) {
34
+ *store = FRT_STORE_NO;
35
+ } else if (v == sym_yes || v == sym_true || v == Qtrue) {
36
+ *store = FRT_STORE_YES;
37
+ } else if (v == Qnil) {
38
+ /* leave as default */
39
+ } else {
40
+ rb_raise(rb_eArgError, ":%s isn't a valid argument for :store. Please choose from [:yes, :no]",
41
+ rb_id2name(SYM2ID(v)));
42
+ }
43
+
44
+ v = rb_hash_aref(roptions, sym_compression);
45
+ if (Qnil != v) Check_Type(v, T_SYMBOL);
46
+ if (v == sym_no || v == sym_false || v == Qfalse) {
47
+ *compression = FRT_COMPRESSION_NONE;
48
+ } else if (v == sym_yes || v == sym_true || v == Qtrue || v == sym_brotli) {
49
+ *compression = FRT_COMPRESSION_BROTLI;
50
+ } else if (v == sym_bz2) {
51
+ *compression = FRT_COMPRESSION_BZ2;
52
+ } else if (v == sym_lz4) {
53
+ *compression = FRT_COMPRESSION_LZ4;
54
+ } else if (v == Qnil) {
55
+ /* leave as default */
56
+ } else {
57
+ rb_raise(rb_eArgError, ":%s isn't a valid argument for :compression. Please choose from [:yes, :no, :brotli, :bz2, :lz4]",
58
+ rb_id2name(SYM2ID(v)));
59
+ }
60
+
61
+ v = rb_hash_aref(roptions, sym_index);
62
+ if (Qnil != v) Check_Type(v, T_SYMBOL);
63
+ if (v == sym_no || v == sym_false || v == Qfalse) {
64
+ *index = FRT_INDEX_NO;
65
+ } else if (v == sym_yes || v == sym_true || v == Qtrue) {
66
+ *index = FRT_INDEX_YES;
67
+ } else if (v == sym_untokenized) {
68
+ *index = FRT_INDEX_UNTOKENIZED;
69
+ } else if (v == sym_omit_norms) {
70
+ *index = FRT_INDEX_YES_OMIT_NORMS;
71
+ } else if (v == sym_untokenized_omit_norms) {
72
+ *index = FRT_INDEX_UNTOKENIZED_OMIT_NORMS;
73
+ } else if (v == Qnil) {
74
+ /* leave as default */
75
+ } else {
76
+ rb_raise(rb_eArgError, ":%s isn't a valid argument for :index. Please choose from [:no, :yes, :untokenized, "
77
+ ":omit_norms, :untokenized_omit_norms]", rb_id2name(SYM2ID(v)));
78
+ }
79
+
80
+ v = rb_hash_aref(roptions, sym_term_vector);
81
+ if (Qnil != v) Check_Type(v, T_SYMBOL);
82
+ if (v == sym_no || v == sym_false || v == Qfalse) {
83
+ *term_vector = FRT_TERM_VECTOR_NO;
84
+ } else if (v == sym_yes || v == sym_true || v == Qtrue) {
85
+ *term_vector = FRT_TERM_VECTOR_YES;
86
+ } else if (v == sym_with_positions) {
87
+ *term_vector = FRT_TERM_VECTOR_WITH_POSITIONS;
88
+ } else if (v == sym_with_offsets) {
89
+ *term_vector = FRT_TERM_VECTOR_WITH_OFFSETS;
90
+ } else if (v == sym_with_positions_offsets) {
91
+ *term_vector = FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS;
92
+ } else if (v == Qnil) {
93
+ /* leave as default */
94
+ if (*index == FRT_INDEX_NO) *term_vector = FRT_TERM_VECTOR_NO;
95
+ } else {
96
+ rb_raise(rb_eArgError, ":%s isn't a valid argument for :term_vector. Please choose from [:no, :yes, "
97
+ ":with_positions, :with_offsets, :with_positions_offsets]", rb_id2name(SYM2ID(v)));
98
+ }
99
+ }
100
+
101
+ static void frb_fi_free(void *p) {
102
+ frt_fi_deref((FrtFieldInfo *)p);
103
+ }
104
+
105
+ static size_t frb_fi_size(const void *p) {
106
+ return sizeof(FrtFieldInfo);
107
+ (void)p;
108
+ }
109
+
110
+ const rb_data_type_t frb_field_info_t = {
111
+ .wrap_struct_name = "FrbFieldInfo",
112
+ .function = {
113
+ .dmark = NULL,
114
+ .dfree = frb_fi_free,
115
+ .dsize = frb_fi_size,
116
+ .dcompact = NULL,
117
+ .reserved = {0},
118
+ },
119
+ .parent = NULL,
120
+ .data = NULL,
121
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY
122
+ };
123
+
124
+ VALUE frb_get_field_info(FrtFieldInfo *fi) {
125
+ if (fi) {
126
+ if (fi->rfi == 0 || fi->rfi == Qnil) {
127
+ fi->rfi = TypedData_Wrap_Struct(cFieldInfo, &frb_field_info_t, fi);
128
+ FRT_REF(fi);
129
+ }
130
+ return fi->rfi;
131
+ }
132
+ return Qnil;
133
+ }
134
+
135
+ /*
136
+ * call-seq:
137
+ * FieldInfo.new(name, options = {}) -> field_info
138
+ *
139
+ * Create a new FieldInfo object with the name +name+ and the properties
140
+ * specified in +options+. The available options are [:store, :compression,
141
+ * :index, :term_vector, :boost]. See the description of FieldInfo for more
142
+ * information on these properties.
143
+ */
144
+ static VALUE frb_fi_alloc(VALUE rclass) {
145
+ FrtFieldInfo *fi = frt_fi_alloc();
146
+ return TypedData_Wrap_Struct(rclass, &frb_field_info_t, fi);
147
+ }
148
+
149
+ static VALUE frb_fi_init(int argc, VALUE *argv, VALUE self) {
150
+ VALUE roptions, rname;
151
+ FrtFieldInfo *fi;
152
+ TypedData_Get_Struct(self, FrtFieldInfo, &frb_field_info_t, fi);
153
+ FrtStoreValue store = FRT_STORE_YES;
154
+ FrtCompressionType compression = FRT_COMPRESSION_NONE;
155
+ FrtIndexValue index = FRT_INDEX_YES;
156
+ FrtTermVectorValue term_vector = FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS;
157
+ float boost = 1.0f;
158
+
159
+ rb_scan_args(argc, argv, "11", &rname, &roptions);
160
+ if (argc > 1) {
161
+ frb_fi_get_params(roptions, &store, &compression, &index, &term_vector, &boost);
162
+ }
163
+ fi = frt_fi_init(fi, frb_field(rname), store, compression, index, term_vector);
164
+ fi->boost = boost;
165
+ fi->rfi = self;
166
+ return self;
167
+ }
168
+
169
+ /*
170
+ * call-seq:
171
+ * fi.name -> symbol
172
+ *
173
+ * Return the name of the field
174
+ */
175
+ static VALUE frb_fi_name(VALUE self) {
176
+ FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
177
+ return ID2SYM(fi->name);
178
+ }
179
+
180
+ /*
181
+ * call-seq:
182
+ * fi.stored? -> bool
183
+ *
184
+ * Return true if the field is stored in the index.
185
+ */
186
+ static VALUE frb_fi_is_stored(VALUE self) {
187
+ FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
188
+ return fi_is_stored(fi) ? Qtrue : Qfalse;
189
+ }
190
+
191
+ /*
192
+ * call-seq:
193
+ * fi.compressed? -> bool
194
+ *
195
+ * Return true if the field is stored in the index in compressed format.
196
+ */
197
+ static VALUE frb_fi_is_compressed(VALUE self) {
198
+ FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
199
+ return fi_is_compressed(fi) ? Qtrue : Qfalse;
200
+ }
201
+
202
+ /*
203
+ * call-seq:
204
+ * fi.indexed? -> bool
205
+ *
206
+ * Return true if the field is indexed, ie searchable in the index.
207
+ */
208
+ static VALUE frb_fi_is_indexed(VALUE self) {
209
+ FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
210
+ return fi_is_indexed(fi) ? Qtrue : Qfalse;
211
+ }
212
+
213
+ /*
214
+ * call-seq:
215
+ * fi.tokenized? -> bool
216
+ *
217
+ * Return true if the field is tokenized. Tokenizing is the process of
218
+ * breaking the field up into tokens. That is "the quick brown fox" becomes:
219
+ *
220
+ * ["the", "quick", "brown", "fox"]
221
+ *
222
+ * A field can only be tokenized if it is indexed.
223
+ */
224
+ static VALUE frb_fi_is_tokenized(VALUE self) {
225
+ FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
226
+ return fi_is_tokenized(fi) ? Qtrue : Qfalse;
227
+ }
228
+
229
+ /*
230
+ * call-seq:
231
+ * fi.omit_norms? -> bool
232
+ *
233
+ * Return true if the field omits the norm file. The norm file is the file
234
+ * used to store the field boosts for an indexed field. If you do not boost
235
+ * any fields, and you can live without scoring based on field length then
236
+ * you can omit the norms file. This will give the index a slight performance
237
+ * boost and it will use less memory, especially for indexes which have a
238
+ * large number of documents.
239
+ */
240
+ static VALUE frb_fi_omit_norms(VALUE self) {
241
+ FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
242
+ return fi_omit_norms(fi) ? Qtrue : Qfalse;
243
+ }
244
+
245
+ /*
246
+ * call-seq:
247
+ * fi.store_term_vector? -> bool
248
+ *
249
+ * Return true if the term-vectors are stored for this field.
250
+ */
251
+ static VALUE frb_fi_store_term_vector(VALUE self) {
252
+ FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
253
+ return fi_store_term_vector(fi) ? Qtrue : Qfalse;
254
+ }
255
+
256
+ /*
257
+ * call-seq:
258
+ * fi.store_positions? -> bool
259
+ *
260
+ * Return true if positions are stored with the term-vectors for this field.
261
+ */
262
+ static VALUE frb_fi_store_positions(VALUE self) {
263
+ FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
264
+ return fi_store_positions(fi) ? Qtrue : Qfalse;
265
+ }
266
+
267
+ /*
268
+ * call-seq:
269
+ * fi.store_offsets? -> bool
270
+ *
271
+ * Return true if offsets are stored with the term-vectors for this field.
272
+ */
273
+ static VALUE frb_fi_store_offsets(VALUE self) {
274
+ FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
275
+ return fi_store_offsets(fi) ? Qtrue : Qfalse;
276
+ }
277
+
278
+ /*
279
+ * call-seq:
280
+ * fi.has_norms? -> bool
281
+ *
282
+ * Return true if this field has a norms file. This is the same as calling:
283
+ *
284
+ * fi.indexed? and not fi.omit_norms?
285
+ */
286
+ static VALUE frb_fi_has_norms(VALUE self) {
287
+ FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
288
+ return fi_has_norms(fi) ? Qtrue : Qfalse;
289
+ }
290
+
291
+ /*
292
+ * call-seq:
293
+ * fi.boost -> boost
294
+ *
295
+ * Return the default boost for this field
296
+ */
297
+ static VALUE frb_fi_boost(VALUE self) {
298
+ FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
299
+ return rb_float_new((double)fi->boost);
300
+ }
301
+
302
+ /*
303
+ * call-seq:
304
+ * fi.to_s -> string
305
+ *
306
+ * Return a string representation of the FieldInfo object.
307
+ */
308
+ static VALUE frb_fi_to_s(VALUE self) {
309
+ FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
310
+ char *fi_s = frt_fi_to_s(fi);
311
+ VALUE rfi_s = rb_str_new2(fi_s);
312
+ free(fi_s);
313
+ return rfi_s;
314
+ }
315
+
316
+ /*
317
+ * call-seq:
318
+ * fi.to_h -> Hash
319
+ *
320
+ * Return a Hash representation of the FieldInfo object.
321
+ */
322
+ static VALUE frb_fi_to_h(VALUE self) {
323
+ FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
324
+ VALUE hash = rb_hash_new();
325
+ VALUE val;
326
+ bool o;
327
+
328
+ // :index
329
+ if (!fi_is_indexed(fi)) val = sym_no;
330
+ else {
331
+ bool t = fi_is_tokenized(fi);
332
+ o = fi_omit_norms(fi);
333
+ if (!t && o) val = sym_untokenized_omit_norms;
334
+ else if (t && o) val = sym_omit_norms;
335
+ else if (!t && !o) val = sym_untokenized;
336
+ else val = sym_yes;
337
+ }
338
+ rb_hash_aset(hash, sym_index, val);
339
+
340
+ // :store
341
+ rb_hash_aset(hash, sym_store, fi_is_stored(fi) ? sym_yes : sym_no);
342
+
343
+ // :compression
344
+ if (!fi_is_compressed(fi)) val = sym_no;
345
+ else {
346
+ if (fi_is_compressed_brotli(fi)) val = sym_brotli;
347
+ else if (fi_is_compressed_bz2(fi)) val = sym_bz2;
348
+ else if (fi_is_compressed_lz4(fi)) val = sym_lz4;
349
+ else val = sym_yes;
350
+ }
351
+ rb_hash_aset(hash, sym_compression, val);
352
+
353
+ // :term_vector
354
+ if (!fi_store_term_vector(fi)) val = sym_no;
355
+ else {
356
+ bool p = fi_store_positions(fi);
357
+ o = fi_store_offsets(fi);
358
+ if (p && o) val = sym_with_positions_offsets;
359
+ else if (o) val = sym_with_offsets;
360
+ else if (p) val = sym_with_positions;
361
+ else val = sym_yes;
362
+ }
363
+ rb_hash_aset(hash, sym_term_vector, val);
364
+
365
+ // :boost
366
+ rb_hash_aset(hash, sym_boost, rb_float_new((double)fi->boost));
367
+
368
+ return hash;
369
+ }
370
+
371
+ /*
372
+ * Document-class: Ferret::Index::FieldInfo
373
+ *
374
+ * == Summary
375
+ *
376
+ * The FieldInfo class is the field descriptor for the index. It specifies
377
+ * whether a field is compressed or not or whether it should be indexed and
378
+ * tokenized. Every field has a name which must be a symbol. There are four
378
+ * properties that you can set, +:store+, +:compression+, +:index+ and
379
+ * +:term_vector+. You can also set the default +:boost+ for a field.
381
+ *
382
+ * == Properties
383
+ *
384
+ * === :store
385
+ *
386
+ * The +:store+ property allows you to specify how a field is stored. You can
387
+ * leave a field unstored (+:no+) or store it in its original format (+:yes+);
388
+ * compression is set separately with +:compression+. By default the document
389
+ * is stored in its original format. If the field is large and it is stored
390
+ * elsewhere where it is easily accessible you might want to leave it
391
+ * unstored. This will keep the index size a lot smaller and make the
392
+ * indexing process a lot faster. For example, you should probably leave the
393
+ * +:content+ field unstored when indexing all the documents in your
394
+ * file-system.
395
+ *
396
+ * === :index
397
+ *
398
+ * The +:index+ property allows you to specify how a field is indexed. A
399
+ * field must be indexed to be searchable. However, a field doesn't need to
400
+ * be indexed to be store in the Ferret index. You may want to use the index
401
+ * as a simple database and store things like images or MP3s in the index. By
402
+ * default each field is indexed and tokenized (split into tokens) (+:yes+).
403
+ * If you don't want to index the field use +:no+. If you want the field
404
+ * indexed but not tokenized, use +:untokenized+. Do this for the fields you
405
+ * wish to sort by. There are two other values for +:index+; +:omit_norms+
406
+ * and +:untokenized_omit_norms+. These values correspond to +:yes+ and
407
+ * +:untokenized+ respectively and are useful if you are not boosting any
408
+ * fields and you'd like to speed up the index. The norms file is the file
409
+ * which contains the boost values for each document for a particular field.
410
+ *
411
+ * === :term_vector
412
+ *
413
+ * See TermVector for a description of term-vectors. You can specify whether
414
+ * or not you would like to store term-vectors. The available options are
415
+ * +:no+, +:yes+, +:with_positions+, +:with_offsets+ and
416
+ * +:with_positions_offsets+. Note that you need to store the positions to
417
+ * associate offsets with individual terms in the term_vector.
418
+ *
419
+ * == Property Table
420
+ *
421
+ * Property Value Description
422
+ * ------------------------------------------------------------------------
423
+ * :store | :no | Don't store field
424
+ * | |
425
+ * | :yes (default) | Store field in its original
426
+ * | | format. Use this value if you
427
+ * | | want to highlight matches
428
+ * | | or print match excerpts a la
429
+ * | | Google search.
430
+ * -------------|-------------------------|------------------------------
431
+ * :compression | :no (default) | Don't compress stored field
432
+ * | |
433
+ * | :brotli | Compress field using Brotli
434
+ * | |
435
+ * | :bz2 | Compress field using BZip2
436
+ * | |
437
+ * | :lz4 | Compress field using LZ4
438
+ * -------------|-------------------------|------------------------------
439
+ * :index | :no | Do not make this field
440
+ * | | searchable.
441
+ * | |
442
+ * | :yes (default) | Make this field searchable and
443
+ * | | tokenize its contents.
444
+ * | |
445
+ * | :untokenized | Make this field searchable but
446
+ * | | do not tokenize its contents.
447
+ * | | use this value for fields you
448
+ * | | wish to sort by.
449
+ * | |
450
+ * | :omit_norms | Same as :yes except omit the
451
+ * | | norms file. The norms file can
452
+ * | | be omitted if you don't boost
453
+ * | | any fields and you don't need
454
+ * | | scoring based on field length.
455
+ * | |
456
+ * | :untokenized_omit_norms | Same as :untokenized except omit
457
+ * | | the norms file. Norms files can
458
+ * | | be omitted if you don't boost
459
+ * | | any fields and you don't need
460
+ * | | scoring based on field length.
461
+ * | |
462
+ * -------------|-------------------------|------------------------------
463
+ * :term_vector | :no | Don't store term-vectors
464
+ * | |
465
+ * | :yes | Store term-vectors without
466
+ * | | storing positions or offsets.
467
+ * | |
468
+ * | :with_positions | Store term-vectors with
469
+ * | | positions.
470
+ * | |
471
+ * | :with_offsets | Store term-vectors with
472
+ * | | offsets.
473
+ * | |
474
+ * | :with_positions_offsets | Store term-vectors with
475
+ * | (default) | positions and offsets.
476
+ * -------------|-------------------------|------------------------------
477
+ * :boost | Float | The boost property is used to
478
+ * | | set the default boost for a
479
+ * | | field. This boost value will
480
+ * | | be used for all instances of the
481
+ * | | field in the index unless
482
+ * | | otherwise specified when you
483
+ * | | create the field. All values
484
+ * | | should be positive.
485
+ * | |
486
+ *
487
+ * == Examples
488
+ *
489
+ * fi = FieldInfo.new(:title, :index => :untokenized, :term_vector => :no,
490
+ * :boost => 10.0)
491
+ *
492
+ * fi = FieldInfo.new(:content)
493
+ *
494
+ * fi = FieldInfo.new(:created_on, :index => :untokenized_omit_norms,
495
+ * :term_vector => :no)
496
+ *
497
+ * fi = FieldInfo.new(:image, :store => :yes, :compression => :brotli, :index => :no,
498
+ * :term_vector => :no)
499
+ */
500
+ void Init_FieldInfo(void) {
501
+ sym_store = ID2SYM(rb_intern("store"));
502
+ sym_index = ID2SYM(rb_intern("index"));
503
+ sym_term_vector = ID2SYM(rb_intern("term_vector"));
504
+
505
+ sym_brotli = ID2SYM(rb_intern("brotli"));
506
+ sym_bz2 = ID2SYM(rb_intern("bz2"));
507
+ sym_lz4 = ID2SYM(rb_intern("lz4"));
508
+ // sym_level = ID2SYM(rb_intern("level"));
509
+ sym_compression = ID2SYM(rb_intern("compression"));
510
+
511
+ sym_untokenized = ID2SYM(rb_intern("untokenized"));
512
+ sym_omit_norms = ID2SYM(rb_intern("omit_norms"));
513
+ sym_untokenized_omit_norms = ID2SYM(rb_intern("untokenized_omit_norms"));
514
+
515
+ sym_with_positions = ID2SYM(rb_intern("with_positions"));
516
+ sym_with_offsets = ID2SYM(rb_intern("with_offsets"));
517
+ sym_with_positions_offsets = ID2SYM(rb_intern("with_positions_offsets"));
518
+
519
+ cFieldInfo = rb_define_class_under(mIndex, "FieldInfo", rb_cObject);
520
+ rb_define_alloc_func(cFieldInfo, frb_fi_alloc);
521
+
522
+ rb_define_method(cFieldInfo, "initialize", frb_fi_init, -1);
523
+ rb_define_method(cFieldInfo, "name", frb_fi_name, 0);
524
+ rb_define_method(cFieldInfo, "stored?", frb_fi_is_stored, 0);
525
+ rb_define_method(cFieldInfo, "compressed?", frb_fi_is_compressed, 0);
526
+ rb_define_method(cFieldInfo, "indexed?", frb_fi_is_indexed, 0);
527
+ rb_define_method(cFieldInfo, "tokenized?", frb_fi_is_tokenized, 0);
528
+ rb_define_method(cFieldInfo, "omit_norms?", frb_fi_omit_norms, 0);
529
+ rb_define_method(cFieldInfo, "store_term_vector?",
530
+ frb_fi_store_term_vector, 0);
531
+ rb_define_method(cFieldInfo, "store_positions?",
532
+ frb_fi_store_positions, 0);
533
+ rb_define_method(cFieldInfo, "store_offsets?",
534
+ frb_fi_store_offsets, 0);
535
+ rb_define_method(cFieldInfo, "has_norms?", frb_fi_has_norms, 0);
536
+ rb_define_method(cFieldInfo, "boost", frb_fi_boost, 0);
537
+ rb_define_method(cFieldInfo, "to_s", frb_fi_to_s, 0);
538
+ rb_define_method(cFieldInfo, "to_h", frb_fi_to_h, 0);
539
+ }
@@ -1,11 +1,8 @@
1
1
  #include "frt_index.h"
2
2
  #include "isomorfeus_ferret.h"
3
3
 
4
- // #undef close
5
-
6
4
  VALUE mIndex;
7
5
 
8
- VALUE cFieldInfo;
9
6
  VALUE cFieldInfos;
10
7
 
11
8
  VALUE cTVOffsets;
@@ -19,10 +16,11 @@ VALUE cIndexWriter;
19
16
  VALUE cIndexReader;
20
17
 
21
18
  VALUE sym_analyzer;
19
+ VALUE sym_boost;
20
+
22
21
  static VALUE sym_close_dir;
23
22
  static VALUE sym_create;
24
23
  static VALUE sym_create_if_missing;
25
-
26
24
  static VALUE sym_chunk_size;
27
25
  static VALUE sym_max_buffer_memory;
28
26
  static VALUE sym_index_interval;
@@ -32,29 +30,9 @@ static VALUE sym_max_buffered_docs;
32
30
  static VALUE sym_max_merge_docs;
33
31
  static VALUE sym_max_field_length;
34
32
  static VALUE sym_use_compound_file;
35
-
36
- static VALUE sym_boost;
37
33
  static VALUE sym_field_infos;
38
34
 
39
- static VALUE sym_store;
40
- static VALUE sym_index;
41
- static VALUE sym_term_vector;
42
-
43
- static VALUE sym_brotli;
44
- static VALUE sym_bz2;
45
- static VALUE sym_lz4;
46
- static VALUE sym_compression;
47
-
48
- static VALUE sym_untokenized;
49
- static VALUE sym_omit_norms;
50
- static VALUE sym_untokenized_omit_norms;
51
-
52
- static VALUE sym_with_positions;
53
- static VALUE sym_with_offsets;
54
- static VALUE sym_with_positions_offsets;
55
-
56
35
  static ID fsym_content;
57
-
58
36
  static ID id_term;
59
37
  static ID id_fld_num_map;
60
38
  static ID id_field_num;
@@ -62,310 +40,15 @@ static ID id_boost;
62
40
 
63
41
  extern VALUE sym_each;
64
42
  extern rb_encoding *utf8_encoding;
65
- extern void frb_set_term(VALUE rterm, FrtTerm *t);
43
+ extern void frb_fi_get_params(VALUE roptions, FrtStoreValue *store, FrtCompressionType *compression, FrtIndexValue *index, FrtTermVectorValue *term_vector, float *boost);
66
44
  extern FrtAnalyzer *frb_get_cwrapped_analyzer(VALUE ranalyzer);
67
45
  extern VALUE frb_get_analyzer(FrtAnalyzer *a);
46
+ extern VALUE frb_get_field_info(FrtFieldInfo *fi);
68
47
  extern VALUE frb_get_lazy_doc(FrtLazyDoc *lazy_doc);
69
- extern void Init_LazyDoc(void);
70
-
71
- /****************************************************************************
72
- *
73
- * FieldInfo Methods
74
- *
75
- ****************************************************************************/
76
-
77
- static void frb_fi_free(void *p) {
78
- frt_fi_deref((FrtFieldInfo *)p);
79
- }
80
-
81
- static void frb_fi_get_params(VALUE roptions, FrtStoreValue *store, FrtCompressionType *compression, FrtIndexValue *index, FrtTermVectorValue *term_vector, float *boost) {
82
- VALUE v;
83
- Check_Type(roptions, T_HASH);
84
- v = rb_hash_aref(roptions, sym_boost);
85
- if (Qnil != v) {
86
- *boost = (float)NUM2DBL(v);
87
- } else {
88
- *boost = 1.0f;
89
- }
90
- v = rb_hash_aref(roptions, sym_store);
91
- if (Qnil != v) Check_Type(v, T_SYMBOL);
92
- if (v == sym_no || v == sym_false || v == Qfalse) {
93
- *store = FRT_STORE_NO;
94
- } else if (v == sym_yes || v == sym_true || v == Qtrue) {
95
- *store = FRT_STORE_YES;
96
- } else if (v == Qnil) {
97
- /* leave as default */
98
- } else {
99
- rb_raise(rb_eArgError, ":%s isn't a valid argument for :store. Please choose from [:yes, :no]",
100
- rb_id2name(SYM2ID(v)));
101
- }
102
-
103
- v = rb_hash_aref(roptions, sym_compression);
104
- if (Qnil != v) Check_Type(v, T_SYMBOL);
105
- if (v == sym_no || v == sym_false || v == Qfalse) {
106
- *compression = FRT_COMPRESSION_NONE;
107
- } else if (v == sym_yes || v == sym_true || v == Qtrue || v == sym_brotli) {
108
- *compression = FRT_COMPRESSION_BROTLI;
109
- } else if (v == sym_bz2) {
110
- *compression = FRT_COMPRESSION_BZ2;
111
- } else if (v == sym_lz4) {
112
- *compression = FRT_COMPRESSION_LZ4;
113
- } else if (v == Qnil) {
114
- /* leave as default */
115
- } else {
116
- rb_raise(rb_eArgError, ":%s isn't a valid argument for :compression. Please choose from [:yes, :no, :brotli, :bz2, :lz4]",
117
- rb_id2name(SYM2ID(v)));
118
- }
119
-
120
- v = rb_hash_aref(roptions, sym_index);
121
- if (Qnil != v) Check_Type(v, T_SYMBOL);
122
- if (v == sym_no || v == sym_false || v == Qfalse) {
123
- *index = FRT_INDEX_NO;
124
- } else if (v == sym_yes || v == sym_true || v == Qtrue) {
125
- *index = FRT_INDEX_YES;
126
- } else if (v == sym_untokenized) {
127
- *index = FRT_INDEX_UNTOKENIZED;
128
- } else if (v == sym_omit_norms) {
129
- *index = FRT_INDEX_YES_OMIT_NORMS;
130
- } else if (v == sym_untokenized_omit_norms) {
131
- *index = FRT_INDEX_UNTOKENIZED_OMIT_NORMS;
132
- } else if (v == Qnil) {
133
- /* leave as default */
134
- } else {
135
- rb_raise(rb_eArgError, ":%s isn't a valid argument for :index. Please choose from [:no, :yes, :untokenized, "
136
- ":omit_norms, :untokenized_omit_norms]", rb_id2name(SYM2ID(v)));
137
- }
138
-
139
- v = rb_hash_aref(roptions, sym_term_vector);
140
- if (Qnil != v) Check_Type(v, T_SYMBOL);
141
- if (v == sym_no || v == sym_false || v == Qfalse) {
142
- *term_vector = FRT_TERM_VECTOR_NO;
143
- } else if (v == sym_yes || v == sym_true || v == Qtrue) {
144
- *term_vector = FRT_TERM_VECTOR_YES;
145
- } else if (v == sym_with_positions) {
146
- *term_vector = FRT_TERM_VECTOR_WITH_POSITIONS;
147
- } else if (v == sym_with_offsets) {
148
- *term_vector = FRT_TERM_VECTOR_WITH_OFFSETS;
149
- } else if (v == sym_with_positions_offsets) {
150
- *term_vector = FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS;
151
- } else if (v == Qnil) {
152
- /* leave as default */
153
- } else {
154
- rb_raise(rb_eArgError, ":%s isn't a valid argument for :term_vector. Please choose from [:no, :yes, "
155
- ":with_positions, :with_offsets, :with_positions_offsets]", rb_id2name(SYM2ID(v)));
156
- }
157
- }
158
-
159
- static size_t frb_fi_size(const void *p) {
160
- return sizeof(FrtFieldInfo);
161
- (void)p;
162
- }
163
-
164
- const rb_data_type_t frb_field_info_t = {
165
- .wrap_struct_name = "FrbFieldInfo",
166
- .function = {
167
- .dmark = NULL,
168
- .dfree = frb_fi_free,
169
- .dsize = frb_fi_size,
170
- .dcompact = NULL,
171
- .reserved = {0},
172
- },
173
- .parent = NULL,
174
- .data = NULL,
175
- .flags = RUBY_TYPED_FREE_IMMEDIATELY
176
- };
177
-
178
- static VALUE frb_get_field_info(FrtFieldInfo *fi) {
179
- if (fi) {
180
- if (fi->rfi == 0 || fi->rfi == Qnil) {
181
- fi->rfi = TypedData_Wrap_Struct(cFieldInfo, &frb_field_info_t, fi);
182
- FRT_REF(fi);
183
- }
184
- return fi->rfi;
185
- }
186
- return Qnil;
187
- }
188
-
189
- /*
190
- * call-seq:
191
- * FieldInfo.new(name, options = {}) -> field_info
192
- *
193
- * Create a new FieldInfo object with the name +name+ and the properties
194
- * specified in +options+. The available options are [:store, :compression,
195
- * :index, :term_vector, :boost]. See the description of FieldInfo for more
196
- * information on these properties.
197
- */
198
- static VALUE frb_fi_alloc(VALUE rclass) {
199
- FrtFieldInfo *fi = frt_fi_alloc();
200
- return TypedData_Wrap_Struct(rclass, &frb_field_info_t, fi);
201
- }
202
-
203
- static VALUE frb_fi_init(int argc, VALUE *argv, VALUE self) {
204
- VALUE roptions, rname;
205
- FrtFieldInfo *fi;
206
- TypedData_Get_Struct(self, FrtFieldInfo, &frb_field_info_t, fi);
207
- FrtStoreValue store = FRT_STORE_YES;
208
- FrtCompressionType compression = FRT_COMPRESSION_NONE;
209
- FrtIndexValue index = FRT_INDEX_YES;
210
- FrtTermVectorValue term_vector = FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS;
211
- float boost = 1.0f;
212
-
213
- rb_scan_args(argc, argv, "11", &rname, &roptions);
214
- if (argc > 1) {
215
- frb_fi_get_params(roptions, &store, &compression, &index, &term_vector, &boost);
216
- }
217
- fi = frt_fi_init(fi, frb_field(rname), store, compression, index, term_vector);
218
- fi->boost = boost;
219
- fi->rfi = self;
220
- return self;
221
- }
222
-
223
- /*
224
- * call-seq:
225
- * fi.name -> symbol
226
- *
227
- * Return the name of the field
228
- */
229
- static VALUE frb_fi_name(VALUE self) {
230
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
231
- return ID2SYM(fi->name);
232
- }
233
-
234
- /*
235
- * call-seq:
236
- * fi.stored? -> bool
237
- *
238
- * Return true if the field is stored in the index.
239
- */
240
- static VALUE frb_fi_is_stored(VALUE self) {
241
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
242
- return fi_is_stored(fi) ? Qtrue : Qfalse;
243
- }
244
-
245
- /*
246
- * call-seq:
247
- * fi.compressed? -> bool
248
- *
249
- * Return true if the field is stored in the index in compressed format.
250
- */
251
- static VALUE frb_fi_is_compressed(VALUE self) {
252
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
253
- return fi_is_compressed(fi) ? Qtrue : Qfalse;
254
- }
255
-
256
- /*
257
- * call-seq:
258
- * fi.indexed? -> bool
259
- *
260
- * Return true if the field is indexed, ie searchable in the index.
261
- */
262
- static VALUE frb_fi_is_indexed(VALUE self) {
263
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
264
- return fi_is_indexed(fi) ? Qtrue : Qfalse;
265
- }
266
-
267
- /*
268
- * call-seq:
269
- * fi.tokenized? -> bool
270
- *
271
- * Return true if the field is tokenized. Tokenizing is the process of
272
- * breaking the field up into tokens. That is "the quick brown fox" becomes:
273
- *
274
- * ["the", "quick", "brown", "fox"]
275
- *
276
- * A field can only be tokenized if it is indexed.
277
- */
278
- static VALUE frb_fi_is_tokenized(VALUE self) {
279
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
280
- return fi_is_tokenized(fi) ? Qtrue : Qfalse;
281
- }
282
-
283
- /*
284
- * call-seq:
285
- * fi.omit_norms? -> bool
286
- *
287
- * Return true if the field omits the norm file. The norm file is the file
288
- * used to store the field boosts for an indexed field. If you do not boost
289
- * any fields, and you can live without scoring based on field length then
290
- * you can omit the norms file. This will give the index a slight performance
291
- * boost and it will use less memory, especially for indexes which have a
292
- * large number of documents.
293
- */
294
- static VALUE frb_fi_omit_norms(VALUE self) {
295
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
296
- return fi_omit_norms(fi) ? Qtrue : Qfalse;
297
- }
298
-
299
- /*
300
- * call-seq:
301
- * fi.store_term_vector? -> bool
302
- *
303
- * Return true if the term-vectors are stored for this field.
304
- */
305
- static VALUE frb_fi_store_term_vector(VALUE self) {
306
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
307
- return fi_store_term_vector(fi) ? Qtrue : Qfalse;
308
- }
309
-
310
- /*
311
- * call-seq:
312
- * fi.store_positions? -> bool
313
- *
314
- * Return true if positions are stored with the term-vectors for this field.
315
- */
316
- static VALUE frb_fi_store_positions(VALUE self) {
317
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
318
- return fi_store_positions(fi) ? Qtrue : Qfalse;
319
- }
320
-
321
- /*
322
- * call-seq:
323
- * fi.store_offsets? -> bool
324
- *
325
- * Return true if offsets are stored with the term-vectors for this field.
326
- */
327
- static VALUE frb_fi_store_offsets(VALUE self) {
328
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
329
- return fi_store_offsets(fi) ? Qtrue : Qfalse;
330
- }
331
-
332
- /*
333
- * call-seq:
334
- * fi.has_norms? -> bool
335
- *
336
- * Return true if this field has a norms file. This is the same as calling;
337
- *
338
- * fi.indexed? and not fi.omit_norms?
339
- */
340
- static VALUE frb_fi_has_norms(VALUE self) {
341
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
342
- return fi_has_norms(fi) ? Qtrue : Qfalse;
343
- }
344
-
345
- /*
346
- * call-seq:
347
- * fi.boost -> boost
348
- *
349
- * Return the default boost for this field
350
- */
351
- static VALUE frb_fi_boost(VALUE self) {
352
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
353
- return rb_float_new((double)fi->boost);
354
- }
48
+ extern void frb_set_term(VALUE rterm, FrtTerm *t);
355
49
 
356
- /*
357
- * call-seq:
358
- * fi.to_s -> string
359
- *
360
- * Return a string representation of the FieldInfo object.
361
- */
362
- static VALUE frb_fi_to_s(VALUE self) {
363
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
364
- char *fi_s = frt_fi_to_s(fi);
365
- VALUE rfi_s = rb_str_new2(fi_s);
366
- free(fi_s);
367
- return rfi_s;
368
- }
50
+ extern void Init_FieldInfo(void);
51
+ extern void Init_LazyDoc(void);
369
52
 
370
53
  /****************************************************************************
371
54
  *
@@ -2656,178 +2339,6 @@ static VALUE frb_ir_each(VALUE self) {
2656
2339
  *
2657
2340
  ****************************************************************************/
2658
2341
 
2659
-
2660
- /*
2661
- * Document-class: Ferret::Index::FieldInfo
2662
- *
2663
- * == Summary
2664
- *
2665
- * The FieldInfo class is the field descriptor for the index. It specifies
2666
- * whether a field is compressed or not or whether it should be indexed and
2667
- * tokenized. Every field has a name which must be a symbol. There are three
2668
- * properties that you can set, +:store+, +:index+ and +:term_vector+. You
2669
- * can also set the default +:boost+ for a field as well.
2670
- *
2671
- * == Properties
2672
- *
2673
- * === :store
2674
- *
2675
- * The +:store+ property allows you to specify how a field is stored. You can
2676
- * leave a field unstored (+:no+), store it in it's original format (+:yes+)
2677
- * or store it in compressed format (+:compressed+). By default the document
2678
- * is stored in its original format. If the field is large and it is stored
2679
- * elsewhere where it is easily accessible you might want to leave it
2680
- * unstored. This will keep the index size a lot smaller and make the
2681
- * indexing process a lot faster. For example, you should probably leave the
2682
- * +:content+ field unstored when indexing all the documents in your
2683
- * file-system.
2684
- *
2685
- * === :index
2686
- *
2687
- * The +:index+ property allows you to specify how a field is indexed. A
2688
- * field must be indexed to be searchable. However, a field doesn't need to
2689
- * be indexed to be store in the Ferret index. You may want to use the index
2690
- * as a simple database and store things like images or MP3s in the index. By
2691
- * default each field is indexed and tokenized (split into tokens) (+:yes+).
2692
- * If you don't want to index the field use +:no+. If you want the field
2693
- * indexed but not tokenized, use +:untokenized+. Do this for the fields you
2694
- * wish to sort by. There are two other values for +:index+; +:omit_norms+
2695
- * and +:untokenized_omit_norms+. These values correspond to +:yes+ and
2696
- * +:untokenized+ respectively and are useful if you are not boosting any
2697
- * fields and you'd like to speed up the index. The norms file is the file
2698
- * which contains the boost values for each document for a particular field.
2699
- *
2700
- * === :term_vector
2701
- *
2702
- * See TermVector for a description of term-vectors. You can specify whether
2703
- * or not you would like to store term-vectors. The available options are
2704
- * +:no+, +:yes+, +:with_positions+, +:with_offsets+ and
2705
- * +:with_positions_offsets+. Note that you need to store the positions to
2706
- * associate offsets with individual terms in the term_vector.
2707
- *
2708
- * == Property Table
2709
- *
2710
- * Property Value Description
2711
- * ------------------------------------------------------------------------
2712
- * :store | :no | Don't store field
2713
- * | |
2714
- * | :yes (default) | Store field in its original
2715
- * | | format. Use this value if you
2716
- * | | want to highlight matches.
2717
- * | | or print match excerpts a la
2718
- * | | Google search.
2719
- * -------------|-------------------------|------------------------------
2720
- * :compression | :no (default) | Don't compress stored field
2721
- * | |
2722
- * | :brotli | Compress field using Brotli
2723
- * | |
2724
- * | :bz2 | Compress field using BZip2
2725
- * | |
2726
- * | :lz4 | Compress field using LZ4
2727
- * -------------|-------------------------|------------------------------
2728
- * :index | :no | Do not make this field
2729
- * | | searchable.
2730
- * | |
2731
- * | :yes (default) | Make this field searchable and
2732
- * | | tokenized its contents.
2733
- * | |
2734
- * | :untokenized | Make this field searchable but
2735
- * | | do not tokenize its contents.
2736
- * | | use this value for fields you
2737
- * | | wish to sort by.
2738
- * | |
2739
- * | :omit_norms | Same as :yes except omit the
2740
- * | | norms file. The norms file can
2741
- * | | be omitted if you don't boost
2742
- * | | any fields and you don't need
2743
- * | | scoring based on field length.
2744
- * | |
2745
- * | :untokenized_omit_norms | Same as :untokenized except omit
2746
- * | | the norms file. Norms files can
2747
- * | | be omitted if you don't boost
2748
- * | | any fields and you don't need
2749
- * | | scoring based on field length.
2750
- * | |
2751
- * -------------|-------------------------|------------------------------
2752
- * :term_vector | :no | Don't store term-vectors
2753
- * | |
2754
- * | :yes | Store term-vectors without
2755
- * | | storing positions or offsets.
2756
- * | |
2757
- * | :with_positions | Store term-vectors with
2758
- * | | positions.
2759
- * | |
2760
- * | :with_offsets | Store term-vectors with
2761
- * | | offsets.
2762
- * | |
2763
- * | :with_positions_offsets | Store term-vectors with
2764
- * | (default) | positions and offsets.
2765
- * -------------|-------------------------|------------------------------
2766
- * :boost | Float | The boost property is used to
2767
- * | | set the default boost for a
2768
- * | | field. This boost value will
2769
- * | | used for all instances of the
2770
- * | | field in the index unless
2771
- * | | otherwise specified when you
2772
- * | | create the field. All values
2773
- * | | should be positive.
2774
- * | |
2775
- *
2776
- * == Examples
2777
- *
2778
- * fi = FieldInfo.new(:title, :index => :untokenized, :term_vector => :no,
2779
- * :boost => 10.0)
2780
- *
2781
- * fi = FieldInfo.new(:content)
2782
- *
2783
- * fi = FieldInfo.new(:created_on, :index => :untokenized_omit_norms,
2784
- * :term_vector => :no)
2785
- *
2786
- * fi = FieldInfo.new(:image, :store => :yes, :compression => :brotli, :index => :no,
2787
- * :term_vector => :no)
2788
- */
2789
- static void
2790
- Init_FieldInfo(void)
2791
- {
2792
- sym_store = ID2SYM(rb_intern("store"));
2793
- sym_index = ID2SYM(rb_intern("index"));
2794
- sym_term_vector = ID2SYM(rb_intern("term_vector"));
2795
-
2796
- sym_brotli = ID2SYM(rb_intern("brotli"));
2797
- sym_bz2 = ID2SYM(rb_intern("bz2"));
2798
- sym_lz4 = ID2SYM(rb_intern("lz4"));
2799
- // sym_level = ID2SYM(rb_intern("level"));
2800
- sym_compression = ID2SYM(rb_intern("compression"));
2801
-
2802
- sym_untokenized = ID2SYM(rb_intern("untokenized"));
2803
- sym_omit_norms = ID2SYM(rb_intern("omit_norms"));
2804
- sym_untokenized_omit_norms = ID2SYM(rb_intern("untokenized_omit_norms"));
2805
-
2806
- sym_with_positions = ID2SYM(rb_intern("with_positions"));
2807
- sym_with_offsets = ID2SYM(rb_intern("with_offsets"));
2808
- sym_with_positions_offsets = ID2SYM(rb_intern("with_positions_offsets"));
2809
-
2810
- cFieldInfo = rb_define_class_under(mIndex, "FieldInfo", rb_cObject);
2811
- rb_define_alloc_func(cFieldInfo, frb_fi_alloc);
2812
-
2813
- rb_define_method(cFieldInfo, "initialize", frb_fi_init, -1);
2814
- rb_define_method(cFieldInfo, "name", frb_fi_name, 0);
2815
- rb_define_method(cFieldInfo, "stored?", frb_fi_is_stored, 0);
2816
- rb_define_method(cFieldInfo, "compressed?", frb_fi_is_compressed, 0);
2817
- rb_define_method(cFieldInfo, "indexed?", frb_fi_is_indexed, 0);
2818
- rb_define_method(cFieldInfo, "tokenized?", frb_fi_is_tokenized, 0);
2819
- rb_define_method(cFieldInfo, "omit_norms?", frb_fi_omit_norms, 0);
2820
- rb_define_method(cFieldInfo, "store_term_vector?",
2821
- frb_fi_store_term_vector, 0);
2822
- rb_define_method(cFieldInfo, "store_positions?",
2823
- frb_fi_store_positions, 0);
2824
- rb_define_method(cFieldInfo, "store_offsets?",
2825
- frb_fi_store_offsets, 0);
2826
- rb_define_method(cFieldInfo, "has_norms?", frb_fi_has_norms, 0);
2827
- rb_define_method(cFieldInfo, "boost", frb_fi_boost, 0);
2828
- rb_define_method(cFieldInfo, "to_s", frb_fi_to_s, 0);
2829
- }
2830
-
2831
2342
  /*
2832
2343
  * Document-class: Ferret::Index::FieldInfos
2833
2344
  *
@@ -2,7 +2,7 @@
2
2
  #include "isomorfeus_ferret.h"
3
3
 
4
4
  static VALUE cQueryParser;
5
- VALUE cQueryParseException;
5
+ static VALUE cQueryParseException;
6
6
 
7
7
  extern VALUE sym_analyzer;
8
8
  static VALUE sym_wild_card_downcase;
@@ -3789,10 +3789,6 @@ VALUE frb_get_q(FrtQuery *q) {
3789
3789
  *
3790
3790
  ****************************************************************************/
3791
3791
 
3792
- /* rdochack
3793
- cTopDocs = rb_define_class_under(mSearch, "TopDocs", rb_cObject);
3794
- */
3795
-
3796
3792
  /*
3797
3793
  * Document-class: Ferret::Search::Hit
3798
3794
  *
@@ -3806,9 +3802,6 @@ cTopDocs = rb_define_class_under(mSearch, "TopDocs", rb_cObject);
3806
3802
  */
3807
3803
  static void Init_Hit(void) {
3808
3804
  const char *hit_class = "Hit";
3809
- /* rdochack
3810
- cHit = rb_define_class_under(mSearch, "Hit", rb_cObject);
3811
- */
3812
3805
  cHit = rb_struct_define(hit_class, "doc", "score", NULL);
3813
3806
  rb_set_class_path(cHit, mSearch, hit_class);
3814
3807
  rb_const_set(mSearch, rb_intern(hit_class), cHit);
@@ -3836,9 +3829,6 @@ static void Init_Hit(void) {
3836
3829
  */
3837
3830
  static void Init_TopDocs(void) {
3838
3831
  const char *td_class = "TopDocs";
3839
- /* rdochack
3840
- cTopDocs = rb_define_class_under(mSearch, "TopDocs", rb_cObject);
3841
- */
3842
3832
  cTopDocs = rb_struct_define(td_class,
3843
3833
  "total_hits",
3844
3834
  "hits",
@@ -41,12 +41,12 @@ extern VALUE cLockError;
41
41
  extern VALUE cTerm;
42
42
 
43
43
  /* Ferret Inits */
44
- extern void Init_Utils();
45
- extern void Init_Analysis();
46
- extern void Init_Store();
47
- extern void Init_Index();
48
- extern void Init_Search();
49
- extern void Init_QueryParser();
44
+ extern void Init_Utils(void);
45
+ extern void Init_Analysis(void);
46
+ extern void Init_Store(void);
47
+ extern void Init_Index(void);
48
+ extern void Init_Search(void);
49
+ extern void Init_QueryParser(void);
50
50
 
51
51
  extern void frb_raise(int excode, const char *msg);
52
52
  extern void frb_create_dir(VALUE rpath);
@@ -384,14 +384,11 @@ module Isomorfeus
384
384
  # puts "hit document number #{doc} with a score of #{score}"
385
385
  # end
386
386
  #
387
- def search_each(query, options = {}) # :yield: doc, score
387
+ def search_each(query, options = {}, &block) # :yield: doc, score
388
388
  @dir.synchronize do
389
389
  ensure_searcher_open()
390
390
  query = do_process_query(query)
391
-
392
- @searcher.search_each(query, options) do |doc, score|
393
- yield doc, score
394
- end
391
+ @searcher.search_each(query, options, &block)
395
392
  end
396
393
  end
397
394
 
@@ -1,5 +1,5 @@
1
1
  module Isomorfeus
2
2
  module Ferret
3
- VERSION = '0.14.0'
3
+ VERSION = '0.14.1'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: isomorfeus-ferret
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.14.0
4
+ version: 0.14.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Biedermann
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-05-28 00:00:00.000000000 Z
11
+ date: 2022-06-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: oj
@@ -180,6 +180,7 @@ files:
180
180
  - ext/isomorfeus_ferret_ext/extconf.rb
181
181
  - ext/isomorfeus_ferret_ext/fio_tmpfile.h
182
182
  - ext/isomorfeus_ferret_ext/frb_analysis.c
183
+ - ext/isomorfeus_ferret_ext/frb_field_info.c
183
184
  - ext/isomorfeus_ferret_ext/frb_index.c
184
185
  - ext/isomorfeus_ferret_ext/frb_lazy_doc.c
185
186
  - ext/isomorfeus_ferret_ext/frb_qparser.c