isomorfeus-ferret 0.14.0 → 0.14.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e6893e7012cf75189d3ff378b6e869a831a5281472f84ea5ab4e354bd92bfcee
4
- data.tar.gz: 0a4cad49faae062c29e0bed8fd7f87c5e3875c548cbb6e168907719b52777306
3
+ metadata.gz: 5818fce6d84b9bd4814be3bbed270127e05297dcf85adeebc495c8f334430d88
4
+ data.tar.gz: 77c9c3246c7777947084b47620d3aeeeb9eb76d7b0a17a4d30a37a38547a54da
5
5
  SHA512:
6
- metadata.gz: 4e48ec64d99af7fe0440480f11f22fd79dd9fca0c6dd09ce58bc0953f7f556ee4f61bdf18810f5cb2f39c8f80c99b406c5319c93235d06974009cefb1c73fccb
7
- data.tar.gz: 254960eb7543fb59e1d12087f83feaeb4abd957314f76b56778baf7a2fef2e090922b4b99d653e8532e30690a14d90c661ec5f849588a7e384b2a65346f7f04d
6
+ metadata.gz: 59632a0b46b9bd247da0f8b3908654a8027fbcef2aadc897f7681d25b03d4404191d037be323f666ef9bae679c72b135318aa853158e6bf0205b754ec3b2b18f
7
+ data.tar.gz: 2a037003347c6bca0900bf80410e83f43d397400f37e22f112e6ef6893a568dba29561b12594f803f3b28baee9f5f1ae67595c244d91b7dffa9d06e4e493c891
data/LICENSE CHANGED
@@ -143,7 +143,7 @@ The following licenses apply to files, which are distributed within the repo
143
143
  but not distributed with the gem and not used at runtime:
144
144
 
145
145
 
146
- For the Reuter-21578 files in the misc/ferret_vs_lucene directory (corpus, etc.),
146
+ For the Reuter-21578 files in the misc/ferret_vs_others directory (corpus, etc.),
147
147
  used for research for developing search engine technology:
148
148
 
149
149
  The copyright for the text of newswire articles and Reuters
@@ -156,7 +156,7 @@ Distribution 1.0", and inform your readers of the current location of
156
156
  the data set (see "Availability & Questions").
157
157
 
158
158
 
159
- Apache Lucene jars in the misc/ferret_vs_lucene directory:
159
+ Apache Lucene jars in the misc/ferret_vs_others directory:
160
160
 
161
161
 
162
162
  Apache License
data/README.md CHANGED
@@ -69,7 +69,7 @@ Compression semantics have changed, now Brotli, BZip2 and LZ4 compression codecs
69
69
  - LZ4: fast compression, fast decompression, low compression ratio
70
70
 
71
71
  To see performance and compression ratios `rake ferret_compression_bench` can be run from the cloned repo.
72
- It uses data and code within the misc/ferret_vs_lucene directory.
72
+ It uses data and code within the misc/ferret_vs_others directory.
73
73
 
74
74
  To compress a stored field the :compression option can be used with one of: :no, :brotli, :bz2 or :lz4.
75
75
  Example:
@@ -96,7 +96,7 @@ https://github.com/isomorfeus/isomorfeus-ferret/blob/master/lib/isomorfeus/ferre
96
96
  The query language and parser are documented here:
97
97
  https://github.com/isomorfeus/isomorfeus-ferret/blob/master/ext/isomorfeus_ferret_ext/frb_qparser.c
98
98
 
99
- Examples can be found in the 'test' directory or in 'misc/ferret_vs_lucene'.
99
+ Examples can be found in the 'test' directory or in 'misc/ferret_vs_others'.
100
100
 
101
101
  ## Running Specs
102
102
 
@@ -111,24 +111,24 @@ Ensure your locale is set to C.UTF-8, because the internal c tests don't know ho
111
111
  ### Indexing and Searching
112
112
  - clone repo
113
113
  - bundle install
114
- - rake ferret_vs_lucene
114
+ - rake ferret_vs_others
115
115
 
116
116
  A recent Java JDK must be installed to compile and run lucene benchmarks.
117
117
 
118
- Results, Ferret 0.14.0 vs. Lucene 9.1.0, WhitespaceAnalyzer,
119
- Linux Ubuntu 20.04, FreeBSD 13.1 and Windows 10 on old Intel Core i5 from 2015,
118
+ Results, Ferret 0.14.0 vs. Lucene 9.2.0, WhitespaceAnalyzer,
119
+ Linux Ubuntu 22.04, FreeBSD 13.1 and Windows 10 on old Intel Core i5 from 2015,
120
120
  LinuxPi on RaspberryPi 400:
121
121
 
122
122
  | OS | Task | Ferret | Lucene* |
123
123
  |---------|------------|-----------------|----------------|
124
- | Linux | Indexing | 5125 docs/s | 4671 docs/s |
124
+ | Linux | Indexing | 5125 docs/s | 4959 docs/s |
125
125
  | FreeBSD | Indexing | 4537 docs/s | 3831 docs/s |
126
126
  | Windows | Indexing | 2488 docs/s | 2588 docs/s |
127
- | LinuxPi | Indexing | 1200 docs/s | 551 docs/s |
127
+ | LinuxPi | Indexing | 1200 docs/s | 755 docs/s |
128
128
  | Linux | Searching | 26610 queries/s | 7165 queries/s |
129
129
  | FreeBSD | Searching | 24167 queries/s | 4288 queries/s |
130
130
  | Windows | Searching | 3901 queries/s | 1033 queries/s |
131
- | LinuxPi | Searching | 6194 queries/s | 769 queries/s |
131
+ | LinuxPi | Searching | 6194 queries/s | 785 queries/s |
132
132
  | | Index Size | 28 MB | 35 MB |
133
133
 
134
134
  * JVM Versions:
@@ -0,0 +1,539 @@
1
+ #include "frt_index.h"
2
+ #include "isomorfeus_ferret.h"
3
+
4
+ VALUE cFieldInfo;
5
+
6
+ static VALUE sym_store;
7
+ static VALUE sym_index;
8
+ static VALUE sym_compression;
9
+ static VALUE sym_brotli;
10
+ static VALUE sym_bz2;
11
+ static VALUE sym_lz4;
12
+ static VALUE sym_term_vector;
13
+ static VALUE sym_omit_norms;
14
+ static VALUE sym_untokenized;
15
+ static VALUE sym_untokenized_omit_norms;
16
+ static VALUE sym_with_offsets;
17
+ static VALUE sym_with_positions;
18
+ static VALUE sym_with_positions_offsets;
19
+
20
+ extern VALUE sym_boost;
21
+
22
+ void frb_fi_get_params(VALUE roptions, FrtStoreValue *store, FrtCompressionType *compression, FrtIndexValue *index, FrtTermVectorValue *term_vector, float *boost) {
23
+ VALUE v;
24
+ Check_Type(roptions, T_HASH);
25
+ v = rb_hash_aref(roptions, sym_boost);
26
+ if (Qnil != v) {
27
+ *boost = (float)NUM2DBL(v);
28
+ } else {
29
+ *boost = 1.0f;
30
+ }
31
+ v = rb_hash_aref(roptions, sym_store);
32
+ if (Qnil != v) Check_Type(v, T_SYMBOL);
33
+ if (v == sym_no || v == sym_false || v == Qfalse) {
34
+ *store = FRT_STORE_NO;
35
+ } else if (v == sym_yes || v == sym_true || v == Qtrue) {
36
+ *store = FRT_STORE_YES;
37
+ } else if (v == Qnil) {
38
+ /* leave as default */
39
+ } else {
40
+ rb_raise(rb_eArgError, ":%s isn't a valid argument for :store. Please choose from [:yes, :no]",
41
+ rb_id2name(SYM2ID(v)));
42
+ }
43
+
44
+ v = rb_hash_aref(roptions, sym_compression);
45
+ if (Qnil != v) Check_Type(v, T_SYMBOL);
46
+ if (v == sym_no || v == sym_false || v == Qfalse) {
47
+ *compression = FRT_COMPRESSION_NONE;
48
+ } else if (v == sym_yes || v == sym_true || v == Qtrue || v == sym_brotli) {
49
+ *compression = FRT_COMPRESSION_BROTLI;
50
+ } else if (v == sym_bz2) {
51
+ *compression = FRT_COMPRESSION_BZ2;
52
+ } else if (v == sym_lz4) {
53
+ *compression = FRT_COMPRESSION_LZ4;
54
+ } else if (v == Qnil) {
55
+ /* leave as default */
56
+ } else {
57
+ rb_raise(rb_eArgError, ":%s isn't a valid argument for :compression. Please choose from [:yes, :no, :brotli, :bz2, :lz4]",
58
+ rb_id2name(SYM2ID(v)));
59
+ }
60
+
61
+ v = rb_hash_aref(roptions, sym_index);
62
+ if (Qnil != v) Check_Type(v, T_SYMBOL);
63
+ if (v == sym_no || v == sym_false || v == Qfalse) {
64
+ *index = FRT_INDEX_NO;
65
+ } else if (v == sym_yes || v == sym_true || v == Qtrue) {
66
+ *index = FRT_INDEX_YES;
67
+ } else if (v == sym_untokenized) {
68
+ *index = FRT_INDEX_UNTOKENIZED;
69
+ } else if (v == sym_omit_norms) {
70
+ *index = FRT_INDEX_YES_OMIT_NORMS;
71
+ } else if (v == sym_untokenized_omit_norms) {
72
+ *index = FRT_INDEX_UNTOKENIZED_OMIT_NORMS;
73
+ } else if (v == Qnil) {
74
+ /* leave as default */
75
+ } else {
76
+ rb_raise(rb_eArgError, ":%s isn't a valid argument for :index. Please choose from [:no, :yes, :untokenized, "
77
+ ":omit_norms, :untokenized_omit_norms]", rb_id2name(SYM2ID(v)));
78
+ }
79
+
80
+ v = rb_hash_aref(roptions, sym_term_vector);
81
+ if (Qnil != v) Check_Type(v, T_SYMBOL);
82
+ if (v == sym_no || v == sym_false || v == Qfalse) {
83
+ *term_vector = FRT_TERM_VECTOR_NO;
84
+ } else if (v == sym_yes || v == sym_true || v == Qtrue) {
85
+ *term_vector = FRT_TERM_VECTOR_YES;
86
+ } else if (v == sym_with_positions) {
87
+ *term_vector = FRT_TERM_VECTOR_WITH_POSITIONS;
88
+ } else if (v == sym_with_offsets) {
89
+ *term_vector = FRT_TERM_VECTOR_WITH_OFFSETS;
90
+ } else if (v == sym_with_positions_offsets) {
91
+ *term_vector = FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS;
92
+ } else if (v == Qnil) {
93
+ /* leave as default */
94
+ if (*index == FRT_INDEX_NO) *term_vector = FRT_TERM_VECTOR_NO;
95
+ } else {
96
+ rb_raise(rb_eArgError, ":%s isn't a valid argument for :term_vector. Please choose from [:no, :yes, "
97
+ ":with_positions, :with_offsets, :with_positions_offsets]", rb_id2name(SYM2ID(v)));
98
+ }
99
+ }
100
+
101
+ static void frb_fi_free(void *p) {
102
+ frt_fi_deref((FrtFieldInfo *)p);
103
+ }
104
+
105
+ static size_t frb_fi_size(const void *p) {
106
+ return sizeof(FrtFieldInfo);
107
+ (void)p;
108
+ }
109
+
110
+ const rb_data_type_t frb_field_info_t = {
111
+ .wrap_struct_name = "FrbFieldInfo",
112
+ .function = {
113
+ .dmark = NULL,
114
+ .dfree = frb_fi_free,
115
+ .dsize = frb_fi_size,
116
+ .dcompact = NULL,
117
+ .reserved = {0},
118
+ },
119
+ .parent = NULL,
120
+ .data = NULL,
121
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY
122
+ };
123
+
124
+ VALUE frb_get_field_info(FrtFieldInfo *fi) {
125
+ if (fi) {
126
+ if (fi->rfi == 0 || fi->rfi == Qnil) {
127
+ fi->rfi = TypedData_Wrap_Struct(cFieldInfo, &frb_field_info_t, fi);
128
+ FRT_REF(fi);
129
+ }
130
+ return fi->rfi;
131
+ }
132
+ return Qnil;
133
+ }
134
+
135
+ /*
136
+ * call-seq:
137
+ * FieldInfo.new(name, options = {}) -> field_info
138
+ *
139
+ * Create a new FieldInfo object with the name +name+ and the properties
140
+ * specified in +options+. The available options are [:store, :compression,
141
+ * :index, :term_vector, :boost]. See the description of FieldInfo for more
142
+ * information on these properties.
143
+ */
144
+ static VALUE frb_fi_alloc(VALUE rclass) {
145
+ FrtFieldInfo *fi = frt_fi_alloc();
146
+ return TypedData_Wrap_Struct(rclass, &frb_field_info_t, fi);
147
+ }
148
+
149
+ static VALUE frb_fi_init(int argc, VALUE *argv, VALUE self) {
150
+ VALUE roptions, rname;
151
+ FrtFieldInfo *fi;
152
+ TypedData_Get_Struct(self, FrtFieldInfo, &frb_field_info_t, fi);
153
+ FrtStoreValue store = FRT_STORE_YES;
154
+ FrtCompressionType compression = FRT_COMPRESSION_NONE;
155
+ FrtIndexValue index = FRT_INDEX_YES;
156
+ FrtTermVectorValue term_vector = FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS;
157
+ float boost = 1.0f;
158
+
159
+ rb_scan_args(argc, argv, "11", &rname, &roptions);
160
+ if (argc > 1) {
161
+ frb_fi_get_params(roptions, &store, &compression, &index, &term_vector, &boost);
162
+ }
163
+ fi = frt_fi_init(fi, frb_field(rname), store, compression, index, term_vector);
164
+ fi->boost = boost;
165
+ fi->rfi = self;
166
+ return self;
167
+ }
168
+
169
+ /*
170
+ * call-seq:
171
+ * fi.name -> symbol
172
+ *
173
+ * Return the name of the field
174
+ */
175
+ static VALUE frb_fi_name(VALUE self) {
176
+ FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
177
+ return ID2SYM(fi->name);
178
+ }
179
+
180
+ /*
181
+ * call-seq:
182
+ * fi.stored? -> bool
183
+ *
184
+ * Return true if the field is stored in the index.
185
+ */
186
+ static VALUE frb_fi_is_stored(VALUE self) {
187
+ FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
188
+ return fi_is_stored(fi) ? Qtrue : Qfalse;
189
+ }
190
+
191
+ /*
192
+ * call-seq:
193
+ * fi.compressed? -> bool
194
+ *
195
+ * Return true if the field is stored in the index in compressed format.
196
+ */
197
+ static VALUE frb_fi_is_compressed(VALUE self) {
198
+ FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
199
+ return fi_is_compressed(fi) ? Qtrue : Qfalse;
200
+ }
201
+
202
+ /*
203
+ * call-seq:
204
+ * fi.indexed? -> bool
205
+ *
206
+ * Return true if the field is indexed, ie searchable in the index.
207
+ */
208
+ static VALUE frb_fi_is_indexed(VALUE self) {
209
+ FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
210
+ return fi_is_indexed(fi) ? Qtrue : Qfalse;
211
+ }
212
+
213
+ /*
214
+ * call-seq:
215
+ * fi.tokenized? -> bool
216
+ *
217
+ * Return true if the field is tokenized. Tokenizing is the process of
218
+ * breaking the field up into tokens. That is "the quick brown fox" becomes:
219
+ *
220
+ * ["the", "quick", "brown", "fox"]
221
+ *
222
+ * A field can only be tokenized if it is indexed.
223
+ */
224
+ static VALUE frb_fi_is_tokenized(VALUE self) {
225
+ FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
226
+ return fi_is_tokenized(fi) ? Qtrue : Qfalse;
227
+ }
228
+
229
+ /*
230
+ * call-seq:
231
+ * fi.omit_norms? -> bool
232
+ *
233
+ * Return true if the field omits the norm file. The norm file is the file
234
+ * used to store the field boosts for an indexed field. If you do not boost
235
+ * any fields, and you can live without scoring based on field length then
236
+ * you can omit the norms file. This will give the index a slight performance
237
+ * boost and it will use less memory, especially for indexes which have a
238
+ * large number of documents.
239
+ */
240
+ static VALUE frb_fi_omit_norms(VALUE self) {
241
+ FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
242
+ return fi_omit_norms(fi) ? Qtrue : Qfalse;
243
+ }
244
+
245
+ /*
246
+ * call-seq:
247
+ * fi.store_term_vector? -> bool
248
+ *
249
+ * Return true if the term-vectors are stored for this field.
250
+ */
251
+ static VALUE frb_fi_store_term_vector(VALUE self) {
252
+ FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
253
+ return fi_store_term_vector(fi) ? Qtrue : Qfalse;
254
+ }
255
+
256
+ /*
257
+ * call-seq:
258
+ * fi.store_positions? -> bool
259
+ *
260
+ * Return true if positions are stored with the term-vectors for this field.
261
+ */
262
+ static VALUE frb_fi_store_positions(VALUE self) {
263
+ FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
264
+ return fi_store_positions(fi) ? Qtrue : Qfalse;
265
+ }
266
+
267
+ /*
268
+ * call-seq:
269
+ * fi.store_offsets? -> bool
270
+ *
271
+ * Return true if offsets are stored with the term-vectors for this field.
272
+ */
273
+ static VALUE frb_fi_store_offsets(VALUE self) {
274
+ FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
275
+ return fi_store_offsets(fi) ? Qtrue : Qfalse;
276
+ }
277
+
278
+ /*
279
+ * call-seq:
280
+ * fi.has_norms? -> bool
281
+ *
282
+ * Return true if this field has a norms file. This is the same as calling:
283
+ *
284
+ * fi.indexed? and not fi.omit_norms?
285
+ */
286
+ static VALUE frb_fi_has_norms(VALUE self) {
287
+ FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
288
+ return fi_has_norms(fi) ? Qtrue : Qfalse;
289
+ }
290
+
291
+ /*
292
+ * call-seq:
293
+ * fi.boost -> boost
294
+ *
295
+ * Return the default boost for this field
296
+ */
297
+ static VALUE frb_fi_boost(VALUE self) {
298
+ FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
299
+ return rb_float_new((double)fi->boost);
300
+ }
301
+
302
+ /*
303
+ * call-seq:
304
+ * fi.to_s -> string
305
+ *
306
+ * Return a string representation of the FieldInfo object.
307
+ */
308
+ static VALUE frb_fi_to_s(VALUE self) {
309
+ FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
310
+ char *fi_s = frt_fi_to_s(fi);
311
+ VALUE rfi_s = rb_str_new2(fi_s);
312
+ free(fi_s);
313
+ return rfi_s;
314
+ }
315
+
316
+ /*
317
+ * call-seq:
318
+ * fi.to_h -> Hash
319
+ *
320
+ * Return a Hash representation of the FieldInfo object.
321
+ */
322
+ static VALUE frb_fi_to_h(VALUE self) {
323
+ FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
324
+ VALUE hash = rb_hash_new();
325
+ VALUE val;
326
+ bool o;
327
+
328
+ // :index
329
+ if (!fi_is_indexed(fi)) val = sym_no;
330
+ else {
331
+ bool t = fi_is_tokenized(fi);
332
+ o = fi_omit_norms(fi);
333
+ if (!t && o) val = sym_untokenized_omit_norms;
334
+ else if (t && o) val = sym_omit_norms;
335
+ else if (!t && !o) val = sym_untokenized;
336
+ else val = sym_yes;
337
+ }
338
+ rb_hash_aset(hash, sym_index, val);
339
+
340
+ // :store
341
+ rb_hash_aset(hash, sym_store, fi_is_stored(fi) ? sym_yes : sym_no);
342
+
343
+ // :compression
344
+ if (!fi_is_compressed(fi)) val = sym_no;
345
+ else {
346
+ if (fi_is_compressed_brotli(fi)) val = sym_brotli;
347
+ else if (fi_is_compressed_bz2(fi)) val = sym_bz2;
348
+ else if (fi_is_compressed_lz4(fi)) val = sym_lz4;
349
+ else val = sym_yes;
350
+ }
351
+ rb_hash_aset(hash, sym_compression, val);
352
+
353
+ // :term_vector
354
+ if (!fi_store_term_vector(fi)) val = sym_no;
355
+ else {
356
+ bool p = fi_store_positions(fi);
357
+ o = fi_store_offsets(fi);
358
+ if (p && o) val = sym_with_positions_offsets;
359
+ else if (o) val = sym_with_offsets;
360
+ else if (p) val = sym_with_positions;
361
+ else val = sym_yes;
362
+ }
363
+ rb_hash_aset(hash, sym_term_vector, val);
364
+
365
+ // :boost
366
+ rb_hash_aset(hash, sym_boost, rb_float_new((double)fi->boost));
367
+
368
+ return hash;
369
+ }
370
+
371
+ /*
372
+ * Document-class: Ferret::Index::FieldInfo
373
+ *
374
+ * == Summary
375
+ *
376
+ * The FieldInfo class is the field descriptor for the index. It specifies
377
+ * whether a field is compressed or not or whether it should be indexed and
378
+ * tokenized. Every field has a name which must be a symbol. There are three
379
+ * properties that you can set, +:store+, +:index+ and +:term_vector+. You
380
+ * can also set the default +:boost+ for a field as well.
381
+ *
382
+ * == Properties
383
+ *
384
+ * === :store
385
+ *
386
+ * The +:store+ property allows you to specify how a field is stored. You can
387
+ * leave a field unstored (+:no+), store it in its original format (+:yes+)
388
+ * or store it in compressed format (+:compressed+). By default the document
389
+ * is stored in its original format. If the field is large and it is stored
390
+ * elsewhere where it is easily accessible you might want to leave it
391
+ * unstored. This will keep the index size a lot smaller and make the
392
+ * indexing process a lot faster. For example, you should probably leave the
393
+ * +:content+ field unstored when indexing all the documents in your
394
+ * file-system.
395
+ *
396
+ * === :index
397
+ *
398
+ * The +:index+ property allows you to specify how a field is indexed. A
399
+ * field must be indexed to be searchable. However, a field doesn't need to
400
+ * be indexed to be store in the Ferret index. You may want to use the index
401
+ * as a simple database and store things like images or MP3s in the index. By
402
+ * default each field is indexed and tokenized (split into tokens) (+:yes+).
403
+ * If you don't want to index the field use +:no+. If you want the field
404
+ * indexed but not tokenized, use +:untokenized+. Do this for the fields you
405
+ * wish to sort by. There are two other values for +:index+; +:omit_norms+
406
+ * and +:untokenized_omit_norms+. These values correspond to +:yes+ and
407
+ * +:untokenized+ respectively and are useful if you are not boosting any
408
+ * fields and you'd like to speed up the index. The norms file is the file
409
+ * which contains the boost values for each document for a particular field.
410
+ *
411
+ * === :term_vector
412
+ *
413
+ * See TermVector for a description of term-vectors. You can specify whether
414
+ * or not you would like to store term-vectors. The available options are
415
+ * +:no+, +:yes+, +:with_positions+, +:with_offsets+ and
416
+ * +:with_positions_offsets+. Note that you need to store the positions to
417
+ * associate offsets with individual terms in the term_vector.
418
+ *
419
+ * == Property Table
420
+ *
421
+ * Property Value Description
422
+ * ------------------------------------------------------------------------
423
+ * :store | :no | Don't store field
424
+ * | |
425
+ * | :yes (default) | Store field in its original
426
+ * | | format. Use this value if you
427
+ * | | want to highlight matches.
428
+ * | | or print match excerpts a la
429
+ * | | Google search.
430
+ * -------------|-------------------------|------------------------------
431
+ * :compression | :no (default) | Don't compress stored field
432
+ * | |
433
+ * | :brotli | Compress field using Brotli
434
+ * | |
435
+ * | :bz2 | Compress field using BZip2
436
+ * | |
437
+ * | :lz4 | Compress field using LZ4
438
+ * -------------|-------------------------|------------------------------
439
+ * :index | :no | Do not make this field
440
+ * | | searchable.
441
+ * | |
442
+ * | :yes (default) | Make this field searchable and
443
+ * | | tokenize its contents.
444
+ * | |
445
+ * | :untokenized | Make this field searchable but
446
+ * | | do not tokenize its contents.
447
+ * | | use this value for fields you
448
+ * | | wish to sort by.
449
+ * | |
450
+ * | :omit_norms | Same as :yes except omit the
451
+ * | | norms file. The norms file can
452
+ * | | be omitted if you don't boost
453
+ * | | any fields and you don't need
454
+ * | | scoring based on field length.
455
+ * | |
456
+ * | :untokenized_omit_norms | Same as :untokenized except omit
457
+ * | | the norms file. Norms files can
458
+ * | | be omitted if you don't boost
459
+ * | | any fields and you don't need
460
+ * | | scoring based on field length.
461
+ * | |
462
+ * -------------|-------------------------|------------------------------
463
+ * :term_vector | :no | Don't store term-vectors
464
+ * | |
465
+ * | :yes | Store term-vectors without
466
+ * | | storing positions or offsets.
467
+ * | |
468
+ * | :with_positions | Store term-vectors with
469
+ * | | positions.
470
+ * | |
471
+ * | :with_offsets | Store term-vectors with
472
+ * | | offsets.
473
+ * | |
474
+ * | :with_positions_offsets | Store term-vectors with
475
+ * | (default) | positions and offsets.
476
+ * -------------|-------------------------|------------------------------
477
+ * :boost | Float | The boost property is used to
478
+ * | | set the default boost for a
479
+ * | | field. This boost value will
480
+ * | | be used for all instances of the
481
+ * | | field in the index unless
482
+ * | | otherwise specified when you
483
+ * | | create the field. All values
484
+ * | | should be positive.
485
+ * | |
486
+ *
487
+ * == Examples
488
+ *
489
+ * fi = FieldInfo.new(:title, :index => :untokenized, :term_vector => :no,
490
+ * :boost => 10.0)
491
+ *
492
+ * fi = FieldInfo.new(:content)
493
+ *
494
+ * fi = FieldInfo.new(:created_on, :index => :untokenized_omit_norms,
495
+ * :term_vector => :no)
496
+ *
497
+ * fi = FieldInfo.new(:image, :store => :yes, :compression => :brotli, :index => :no,
498
+ * :term_vector => :no)
499
+ */
500
+ void Init_FieldInfo(void) {
501
+ sym_store = ID2SYM(rb_intern("store"));
502
+ sym_index = ID2SYM(rb_intern("index"));
503
+ sym_term_vector = ID2SYM(rb_intern("term_vector"));
504
+
505
+ sym_brotli = ID2SYM(rb_intern("brotli"));
506
+ sym_bz2 = ID2SYM(rb_intern("bz2"));
507
+ sym_lz4 = ID2SYM(rb_intern("lz4"));
508
+ // sym_level = ID2SYM(rb_intern("level"));
509
+ sym_compression = ID2SYM(rb_intern("compression"));
510
+
511
+ sym_untokenized = ID2SYM(rb_intern("untokenized"));
512
+ sym_omit_norms = ID2SYM(rb_intern("omit_norms"));
513
+ sym_untokenized_omit_norms = ID2SYM(rb_intern("untokenized_omit_norms"));
514
+
515
+ sym_with_positions = ID2SYM(rb_intern("with_positions"));
516
+ sym_with_offsets = ID2SYM(rb_intern("with_offsets"));
517
+ sym_with_positions_offsets = ID2SYM(rb_intern("with_positions_offsets"));
518
+
519
+ cFieldInfo = rb_define_class_under(mIndex, "FieldInfo", rb_cObject);
520
+ rb_define_alloc_func(cFieldInfo, frb_fi_alloc);
521
+
522
+ rb_define_method(cFieldInfo, "initialize", frb_fi_init, -1);
523
+ rb_define_method(cFieldInfo, "name", frb_fi_name, 0);
524
+ rb_define_method(cFieldInfo, "stored?", frb_fi_is_stored, 0);
525
+ rb_define_method(cFieldInfo, "compressed?", frb_fi_is_compressed, 0);
526
+ rb_define_method(cFieldInfo, "indexed?", frb_fi_is_indexed, 0);
527
+ rb_define_method(cFieldInfo, "tokenized?", frb_fi_is_tokenized, 0);
528
+ rb_define_method(cFieldInfo, "omit_norms?", frb_fi_omit_norms, 0);
529
+ rb_define_method(cFieldInfo, "store_term_vector?",
530
+ frb_fi_store_term_vector, 0);
531
+ rb_define_method(cFieldInfo, "store_positions?",
532
+ frb_fi_store_positions, 0);
533
+ rb_define_method(cFieldInfo, "store_offsets?",
534
+ frb_fi_store_offsets, 0);
535
+ rb_define_method(cFieldInfo, "has_norms?", frb_fi_has_norms, 0);
536
+ rb_define_method(cFieldInfo, "boost", frb_fi_boost, 0);
537
+ rb_define_method(cFieldInfo, "to_s", frb_fi_to_s, 0);
538
+ rb_define_method(cFieldInfo, "to_h", frb_fi_to_h, 0);
539
+ }
@@ -1,11 +1,8 @@
1
1
  #include "frt_index.h"
2
2
  #include "isomorfeus_ferret.h"
3
3
 
4
- // #undef close
5
-
6
4
  VALUE mIndex;
7
5
 
8
- VALUE cFieldInfo;
9
6
  VALUE cFieldInfos;
10
7
 
11
8
  VALUE cTVOffsets;
@@ -19,10 +16,11 @@ VALUE cIndexWriter;
19
16
  VALUE cIndexReader;
20
17
 
21
18
  VALUE sym_analyzer;
19
+ VALUE sym_boost;
20
+
22
21
  static VALUE sym_close_dir;
23
22
  static VALUE sym_create;
24
23
  static VALUE sym_create_if_missing;
25
-
26
24
  static VALUE sym_chunk_size;
27
25
  static VALUE sym_max_buffer_memory;
28
26
  static VALUE sym_index_interval;
@@ -32,29 +30,9 @@ static VALUE sym_max_buffered_docs;
32
30
  static VALUE sym_max_merge_docs;
33
31
  static VALUE sym_max_field_length;
34
32
  static VALUE sym_use_compound_file;
35
-
36
- static VALUE sym_boost;
37
33
  static VALUE sym_field_infos;
38
34
 
39
- static VALUE sym_store;
40
- static VALUE sym_index;
41
- static VALUE sym_term_vector;
42
-
43
- static VALUE sym_brotli;
44
- static VALUE sym_bz2;
45
- static VALUE sym_lz4;
46
- static VALUE sym_compression;
47
-
48
- static VALUE sym_untokenized;
49
- static VALUE sym_omit_norms;
50
- static VALUE sym_untokenized_omit_norms;
51
-
52
- static VALUE sym_with_positions;
53
- static VALUE sym_with_offsets;
54
- static VALUE sym_with_positions_offsets;
55
-
56
35
  static ID fsym_content;
57
-
58
36
  static ID id_term;
59
37
  static ID id_fld_num_map;
60
38
  static ID id_field_num;
@@ -62,310 +40,15 @@ static ID id_boost;
62
40
 
63
41
  extern VALUE sym_each;
64
42
  extern rb_encoding *utf8_encoding;
65
- extern void frb_set_term(VALUE rterm, FrtTerm *t);
43
+ extern void frb_fi_get_params(VALUE roptions, FrtStoreValue *store, FrtCompressionType *compression, FrtIndexValue *index, FrtTermVectorValue *term_vector, float *boost);
66
44
  extern FrtAnalyzer *frb_get_cwrapped_analyzer(VALUE ranalyzer);
67
45
  extern VALUE frb_get_analyzer(FrtAnalyzer *a);
46
+ extern VALUE frb_get_field_info(FrtFieldInfo *fi);
68
47
  extern VALUE frb_get_lazy_doc(FrtLazyDoc *lazy_doc);
69
- extern void Init_LazyDoc(void);
70
-
71
- /****************************************************************************
72
- *
73
- * FieldInfo Methods
74
- *
75
- ****************************************************************************/
76
-
77
- static void frb_fi_free(void *p) {
78
- frt_fi_deref((FrtFieldInfo *)p);
79
- }
80
-
81
- static void frb_fi_get_params(VALUE roptions, FrtStoreValue *store, FrtCompressionType *compression, FrtIndexValue *index, FrtTermVectorValue *term_vector, float *boost) {
82
- VALUE v;
83
- Check_Type(roptions, T_HASH);
84
- v = rb_hash_aref(roptions, sym_boost);
85
- if (Qnil != v) {
86
- *boost = (float)NUM2DBL(v);
87
- } else {
88
- *boost = 1.0f;
89
- }
90
- v = rb_hash_aref(roptions, sym_store);
91
- if (Qnil != v) Check_Type(v, T_SYMBOL);
92
- if (v == sym_no || v == sym_false || v == Qfalse) {
93
- *store = FRT_STORE_NO;
94
- } else if (v == sym_yes || v == sym_true || v == Qtrue) {
95
- *store = FRT_STORE_YES;
96
- } else if (v == Qnil) {
97
- /* leave as default */
98
- } else {
99
- rb_raise(rb_eArgError, ":%s isn't a valid argument for :store. Please choose from [:yes, :no]",
100
- rb_id2name(SYM2ID(v)));
101
- }
102
-
103
- v = rb_hash_aref(roptions, sym_compression);
104
- if (Qnil != v) Check_Type(v, T_SYMBOL);
105
- if (v == sym_no || v == sym_false || v == Qfalse) {
106
- *compression = FRT_COMPRESSION_NONE;
107
- } else if (v == sym_yes || v == sym_true || v == Qtrue || v == sym_brotli) {
108
- *compression = FRT_COMPRESSION_BROTLI;
109
- } else if (v == sym_bz2) {
110
- *compression = FRT_COMPRESSION_BZ2;
111
- } else if (v == sym_lz4) {
112
- *compression = FRT_COMPRESSION_LZ4;
113
- } else if (v == Qnil) {
114
- /* leave as default */
115
- } else {
116
- rb_raise(rb_eArgError, ":%s isn't a valid argument for :compression. Please choose from [:yes, :no, :brotli, :bz2, :lz4]",
117
- rb_id2name(SYM2ID(v)));
118
- }
119
-
120
- v = rb_hash_aref(roptions, sym_index);
121
- if (Qnil != v) Check_Type(v, T_SYMBOL);
122
- if (v == sym_no || v == sym_false || v == Qfalse) {
123
- *index = FRT_INDEX_NO;
124
- } else if (v == sym_yes || v == sym_true || v == Qtrue) {
125
- *index = FRT_INDEX_YES;
126
- } else if (v == sym_untokenized) {
127
- *index = FRT_INDEX_UNTOKENIZED;
128
- } else if (v == sym_omit_norms) {
129
- *index = FRT_INDEX_YES_OMIT_NORMS;
130
- } else if (v == sym_untokenized_omit_norms) {
131
- *index = FRT_INDEX_UNTOKENIZED_OMIT_NORMS;
132
- } else if (v == Qnil) {
133
- /* leave as default */
134
- } else {
135
- rb_raise(rb_eArgError, ":%s isn't a valid argument for :index. Please choose from [:no, :yes, :untokenized, "
136
- ":omit_norms, :untokenized_omit_norms]", rb_id2name(SYM2ID(v)));
137
- }
138
-
139
- v = rb_hash_aref(roptions, sym_term_vector);
140
- if (Qnil != v) Check_Type(v, T_SYMBOL);
141
- if (v == sym_no || v == sym_false || v == Qfalse) {
142
- *term_vector = FRT_TERM_VECTOR_NO;
143
- } else if (v == sym_yes || v == sym_true || v == Qtrue) {
144
- *term_vector = FRT_TERM_VECTOR_YES;
145
- } else if (v == sym_with_positions) {
146
- *term_vector = FRT_TERM_VECTOR_WITH_POSITIONS;
147
- } else if (v == sym_with_offsets) {
148
- *term_vector = FRT_TERM_VECTOR_WITH_OFFSETS;
149
- } else if (v == sym_with_positions_offsets) {
150
- *term_vector = FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS;
151
- } else if (v == Qnil) {
152
- /* leave as default */
153
- } else {
154
- rb_raise(rb_eArgError, ":%s isn't a valid argument for :term_vector. Please choose from [:no, :yes, "
155
- ":with_positions, :with_offsets, :with_positions_offsets]", rb_id2name(SYM2ID(v)));
156
- }
157
- }
158
-
159
- static size_t frb_fi_size(const void *p) {
160
- return sizeof(FrtFieldInfo);
161
- (void)p;
162
- }
163
-
164
- const rb_data_type_t frb_field_info_t = {
165
- .wrap_struct_name = "FrbFieldInfo",
166
- .function = {
167
- .dmark = NULL,
168
- .dfree = frb_fi_free,
169
- .dsize = frb_fi_size,
170
- .dcompact = NULL,
171
- .reserved = {0},
172
- },
173
- .parent = NULL,
174
- .data = NULL,
175
- .flags = RUBY_TYPED_FREE_IMMEDIATELY
176
- };
177
-
178
- static VALUE frb_get_field_info(FrtFieldInfo *fi) {
179
- if (fi) {
180
- if (fi->rfi == 0 || fi->rfi == Qnil) {
181
- fi->rfi = TypedData_Wrap_Struct(cFieldInfo, &frb_field_info_t, fi);
182
- FRT_REF(fi);
183
- }
184
- return fi->rfi;
185
- }
186
- return Qnil;
187
- }
188
-
189
- /*
190
- * call-seq:
191
- * FieldInfo.new(name, options = {}) -> field_info
192
- *
193
- * Create a new FieldInfo object with the name +name+ and the properties
194
- * specified in +options+. The available options are [:store, :compression,
195
- * :index, :term_vector, :boost]. See the description of FieldInfo for more
196
- * information on these properties.
197
- */
198
- static VALUE frb_fi_alloc(VALUE rclass) {
199
- FrtFieldInfo *fi = frt_fi_alloc();
200
- return TypedData_Wrap_Struct(rclass, &frb_field_info_t, fi);
201
- }
202
-
203
- static VALUE frb_fi_init(int argc, VALUE *argv, VALUE self) {
204
- VALUE roptions, rname;
205
- FrtFieldInfo *fi;
206
- TypedData_Get_Struct(self, FrtFieldInfo, &frb_field_info_t, fi);
207
- FrtStoreValue store = FRT_STORE_YES;
208
- FrtCompressionType compression = FRT_COMPRESSION_NONE;
209
- FrtIndexValue index = FRT_INDEX_YES;
210
- FrtTermVectorValue term_vector = FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS;
211
- float boost = 1.0f;
212
-
213
- rb_scan_args(argc, argv, "11", &rname, &roptions);
214
- if (argc > 1) {
215
- frb_fi_get_params(roptions, &store, &compression, &index, &term_vector, &boost);
216
- }
217
- fi = frt_fi_init(fi, frb_field(rname), store, compression, index, term_vector);
218
- fi->boost = boost;
219
- fi->rfi = self;
220
- return self;
221
- }
222
-
223
- /*
224
- * call-seq:
225
- * fi.name -> symbol
226
- *
227
- * Return the name of the field
228
- */
229
- static VALUE frb_fi_name(VALUE self) {
230
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
231
- return ID2SYM(fi->name);
232
- }
233
-
234
- /*
235
- * call-seq:
236
- * fi.stored? -> bool
237
- *
238
- * Return true if the field is stored in the index.
239
- */
240
- static VALUE frb_fi_is_stored(VALUE self) {
241
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
242
- return fi_is_stored(fi) ? Qtrue : Qfalse;
243
- }
244
-
245
- /*
246
- * call-seq:
247
- * fi.compressed? -> bool
248
- *
249
- * Return true if the field is stored in the index in compressed format.
250
- */
251
- static VALUE frb_fi_is_compressed(VALUE self) {
252
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
253
- return fi_is_compressed(fi) ? Qtrue : Qfalse;
254
- }
255
-
256
- /*
257
- * call-seq:
258
- * fi.indexed? -> bool
259
- *
260
- * Return true if the field is indexed, i.e. searchable in the index.
261
- */
262
- static VALUE frb_fi_is_indexed(VALUE self) {
263
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
264
- return fi_is_indexed(fi) ? Qtrue : Qfalse;
265
- }
266
-
267
- /*
268
- * call-seq:
269
- * fi.tokenized? -> bool
270
- *
271
- * Return true if the field is tokenized. Tokenizing is the process of
272
- * breaking the field up into tokens. That is "the quick brown fox" becomes:
273
- *
274
- * ["the", "quick", "brown", "fox"]
275
- *
276
- * A field can only be tokenized if it is indexed.
277
- */
278
- static VALUE frb_fi_is_tokenized(VALUE self) {
279
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
280
- return fi_is_tokenized(fi) ? Qtrue : Qfalse;
281
- }
282
-
283
- /*
284
- * call-seq:
285
- * fi.omit_norms? -> bool
286
- *
287
- * Return true if the field omits the norm file. The norm file is the file
288
- * used to store the field boosts for an indexed field. If you do not boost
289
- * any fields, and you can live without scoring based on field length then
290
- * you can omit the norms file. This will give the index a slight performance
291
- * boost and it will use less memory, especially for indexes which have a
292
- * large number of documents.
293
- */
294
- static VALUE frb_fi_omit_norms(VALUE self) {
295
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
296
- return fi_omit_norms(fi) ? Qtrue : Qfalse;
297
- }
298
-
299
- /*
300
- * call-seq:
301
- * fi.store_term_vector? -> bool
302
- *
303
- * Return true if the term-vectors are stored for this field.
304
- */
305
- static VALUE frb_fi_store_term_vector(VALUE self) {
306
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
307
- return fi_store_term_vector(fi) ? Qtrue : Qfalse;
308
- }
309
-
310
- /*
311
- * call-seq:
312
- * fi.store_positions? -> bool
313
- *
314
- * Return true if positions are stored with the term-vectors for this field.
315
- */
316
- static VALUE frb_fi_store_positions(VALUE self) {
317
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
318
- return fi_store_positions(fi) ? Qtrue : Qfalse;
319
- }
320
-
321
- /*
322
- * call-seq:
323
- * fi.store_offsets? -> bool
324
- *
325
- * Return true if offsets are stored with the term-vectors for this field.
326
- */
327
- static VALUE frb_fi_store_offsets(VALUE self) {
328
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
329
- return fi_store_offsets(fi) ? Qtrue : Qfalse;
330
- }
331
-
332
- /*
333
- * call-seq:
334
- * fi.has_norms? -> bool
335
- *
336
- * Return true if this field has a norms file. This is the same as calling;
337
- *
338
- * fi.indexed? and not fi.omit_norms?
339
- */
340
- static VALUE frb_fi_has_norms(VALUE self) {
341
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
342
- return fi_has_norms(fi) ? Qtrue : Qfalse;
343
- }
344
-
345
- /*
346
- * call-seq:
347
- * fi.boost -> boost
348
- *
349
- * Return the default boost for this field
350
- */
351
- static VALUE frb_fi_boost(VALUE self) {
352
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
353
- return rb_float_new((double)fi->boost);
354
- }
48
+ extern void frb_set_term(VALUE rterm, FrtTerm *t);
355
49
 
356
- /*
357
- * call-seq:
358
- * fi.to_s -> string
359
- *
360
- * Return a string representation of the FieldInfo object.
361
- */
362
- static VALUE frb_fi_to_s(VALUE self) {
363
- FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
364
- char *fi_s = frt_fi_to_s(fi);
365
- VALUE rfi_s = rb_str_new2(fi_s);
366
- free(fi_s);
367
- return rfi_s;
368
- }
50
+ extern void Init_FieldInfo(void);
51
+ extern void Init_LazyDoc(void);
369
52
 
370
53
  /****************************************************************************
371
54
  *
@@ -2656,178 +2339,6 @@ static VALUE frb_ir_each(VALUE self) {
2656
2339
  *
2657
2340
  ****************************************************************************/
2658
2341
 
2659
-
2660
- /*
2661
- * Document-class: Ferret::Index::FieldInfo
2662
- *
2663
- * == Summary
2664
- *
2665
- * The FieldInfo class is the field descriptor for the index. It specifies
2666
- * whether a field is compressed or not or whether it should be indexed and
2667
- * tokenized. Every field has a name which must be a symbol. There are three
2668
- * properties that you can set, +:store+, +:index+ and +:term_vector+. You
2669
- * can also set the default +:boost+ for a field as well.
2670
- *
2671
- * == Properties
2672
- *
2673
- * === :store
2674
- *
2675
- * The +:store+ property allows you to specify how a field is stored. You can
2676
- * leave a field unstored (+:no+), store it in its original format (+:yes+)
2677
- * or store it in compressed format (+:compressed+). By default the document
2678
- * is stored in its original format. If the field is large and it is stored
2679
- * elsewhere where it is easily accessible you might want to leave it
2680
- * unstored. This will keep the index size a lot smaller and make the
2681
- * indexing process a lot faster. For example, you should probably leave the
2682
- * +:content+ field unstored when indexing all the documents in your
2683
- * file-system.
2684
- *
2685
- * === :index
2686
- *
2687
- * The +:index+ property allows you to specify how a field is indexed. A
2688
- * field must be indexed to be searchable. However, a field doesn't need to
2689
- * be indexed to be stored in the Ferret index. You may want to use the index
2690
- * as a simple database and store things like images or MP3s in the index. By
2691
- * default each field is indexed and tokenized (split into tokens) (+:yes+).
2692
- * If you don't want to index the field use +:no+. If you want the field
2693
- * indexed but not tokenized, use +:untokenized+. Do this for the fields you
2694
- * wish to sort by. There are two other values for +:index+; +:omit_norms+
2695
- * and +:untokenized_omit_norms+. These values correspond to +:yes+ and
2696
- * +:untokenized+ respectively and are useful if you are not boosting any
2697
- * fields and you'd like to speed up the index. The norms file is the file
2698
- * which contains the boost values for each document for a particular field.
2699
- *
2700
- * === :term_vector
2701
- *
2702
- * See TermVector for a description of term-vectors. You can specify whether
2703
- * or not you would like to store term-vectors. The available options are
2704
- * +:no+, +:yes+, +:with_positions+, +:with_offsets+ and
2705
- * +:with_positions_offsets+. Note that you need to store the positions to
2706
- * associate offsets with individual terms in the term_vector.
2707
- *
2708
- * == Property Table
2709
- *
2710
- * Property Value Description
2711
- * ------------------------------------------------------------------------
2712
- * :store | :no | Don't store field
2713
- * | |
2714
- * | :yes (default) | Store field in its original
2715
- * | | format. Use this value if you
2716
- * | | want to highlight matches
2717
- * | | or print match excerpts a la
2718
- * | | Google search.
2719
- * -------------|-------------------------|------------------------------
2720
- * :compression | :no (default) | Don't compress stored field
2721
- * | |
2722
- * | :brotli | Compress field using Brotli
2723
- * | |
2724
- * | :bz2 | Compress field using BZip2
2725
- * | |
2726
- * | :lz4 | Compress field using LZ4
2727
- * -------------|-------------------------|------------------------------
2728
- * :index | :no | Do not make this field
2729
- * | | searchable.
2730
- * | |
2731
- * | :yes (default) | Make this field searchable and
2732
- * | | tokenize its contents.
2733
- * | |
2734
- * | :untokenized | Make this field searchable but
2735
- * | | do not tokenize its contents.
2736
- * | | use this value for fields you
2737
- * | | wish to sort by.
2738
- * | |
2739
- * | :omit_norms | Same as :yes except omit the
2740
- * | | norms file. The norms file can
2741
- * | | be omitted if you don't boost
2742
- * | | any fields and you don't need
2743
- * | | scoring based on field length.
2744
- * | |
2745
- * | :untokenized_omit_norms | Same as :untokenized except omit
2746
- * | | the norms file. Norms files can
2747
- * | | be omitted if you don't boost
2748
- * | | any fields and you don't need
2749
- * | | scoring based on field length.
2750
- * | |
2751
- * -------------|-------------------------|------------------------------
2752
- * :term_vector | :no | Don't store term-vectors
2753
- * | |
2754
- * | :yes | Store term-vectors without
2755
- * | | storing positions or offsets.
2756
- * | |
2757
- * | :with_positions | Store term-vectors with
2758
- * | | positions.
2759
- * | |
2760
- * | :with_offsets | Store term-vectors with
2761
- * | | offsets.
2762
- * | |
2763
- * | :with_positions_offsets | Store term-vectors with
2764
- * | (default) | positions and offsets.
2765
- * -------------|-------------------------|------------------------------
2766
- * :boost | Float | The boost property is used to
2767
- * | | set the default boost for a
2768
- * | | field. This boost value will
2769
- * | | be used for all instances of the
2770
- * | | field in the index unless
2771
- * | | otherwise specified when you
2772
- * | | create the field. All values
2773
- * | | should be positive.
2774
- * | |
2775
- *
2776
- * == Examples
2777
- *
2778
- * fi = FieldInfo.new(:title, :index => :untokenized, :term_vector => :no,
2779
- * :boost => 10.0)
2780
- *
2781
- * fi = FieldInfo.new(:content)
2782
- *
2783
- * fi = FieldInfo.new(:created_on, :index => :untokenized_omit_norms,
2784
- * :term_vector => :no)
2785
- *
2786
- * fi = FieldInfo.new(:image, :store => :yes, :compression => :brotli, :index => :no,
2787
- * :term_vector => :no)
2788
- */
2789
- static void
2790
- Init_FieldInfo(void)
2791
- {
2792
- sym_store = ID2SYM(rb_intern("store"));
2793
- sym_index = ID2SYM(rb_intern("index"));
2794
- sym_term_vector = ID2SYM(rb_intern("term_vector"));
2795
-
2796
- sym_brotli = ID2SYM(rb_intern("brotli"));
2797
- sym_bz2 = ID2SYM(rb_intern("bz2"));
2798
- sym_lz4 = ID2SYM(rb_intern("lz4"));
2799
- // sym_level = ID2SYM(rb_intern("level"));
2800
- sym_compression = ID2SYM(rb_intern("compression"));
2801
-
2802
- sym_untokenized = ID2SYM(rb_intern("untokenized"));
2803
- sym_omit_norms = ID2SYM(rb_intern("omit_norms"));
2804
- sym_untokenized_omit_norms = ID2SYM(rb_intern("untokenized_omit_norms"));
2805
-
2806
- sym_with_positions = ID2SYM(rb_intern("with_positions"));
2807
- sym_with_offsets = ID2SYM(rb_intern("with_offsets"));
2808
- sym_with_positions_offsets = ID2SYM(rb_intern("with_positions_offsets"));
2809
-
2810
- cFieldInfo = rb_define_class_under(mIndex, "FieldInfo", rb_cObject);
2811
- rb_define_alloc_func(cFieldInfo, frb_fi_alloc);
2812
-
2813
- rb_define_method(cFieldInfo, "initialize", frb_fi_init, -1);
2814
- rb_define_method(cFieldInfo, "name", frb_fi_name, 0);
2815
- rb_define_method(cFieldInfo, "stored?", frb_fi_is_stored, 0);
2816
- rb_define_method(cFieldInfo, "compressed?", frb_fi_is_compressed, 0);
2817
- rb_define_method(cFieldInfo, "indexed?", frb_fi_is_indexed, 0);
2818
- rb_define_method(cFieldInfo, "tokenized?", frb_fi_is_tokenized, 0);
2819
- rb_define_method(cFieldInfo, "omit_norms?", frb_fi_omit_norms, 0);
2820
- rb_define_method(cFieldInfo, "store_term_vector?",
2821
- frb_fi_store_term_vector, 0);
2822
- rb_define_method(cFieldInfo, "store_positions?",
2823
- frb_fi_store_positions, 0);
2824
- rb_define_method(cFieldInfo, "store_offsets?",
2825
- frb_fi_store_offsets, 0);
2826
- rb_define_method(cFieldInfo, "has_norms?", frb_fi_has_norms, 0);
2827
- rb_define_method(cFieldInfo, "boost", frb_fi_boost, 0);
2828
- rb_define_method(cFieldInfo, "to_s", frb_fi_to_s, 0);
2829
- }
2830
-
2831
2342
  /*
2832
2343
  * Document-class: Ferret::Index::FieldInfos
2833
2344
  *
@@ -2,7 +2,7 @@
2
2
  #include "isomorfeus_ferret.h"
3
3
 
4
4
  static VALUE cQueryParser;
5
- VALUE cQueryParseException;
5
+ static VALUE cQueryParseException;
6
6
 
7
7
  extern VALUE sym_analyzer;
8
8
  static VALUE sym_wild_card_downcase;
@@ -3789,10 +3789,6 @@ VALUE frb_get_q(FrtQuery *q) {
3789
3789
  *
3790
3790
  ****************************************************************************/
3791
3791
 
3792
- /* rdochack
3793
- cTopDocs = rb_define_class_under(mSearch, "TopDocs", rb_cObject);
3794
- */
3795
-
3796
3792
  /*
3797
3793
  * Document-class: Ferret::Search::Hit
3798
3794
  *
@@ -3806,9 +3802,6 @@ cTopDocs = rb_define_class_under(mSearch, "TopDocs", rb_cObject);
3806
3802
  */
3807
3803
  static void Init_Hit(void) {
3808
3804
  const char *hit_class = "Hit";
3809
- /* rdochack
3810
- cHit = rb_define_class_under(mSearch, "Hit", rb_cObject);
3811
- */
3812
3805
  cHit = rb_struct_define(hit_class, "doc", "score", NULL);
3813
3806
  rb_set_class_path(cHit, mSearch, hit_class);
3814
3807
  rb_const_set(mSearch, rb_intern(hit_class), cHit);
@@ -3836,9 +3829,6 @@ static void Init_Hit(void) {
3836
3829
  */
3837
3830
  static void Init_TopDocs(void) {
3838
3831
  const char *td_class = "TopDocs";
3839
- /* rdochack
3840
- cTopDocs = rb_define_class_under(mSearch, "TopDocs", rb_cObject);
3841
- */
3842
3832
  cTopDocs = rb_struct_define(td_class,
3843
3833
  "total_hits",
3844
3834
  "hits",
@@ -41,12 +41,12 @@ extern VALUE cLockError;
41
41
  extern VALUE cTerm;
42
42
 
43
43
  /* Ferret Inits */
44
- extern void Init_Utils();
45
- extern void Init_Analysis();
46
- extern void Init_Store();
47
- extern void Init_Index();
48
- extern void Init_Search();
49
- extern void Init_QueryParser();
44
+ extern void Init_Utils(void);
45
+ extern void Init_Analysis(void);
46
+ extern void Init_Store(void);
47
+ extern void Init_Index(void);
48
+ extern void Init_Search(void);
49
+ extern void Init_QueryParser(void);
50
50
 
51
51
  extern void frb_raise(int excode, const char *msg);
52
52
  extern void frb_create_dir(VALUE rpath);
@@ -384,14 +384,11 @@ module Isomorfeus
384
384
  # puts "hit document number #{doc} with a score of #{score}"
385
385
  # end
386
386
  #
387
- def search_each(query, options = {}) # :yield: doc, score
387
+ def search_each(query, options = {}, &block) # :yield: doc, score
388
388
  @dir.synchronize do
389
389
  ensure_searcher_open()
390
390
  query = do_process_query(query)
391
-
392
- @searcher.search_each(query, options) do |doc, score|
393
- yield doc, score
394
- end
391
+ @searcher.search_each(query, options, &block)
395
392
  end
396
393
  end
397
394
 
@@ -1,5 +1,5 @@
1
1
  module Isomorfeus
2
2
  module Ferret
3
- VERSION = '0.14.0'
3
+ VERSION = '0.14.1'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: isomorfeus-ferret
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.14.0
4
+ version: 0.14.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Biedermann
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-05-28 00:00:00.000000000 Z
11
+ date: 2022-06-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: oj
@@ -180,6 +180,7 @@ files:
180
180
  - ext/isomorfeus_ferret_ext/extconf.rb
181
181
  - ext/isomorfeus_ferret_ext/fio_tmpfile.h
182
182
  - ext/isomorfeus_ferret_ext/frb_analysis.c
183
+ - ext/isomorfeus_ferret_ext/frb_field_info.c
183
184
  - ext/isomorfeus_ferret_ext/frb_index.c
184
185
  - ext/isomorfeus_ferret_ext/frb_lazy_doc.c
185
186
  - ext/isomorfeus_ferret_ext/frb_qparser.c