isomorfeus-ferret 0.13.10 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +40 -19
- data/ext/isomorfeus_ferret_ext/bzlib.h +83 -82
- data/ext/isomorfeus_ferret_ext/frb_index.c +55 -194
- data/ext/isomorfeus_ferret_ext/frb_lazy_doc.c +705 -0
- data/ext/isomorfeus_ferret_ext/frb_store.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_config.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_hash.h +6 -8
- data/ext/isomorfeus_ferret_ext/frt_hashset.c +5 -5
- data/ext/isomorfeus_ferret_ext/frt_index.c +8 -3
- data/ext/isomorfeus_ferret_ext/frt_index.h +3 -1
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_store.h +1 -1
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +8 -6
- data/lib/isomorfeus/ferret/index/index.rb +11 -8
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +3 -2
@@ -1,6 +1,5 @@
|
|
1
1
|
#include "frt_index.h"
|
2
2
|
#include "isomorfeus_ferret.h"
|
3
|
-
#include <ruby.h>
|
4
3
|
|
5
4
|
// #undef close
|
6
5
|
|
@@ -16,8 +15,6 @@ VALUE cTermVector;
|
|
16
15
|
VALUE cTermEnum;
|
17
16
|
VALUE cTermDocEnum;
|
18
17
|
|
19
|
-
VALUE cLazyDoc;
|
20
|
-
VALUE cLazyDocData;
|
21
18
|
VALUE cIndexWriter;
|
22
19
|
VALUE cIndexReader;
|
23
20
|
|
@@ -59,15 +56,17 @@ static VALUE sym_with_positions_offsets;
|
|
59
56
|
static ID fsym_content;
|
60
57
|
|
61
58
|
static ID id_term;
|
62
|
-
static ID id_fields;
|
63
59
|
static ID id_fld_num_map;
|
64
60
|
static ID id_field_num;
|
65
61
|
static ID id_boost;
|
66
62
|
|
63
|
+
extern VALUE sym_each;
|
67
64
|
extern rb_encoding *utf8_encoding;
|
68
65
|
extern void frb_set_term(VALUE rterm, FrtTerm *t);
|
69
66
|
extern FrtAnalyzer *frb_get_cwrapped_analyzer(VALUE ranalyzer);
|
70
67
|
extern VALUE frb_get_analyzer(FrtAnalyzer *a);
|
68
|
+
extern VALUE frb_get_lazy_doc(FrtLazyDoc *lazy_doc);
|
69
|
+
extern void Init_LazyDoc(void);
|
71
70
|
|
72
71
|
/****************************************************************************
|
73
72
|
*
|
@@ -229,7 +228,7 @@ static VALUE frb_fi_init(int argc, VALUE *argv, VALUE self) {
|
|
229
228
|
*/
|
230
229
|
static VALUE frb_fi_name(VALUE self) {
|
231
230
|
FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
|
232
|
-
return
|
231
|
+
return ID2SYM(fi->name);
|
233
232
|
}
|
234
233
|
|
235
234
|
/*
|
@@ -659,7 +658,7 @@ frb_fis_get_tk_fields(VALUE self)
|
|
659
658
|
int i;
|
660
659
|
for (i = 0; i < fis->size; i++) {
|
661
660
|
if (!fi_is_tokenized(fis->fields[i])) continue;
|
662
|
-
rb_ary_push(rfield_names,
|
661
|
+
rb_ary_push(rfield_names, ID2SYM(fis->fields[i]->name));
|
663
662
|
}
|
664
663
|
return rfield_names;
|
665
664
|
}
|
@@ -1987,125 +1986,6 @@ frb_iw_set_use_compound_file(VALUE self, VALUE rval)
|
|
1987
1986
|
return rval;
|
1988
1987
|
}
|
1989
1988
|
|
1990
|
-
/****************************************************************************
|
1991
|
-
*
|
1992
|
-
* LazyDoc Methods
|
1993
|
-
*
|
1994
|
-
****************************************************************************/
|
1995
|
-
|
1996
|
-
static void frb_lzd_data_free(void *p) {
|
1997
|
-
frt_lazy_doc_close((FrtLazyDoc *)p);
|
1998
|
-
}
|
1999
|
-
|
2000
|
-
static size_t frb_lazy_doc_size(const void *p) {
|
2001
|
-
return sizeof(FrtLazyDoc);
|
2002
|
-
(void)p;
|
2003
|
-
}
|
2004
|
-
|
2005
|
-
const rb_data_type_t frb_lazy_doc_t = {
|
2006
|
-
.wrap_struct_name = "FrbLazyDoc",
|
2007
|
-
.function = {
|
2008
|
-
.dmark = NULL,
|
2009
|
-
.dfree = frb_lzd_data_free,
|
2010
|
-
.dsize = frb_lazy_doc_size,
|
2011
|
-
.dcompact = NULL,
|
2012
|
-
.reserved = {0},
|
2013
|
-
},
|
2014
|
-
.parent = NULL,
|
2015
|
-
.data = NULL,
|
2016
|
-
.flags = RUBY_TYPED_FREE_IMMEDIATELY
|
2017
|
-
};
|
2018
|
-
|
2019
|
-
static VALUE frb_lzd_alloc(VALUE klass) {
|
2020
|
-
FrtLazyDoc *ld = FRT_ALLOC(FrtLazyDoc);
|
2021
|
-
return TypedData_Wrap_Struct(klass, &frb_lazy_doc_t, ld);
|
2022
|
-
}
|
2023
|
-
|
2024
|
-
static VALUE frb_lazy_df_load(VALUE self, VALUE rkey, FrtLazyDocField *lazy_df) {
|
2025
|
-
VALUE rdata = Qnil;
|
2026
|
-
if (lazy_df) {
|
2027
|
-
if (lazy_df->size == 1) {
|
2028
|
-
char *data = frt_lazy_df_get_data(lazy_df, 0);
|
2029
|
-
rdata = rb_str_new(data, lazy_df->data[0].length);
|
2030
|
-
rb_enc_associate(rdata, lazy_df->data[0].encoding);
|
2031
|
-
} else {
|
2032
|
-
int i;
|
2033
|
-
VALUE rstr;
|
2034
|
-
rdata = rb_ary_new2(lazy_df->size);
|
2035
|
-
for (i = 0; i < lazy_df->size; i++) {
|
2036
|
-
char *data = frt_lazy_df_get_data(lazy_df, i);
|
2037
|
-
rstr = rb_str_new(data, lazy_df->data[i].length);
|
2038
|
-
rb_enc_associate(rstr, lazy_df->data[i].encoding);
|
2039
|
-
rb_ary_store(rdata, i, rstr);
|
2040
|
-
}
|
2041
|
-
}
|
2042
|
-
rb_hash_aset(self, rkey, rdata);
|
2043
|
-
}
|
2044
|
-
return rdata;
|
2045
|
-
}
|
2046
|
-
|
2047
|
-
/*
|
2048
|
-
* call-seq:
|
2049
|
-
* lazy_doc.default(key) -> string
|
2050
|
-
*
|
2051
|
-
* This method is used internally to lazily load fields. You should never
|
2052
|
-
* really need to call it yourself.
|
2053
|
-
*/
|
2054
|
-
static VALUE frb_lzd_default(VALUE self, VALUE rkey) {
|
2055
|
-
FrtLazyDoc *lazy_doc = (FrtLazyDoc *)DATA_PTR(rb_ivar_get(self, id_data));
|
2056
|
-
ID field = frb_field(rkey);
|
2057
|
-
VALUE rfield = ID2SYM(field);
|
2058
|
-
|
2059
|
-
return frb_lazy_df_load(self, rfield, frt_lazy_doc_get(lazy_doc, field));
|
2060
|
-
}
|
2061
|
-
|
2062
|
-
/*
|
2063
|
-
* call-seq:
|
2064
|
-
* lazy_doc.fields -> array of available fields
|
2065
|
-
*
|
2066
|
-
* Returns the list of fields stored for this particular document. If you try
|
2067
|
-
* to access any of these fields in the document the field will be loaded.
|
2068
|
-
* Try to access any other field an nil will be returned.
|
2069
|
-
*/
|
2070
|
-
static VALUE frb_lzd_fields(VALUE self) {
|
2071
|
-
return rb_ivar_get(self, id_fields);
|
2072
|
-
}
|
2073
|
-
|
2074
|
-
/*
|
2075
|
-
* call-seq:
|
2076
|
-
* lazy_doc.load -> lazy_doc
|
2077
|
-
*
|
2078
|
-
* Load all unloaded fields in the document from the index.
|
2079
|
-
*/
|
2080
|
-
static VALUE frb_lzd_load(VALUE self) {
|
2081
|
-
FrtLazyDoc *lazy_doc = (FrtLazyDoc *)DATA_PTR(rb_ivar_get(self, id_data));
|
2082
|
-
int i;
|
2083
|
-
for (i = 0; i < lazy_doc->size; i++) {
|
2084
|
-
FrtLazyDocField *lazy_df = lazy_doc->fields[i];
|
2085
|
-
frb_lazy_df_load(self, ID2SYM(lazy_df->name), lazy_df);
|
2086
|
-
}
|
2087
|
-
return self;
|
2088
|
-
}
|
2089
|
-
|
2090
|
-
VALUE frb_get_lazy_doc(FrtLazyDoc *lazy_doc) {
|
2091
|
-
int i;
|
2092
|
-
VALUE rfields = rb_ary_new2(lazy_doc->size);
|
2093
|
-
|
2094
|
-
VALUE self, rdata;
|
2095
|
-
self = rb_hash_new();
|
2096
|
-
OBJSETUP(self, cLazyDoc, T_HASH);
|
2097
|
-
|
2098
|
-
rdata = TypedData_Wrap_Struct(cLazyDocData, &frb_lazy_doc_t, lazy_doc);
|
2099
|
-
rb_ivar_set(self, id_data, rdata);
|
2100
|
-
|
2101
|
-
for (i = 0; i < lazy_doc->size; i++) {
|
2102
|
-
rb_ary_store(rfields, i, ID2SYM(lazy_doc->fields[i]->name));
|
2103
|
-
}
|
2104
|
-
rb_ivar_set(self, id_fields, rfields);
|
2105
|
-
|
2106
|
-
return self;
|
2107
|
-
}
|
2108
|
-
|
2109
1989
|
/****************************************************************************
|
2110
1990
|
*
|
2111
1991
|
* IndexReader Methods
|
@@ -2743,12 +2623,33 @@ frb_ir_tk_fields(VALUE self)
|
|
2743
2623
|
* Returns the current version of the index reader.
|
2744
2624
|
*/
|
2745
2625
|
static VALUE
|
2746
|
-
frb_ir_version(VALUE self)
|
2747
|
-
{
|
2626
|
+
frb_ir_version(VALUE self) {
|
2748
2627
|
FrtIndexReader *ir = (FrtIndexReader *)DATA_PTR(self);
|
2749
2628
|
return ULL2NUM(ir->sis->version);
|
2750
2629
|
}
|
2751
2630
|
|
2631
|
+
static VALUE frb_ir_to_enum(VALUE self) {
|
2632
|
+
return rb_enumeratorize(self, sym_each, 0, NULL);
|
2633
|
+
}
|
2634
|
+
|
2635
|
+
static VALUE frb_ir_each(VALUE self) {
|
2636
|
+
FrtIndexReader *ir = (FrtIndexReader *)DATA_PTR(self);
|
2637
|
+
if (rb_block_given_p()) {
|
2638
|
+
long i;
|
2639
|
+
long max_doc = ir->max_doc(ir);
|
2640
|
+
VALUE rld;
|
2641
|
+
for (i = 0; i < max_doc; i++) {
|
2642
|
+
if (ir->is_deleted(ir, i)) continue;
|
2643
|
+
rld = frb_get_lazy_doc(ir->get_lazy_doc(ir, i));
|
2644
|
+
rb_yield(rld);
|
2645
|
+
}
|
2646
|
+
return self;
|
2647
|
+
} else {
|
2648
|
+
return frb_ir_to_enum(self);
|
2649
|
+
}
|
2650
|
+
|
2651
|
+
}
|
2652
|
+
|
2752
2653
|
/****************************************************************************
|
2753
2654
|
*
|
2754
2655
|
* Init Functions
|
@@ -3350,48 +3251,6 @@ void Init_IndexWriter(void) {
|
|
3350
3251
|
rb_define_method(cIndexWriter, "use_compound_file=", frb_iw_set_use_compound_file, 1);
|
3351
3252
|
}
|
3352
3253
|
|
3353
|
-
/*
|
3354
|
-
* Document-class: Ferret::Index::LazyDoc
|
3355
|
-
*
|
3356
|
-
* == Summary
|
3357
|
-
*
|
3358
|
-
* When a document is retrieved from the index a LazyDoc is returned.
|
3359
|
-
* Actually, LazyDoc is just a modified Hash object which lazily adds fields
|
3360
|
-
* to itself when they are accessed. You should note that the keys method
|
3361
|
-
* will return nothing until you actually access one of the fields. To see
|
3362
|
-
* what fields are available use LazyDoc#fields rather than LazyDoc#keys. To
|
3363
|
-
* load all fields use the LazyDoc#load method.
|
3364
|
-
*
|
3365
|
-
* == Example
|
3366
|
-
*
|
3367
|
-
* doc = index_reader[0]
|
3368
|
-
*
|
3369
|
-
* doc.keys #=> []
|
3370
|
-
* doc.values #=> []
|
3371
|
-
* doc.fields #=> [:title, :content]
|
3372
|
-
*
|
3373
|
-
* title = doc[:title] #=> "the title"
|
3374
|
-
* doc.keys #=> [:title]
|
3375
|
-
* doc.values #=> ["the title"]
|
3376
|
-
* doc.fields #=> [:title, :content]
|
3377
|
-
*
|
3378
|
-
* doc.load
|
3379
|
-
* doc.keys #=> [:title, :content]
|
3380
|
-
* doc.values #=> ["the title", "the content"]
|
3381
|
-
* doc.fields #=> [:title, :content]
|
3382
|
-
*/
|
3383
|
-
void Init_LazyDoc(void) {
|
3384
|
-
id_fields = rb_intern("@fields");
|
3385
|
-
|
3386
|
-
cLazyDoc = rb_define_class_under(mIndex, "LazyDoc", rb_cHash);
|
3387
|
-
rb_define_method(cLazyDoc, "default", frb_lzd_default, 1);
|
3388
|
-
rb_define_method(cLazyDoc, "load", frb_lzd_load, 0);
|
3389
|
-
rb_define_method(cLazyDoc, "fields", frb_lzd_fields, 0);
|
3390
|
-
|
3391
|
-
cLazyDocData = rb_define_class_under(cLazyDoc, "LazyDocData", rb_cObject);
|
3392
|
-
rb_define_alloc_func(cLazyDocData, frb_lzd_alloc);
|
3393
|
-
}
|
3394
|
-
|
3395
3254
|
/*
|
3396
3255
|
* Document-class: Ferret::Index::IndexReader
|
3397
3256
|
*
|
@@ -3405,36 +3264,38 @@ void Init_LazyDoc(void) {
|
|
3405
3264
|
void Init_IndexReader(void) {
|
3406
3265
|
cIndexReader = rb_define_class_under(mIndex, "IndexReader", rb_cObject);
|
3407
3266
|
rb_define_alloc_func(cIndexReader, frb_ir_alloc);
|
3408
|
-
rb_define_method(cIndexReader, "initialize", frb_ir_init,
|
3409
|
-
rb_define_method(cIndexReader, "set_norm", frb_ir_set_norm,
|
3410
|
-
rb_define_method(cIndexReader, "norms", frb_ir_norms,
|
3267
|
+
rb_define_method(cIndexReader, "initialize", frb_ir_init, 1);
|
3268
|
+
rb_define_method(cIndexReader, "set_norm", frb_ir_set_norm, 3);
|
3269
|
+
rb_define_method(cIndexReader, "norms", frb_ir_norms, 1);
|
3411
3270
|
rb_define_method(cIndexReader, "get_norms_into", frb_ir_get_norms_into, 3);
|
3412
|
-
rb_define_method(cIndexReader, "commit", frb_ir_commit,
|
3413
|
-
rb_define_method(cIndexReader, "close", frb_ir_close,
|
3271
|
+
rb_define_method(cIndexReader, "commit", frb_ir_commit, 0);
|
3272
|
+
rb_define_method(cIndexReader, "close", frb_ir_close, 0);
|
3414
3273
|
rb_define_method(cIndexReader, "has_deletions?", frb_ir_has_deletions, 0);
|
3415
|
-
rb_define_method(cIndexReader, "delete", frb_ir_delete,
|
3416
|
-
rb_define_method(cIndexReader, "deleted?", frb_ir_is_deleted,
|
3417
|
-
rb_define_method(cIndexReader, "max_doc", frb_ir_max_doc,
|
3418
|
-
rb_define_method(cIndexReader, "num_docs", frb_ir_num_docs,
|
3419
|
-
rb_define_method(cIndexReader, "undelete_all", frb_ir_undelete_all,
|
3420
|
-
rb_define_method(cIndexReader, "latest?", frb_ir_is_latest,
|
3421
|
-
rb_define_method(cIndexReader, "get_document", frb_ir_get_doc,
|
3422
|
-
rb_define_method(cIndexReader, "[]", frb_ir_get_doc,
|
3423
|
-
rb_define_method(cIndexReader, "term_vector", frb_ir_term_vector,
|
3424
|
-
rb_define_method(cIndexReader, "term_vectors", frb_ir_term_vectors,
|
3425
|
-
rb_define_method(cIndexReader, "term_docs", frb_ir_term_docs,
|
3274
|
+
rb_define_method(cIndexReader, "delete", frb_ir_delete, 1);
|
3275
|
+
rb_define_method(cIndexReader, "deleted?", frb_ir_is_deleted, 1);
|
3276
|
+
rb_define_method(cIndexReader, "max_doc", frb_ir_max_doc, 0);
|
3277
|
+
rb_define_method(cIndexReader, "num_docs", frb_ir_num_docs, 0);
|
3278
|
+
rb_define_method(cIndexReader, "undelete_all", frb_ir_undelete_all, 0);
|
3279
|
+
rb_define_method(cIndexReader, "latest?", frb_ir_is_latest, 0);
|
3280
|
+
rb_define_method(cIndexReader, "get_document", frb_ir_get_doc, -1);
|
3281
|
+
rb_define_method(cIndexReader, "[]", frb_ir_get_doc, -1);
|
3282
|
+
rb_define_method(cIndexReader, "term_vector", frb_ir_term_vector, 2);
|
3283
|
+
rb_define_method(cIndexReader, "term_vectors", frb_ir_term_vectors, 1);
|
3284
|
+
rb_define_method(cIndexReader, "term_docs", frb_ir_term_docs, 0);
|
3426
3285
|
rb_define_method(cIndexReader, "term_positions", frb_ir_term_positions, 0);
|
3427
3286
|
rb_define_method(cIndexReader, "term_docs_for", frb_ir_term_docs_for, 2);
|
3428
3287
|
rb_define_method(cIndexReader, "term_positions_for", frb_ir_t_pos_for, 2);
|
3429
|
-
rb_define_method(cIndexReader, "doc_freq", frb_ir_doc_freq,
|
3430
|
-
rb_define_method(cIndexReader, "terms", frb_ir_terms,
|
3431
|
-
rb_define_method(cIndexReader, "terms_from", frb_ir_terms_from,
|
3432
|
-
rb_define_method(cIndexReader, "term_count", frb_ir_term_count,
|
3433
|
-
rb_define_method(cIndexReader, "fields", frb_ir_fields,
|
3434
|
-
rb_define_method(cIndexReader, "field_names", frb_ir_fields,
|
3435
|
-
rb_define_method(cIndexReader, "field_infos", frb_ir_field_infos,
|
3436
|
-
rb_define_method(cIndexReader, "tokenized_fields", frb_ir_tk_fields,
|
3437
|
-
rb_define_method(cIndexReader, "version", frb_ir_version,
|
3288
|
+
rb_define_method(cIndexReader, "doc_freq", frb_ir_doc_freq, 2);
|
3289
|
+
rb_define_method(cIndexReader, "terms", frb_ir_terms, 1);
|
3290
|
+
rb_define_method(cIndexReader, "terms_from", frb_ir_terms_from, 2);
|
3291
|
+
rb_define_method(cIndexReader, "term_count", frb_ir_term_count, 1);
|
3292
|
+
rb_define_method(cIndexReader, "fields", frb_ir_fields, 0);
|
3293
|
+
rb_define_method(cIndexReader, "field_names", frb_ir_fields, 0);
|
3294
|
+
rb_define_method(cIndexReader, "field_infos", frb_ir_field_infos, 0);
|
3295
|
+
rb_define_method(cIndexReader, "tokenized_fields", frb_ir_tk_fields, 0);
|
3296
|
+
rb_define_method(cIndexReader, "version", frb_ir_version, 0);
|
3297
|
+
rb_define_method(cIndexReader, "each", frb_ir_each, 0);
|
3298
|
+
rb_define_method(cIndexReader, "to_enum", frb_ir_to_enum, 0);
|
3438
3299
|
}
|
3439
3300
|
|
3440
3301
|
/* rdoc hack
|