isomorfeus-ferret 0.13.10 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,5 @@
1
1
  #include "frt_index.h"
2
2
  #include "isomorfeus_ferret.h"
3
- #include <ruby.h>
4
3
 
5
4
  // #undef close
6
5
 
@@ -16,8 +15,6 @@ VALUE cTermVector;
16
15
  VALUE cTermEnum;
17
16
  VALUE cTermDocEnum;
18
17
 
19
- VALUE cLazyDoc;
20
- VALUE cLazyDocData;
21
18
  VALUE cIndexWriter;
22
19
  VALUE cIndexReader;
23
20
 
@@ -59,15 +56,17 @@ static VALUE sym_with_positions_offsets;
59
56
  static ID fsym_content;
60
57
 
61
58
  static ID id_term;
62
- static ID id_fields;
63
59
  static ID id_fld_num_map;
64
60
  static ID id_field_num;
65
61
  static ID id_boost;
66
62
 
63
+ extern VALUE sym_each;
67
64
  extern rb_encoding *utf8_encoding;
68
65
  extern void frb_set_term(VALUE rterm, FrtTerm *t);
69
66
  extern FrtAnalyzer *frb_get_cwrapped_analyzer(VALUE ranalyzer);
70
67
  extern VALUE frb_get_analyzer(FrtAnalyzer *a);
68
+ extern VALUE frb_get_lazy_doc(FrtLazyDoc *lazy_doc);
69
+ extern void Init_LazyDoc(void);
71
70
 
72
71
  /****************************************************************************
73
72
  *
@@ -229,7 +228,7 @@ static VALUE frb_fi_init(int argc, VALUE *argv, VALUE self) {
229
228
  */
230
229
  static VALUE frb_fi_name(VALUE self) {
231
230
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
232
- return rb_str_new_cstr(rb_id2name(fi->name));
231
+ return ID2SYM(fi->name);
233
232
  }
234
233
 
235
234
  /*
@@ -659,7 +658,7 @@ frb_fis_get_tk_fields(VALUE self)
659
658
  int i;
660
659
  for (i = 0; i < fis->size; i++) {
661
660
  if (!fi_is_tokenized(fis->fields[i])) continue;
662
- rb_ary_push(rfield_names, rb_str_new_cstr(rb_id2name(fis->fields[i]->name)));
661
+ rb_ary_push(rfield_names, ID2SYM(fis->fields[i]->name));
663
662
  }
664
663
  return rfield_names;
665
664
  }
@@ -1987,125 +1986,6 @@ frb_iw_set_use_compound_file(VALUE self, VALUE rval)
1987
1986
  return rval;
1988
1987
  }
1989
1988
 
1990
- /****************************************************************************
1991
- *
1992
- * LazyDoc Methods
1993
- *
1994
- ****************************************************************************/
1995
-
1996
- static void frb_lzd_data_free(void *p) {
1997
- frt_lazy_doc_close((FrtLazyDoc *)p);
1998
- }
1999
-
2000
- static size_t frb_lazy_doc_size(const void *p) {
2001
- return sizeof(FrtLazyDoc);
2002
- (void)p;
2003
- }
2004
-
2005
- const rb_data_type_t frb_lazy_doc_t = {
2006
- .wrap_struct_name = "FrbLazyDoc",
2007
- .function = {
2008
- .dmark = NULL,
2009
- .dfree = frb_lzd_data_free,
2010
- .dsize = frb_lazy_doc_size,
2011
- .dcompact = NULL,
2012
- .reserved = {0},
2013
- },
2014
- .parent = NULL,
2015
- .data = NULL,
2016
- .flags = RUBY_TYPED_FREE_IMMEDIATELY
2017
- };
2018
-
2019
- static VALUE frb_lzd_alloc(VALUE klass) {
2020
- FrtLazyDoc *ld = FRT_ALLOC(FrtLazyDoc);
2021
- return TypedData_Wrap_Struct(klass, &frb_lazy_doc_t, ld);
2022
- }
2023
-
2024
- static VALUE frb_lazy_df_load(VALUE self, VALUE rkey, FrtLazyDocField *lazy_df) {
2025
- VALUE rdata = Qnil;
2026
- if (lazy_df) {
2027
- if (lazy_df->size == 1) {
2028
- char *data = frt_lazy_df_get_data(lazy_df, 0);
2029
- rdata = rb_str_new(data, lazy_df->data[0].length);
2030
- rb_enc_associate(rdata, lazy_df->data[0].encoding);
2031
- } else {
2032
- int i;
2033
- VALUE rstr;
2034
- rdata = rb_ary_new2(lazy_df->size);
2035
- for (i = 0; i < lazy_df->size; i++) {
2036
- char *data = frt_lazy_df_get_data(lazy_df, i);
2037
- rstr = rb_str_new(data, lazy_df->data[i].length);
2038
- rb_enc_associate(rstr, lazy_df->data[i].encoding);
2039
- rb_ary_store(rdata, i, rstr);
2040
- }
2041
- }
2042
- rb_hash_aset(self, rkey, rdata);
2043
- }
2044
- return rdata;
2045
- }
2046
-
2047
- /*
2048
- * call-seq:
2049
- * lazy_doc.default(key) -> string
2050
- *
2051
- * This method is used internally to lazily load fields. You should never
2052
- * really need to call it yourself.
2053
- */
2054
- static VALUE frb_lzd_default(VALUE self, VALUE rkey) {
2055
- FrtLazyDoc *lazy_doc = (FrtLazyDoc *)DATA_PTR(rb_ivar_get(self, id_data));
2056
- ID field = frb_field(rkey);
2057
- VALUE rfield = ID2SYM(field);
2058
-
2059
- return frb_lazy_df_load(self, rfield, frt_lazy_doc_get(lazy_doc, field));
2060
- }
2061
-
2062
- /*
2063
- * call-seq:
2064
- * lazy_doc.fields -> array of available fields
2065
- *
2066
- * Returns the list of fields stored for this particular document. If you try
2067
- * to access any of these fields in the document the field will be loaded.
2068
- * Try to access any other field an nil will be returned.
2069
- */
2070
- static VALUE frb_lzd_fields(VALUE self) {
2071
- return rb_ivar_get(self, id_fields);
2072
- }
2073
-
2074
- /*
2075
- * call-seq:
2076
- * lazy_doc.load -> lazy_doc
2077
- *
2078
- * Load all unloaded fields in the document from the index.
2079
- */
2080
- static VALUE frb_lzd_load(VALUE self) {
2081
- FrtLazyDoc *lazy_doc = (FrtLazyDoc *)DATA_PTR(rb_ivar_get(self, id_data));
2082
- int i;
2083
- for (i = 0; i < lazy_doc->size; i++) {
2084
- FrtLazyDocField *lazy_df = lazy_doc->fields[i];
2085
- frb_lazy_df_load(self, ID2SYM(lazy_df->name), lazy_df);
2086
- }
2087
- return self;
2088
- }
2089
-
2090
- VALUE frb_get_lazy_doc(FrtLazyDoc *lazy_doc) {
2091
- int i;
2092
- VALUE rfields = rb_ary_new2(lazy_doc->size);
2093
-
2094
- VALUE self, rdata;
2095
- self = rb_hash_new();
2096
- OBJSETUP(self, cLazyDoc, T_HASH);
2097
-
2098
- rdata = TypedData_Wrap_Struct(cLazyDocData, &frb_lazy_doc_t, lazy_doc);
2099
- rb_ivar_set(self, id_data, rdata);
2100
-
2101
- for (i = 0; i < lazy_doc->size; i++) {
2102
- rb_ary_store(rfields, i, ID2SYM(lazy_doc->fields[i]->name));
2103
- }
2104
- rb_ivar_set(self, id_fields, rfields);
2105
-
2106
- return self;
2107
- }
2108
-
2109
1989
  /****************************************************************************
2110
1990
  *
2111
1991
  * IndexReader Methods
@@ -2743,12 +2623,33 @@ frb_ir_tk_fields(VALUE self)
2743
2623
  * Returns the current version of the index reader.
2744
2624
  */
2745
2625
  static VALUE
2746
- frb_ir_version(VALUE self)
2747
- {
2626
+ frb_ir_version(VALUE self) {
2748
2627
  FrtIndexReader *ir = (FrtIndexReader *)DATA_PTR(self);
2749
2628
  return ULL2NUM(ir->sis->version);
2750
2629
  }
2751
2630
 
2631
+ static VALUE frb_ir_to_enum(VALUE self) {
2632
+ return rb_enumeratorize(self, sym_each, 0, NULL);
2633
+ }
2634
+
2635
+ static VALUE frb_ir_each(VALUE self) {
2636
+ FrtIndexReader *ir = (FrtIndexReader *)DATA_PTR(self);
2637
+ if (rb_block_given_p()) {
2638
+ long i;
2639
+ long max_doc = ir->max_doc(ir);
2640
+ VALUE rld;
2641
+ for (i = 0; i < max_doc; i++) {
2642
+ if (ir->is_deleted(ir, i)) continue;
2643
+ rld = frb_get_lazy_doc(ir->get_lazy_doc(ir, i));
2644
+ rb_yield(rld);
2645
+ }
2646
+ return self;
2647
+ } else {
2648
+ return frb_ir_to_enum(self);
2649
+ }
2650
+
2651
+ }
2652
+
2752
2653
  /****************************************************************************
2753
2654
  *
2754
2655
  * Init Functions
@@ -3350,48 +3251,6 @@ void Init_IndexWriter(void) {
3350
3251
  rb_define_method(cIndexWriter, "use_compound_file=", frb_iw_set_use_compound_file, 1);
3351
3252
  }
3352
3253
 
3353
- /*
3354
- * Document-class: Ferret::Index::LazyDoc
3355
- *
3356
- * == Summary
3357
- *
3358
- * When a document is retrieved from the index a LazyDoc is returned.
3359
- * Actually, LazyDoc is just a modified Hash object which lazily adds fields
3360
- * to itself when they are accessed. You should note that the keys method
3361
- * will return nothing until you actually access one of the fields. To see
3362
- * what fields are available use LazyDoc#fields rather than LazyDoc#keys. To
3363
- * load all fields use the LazyDoc#load method.
3364
- *
3365
- * == Example
3366
- *
3367
- * doc = index_reader[0]
3368
- *
3369
- * doc.keys #=> []
3370
- * doc.values #=> []
3371
- * doc.fields #=> [:title, :content]
3372
- *
3373
- * title = doc[:title] #=> "the title"
3374
- * doc.keys #=> [:title]
3375
- * doc.values #=> ["the title"]
3376
- * doc.fields #=> [:title, :content]
3377
- *
3378
- * doc.load
3379
- * doc.keys #=> [:title, :content]
3380
- * doc.values #=> ["the title", "the content"]
3381
- * doc.fields #=> [:title, :content]
3382
- */
3383
- void Init_LazyDoc(void) {
3384
- id_fields = rb_intern("@fields");
3385
-
3386
- cLazyDoc = rb_define_class_under(mIndex, "LazyDoc", rb_cHash);
3387
- rb_define_method(cLazyDoc, "default", frb_lzd_default, 1);
3388
- rb_define_method(cLazyDoc, "load", frb_lzd_load, 0);
3389
- rb_define_method(cLazyDoc, "fields", frb_lzd_fields, 0);
3390
-
3391
- cLazyDocData = rb_define_class_under(cLazyDoc, "LazyDocData", rb_cObject);
3392
- rb_define_alloc_func(cLazyDocData, frb_lzd_alloc);
3393
- }
3394
-
3395
3254
  /*
3396
3255
  * Document-class: Ferret::Index::IndexReader
3397
3256
  *
@@ -3405,36 +3264,38 @@ void Init_LazyDoc(void) {
3405
3264
  void Init_IndexReader(void) {
3406
3265
  cIndexReader = rb_define_class_under(mIndex, "IndexReader", rb_cObject);
3407
3266
  rb_define_alloc_func(cIndexReader, frb_ir_alloc);
3408
- rb_define_method(cIndexReader, "initialize", frb_ir_init, 1);
3409
- rb_define_method(cIndexReader, "set_norm", frb_ir_set_norm, 3);
3410
- rb_define_method(cIndexReader, "norms", frb_ir_norms, 1);
3267
+ rb_define_method(cIndexReader, "initialize", frb_ir_init, 1);
3268
+ rb_define_method(cIndexReader, "set_norm", frb_ir_set_norm, 3);
3269
+ rb_define_method(cIndexReader, "norms", frb_ir_norms, 1);
3411
3270
  rb_define_method(cIndexReader, "get_norms_into", frb_ir_get_norms_into, 3);
3412
- rb_define_method(cIndexReader, "commit", frb_ir_commit, 0);
3413
- rb_define_method(cIndexReader, "close", frb_ir_close, 0);
3271
+ rb_define_method(cIndexReader, "commit", frb_ir_commit, 0);
3272
+ rb_define_method(cIndexReader, "close", frb_ir_close, 0);
3414
3273
  rb_define_method(cIndexReader, "has_deletions?", frb_ir_has_deletions, 0);
3415
- rb_define_method(cIndexReader, "delete", frb_ir_delete, 1);
3416
- rb_define_method(cIndexReader, "deleted?", frb_ir_is_deleted, 1);
3417
- rb_define_method(cIndexReader, "max_doc", frb_ir_max_doc, 0);
3418
- rb_define_method(cIndexReader, "num_docs", frb_ir_num_docs, 0);
3419
- rb_define_method(cIndexReader, "undelete_all", frb_ir_undelete_all, 0);
3420
- rb_define_method(cIndexReader, "latest?", frb_ir_is_latest, 0);
3421
- rb_define_method(cIndexReader, "get_document", frb_ir_get_doc, -1);
3422
- rb_define_method(cIndexReader, "[]", frb_ir_get_doc, -1);
3423
- rb_define_method(cIndexReader, "term_vector", frb_ir_term_vector, 2);
3424
- rb_define_method(cIndexReader, "term_vectors", frb_ir_term_vectors, 1);
3425
- rb_define_method(cIndexReader, "term_docs", frb_ir_term_docs, 0);
3274
+ rb_define_method(cIndexReader, "delete", frb_ir_delete, 1);
3275
+ rb_define_method(cIndexReader, "deleted?", frb_ir_is_deleted, 1);
3276
+ rb_define_method(cIndexReader, "max_doc", frb_ir_max_doc, 0);
3277
+ rb_define_method(cIndexReader, "num_docs", frb_ir_num_docs, 0);
3278
+ rb_define_method(cIndexReader, "undelete_all", frb_ir_undelete_all, 0);
3279
+ rb_define_method(cIndexReader, "latest?", frb_ir_is_latest, 0);
3280
+ rb_define_method(cIndexReader, "get_document", frb_ir_get_doc, -1);
3281
+ rb_define_method(cIndexReader, "[]", frb_ir_get_doc, -1);
3282
+ rb_define_method(cIndexReader, "term_vector", frb_ir_term_vector, 2);
3283
+ rb_define_method(cIndexReader, "term_vectors", frb_ir_term_vectors, 1);
3284
+ rb_define_method(cIndexReader, "term_docs", frb_ir_term_docs, 0);
3426
3285
  rb_define_method(cIndexReader, "term_positions", frb_ir_term_positions, 0);
3427
3286
  rb_define_method(cIndexReader, "term_docs_for", frb_ir_term_docs_for, 2);
3428
3287
  rb_define_method(cIndexReader, "term_positions_for", frb_ir_t_pos_for, 2);
3429
- rb_define_method(cIndexReader, "doc_freq", frb_ir_doc_freq, 2);
3430
- rb_define_method(cIndexReader, "terms", frb_ir_terms, 1);
3431
- rb_define_method(cIndexReader, "terms_from", frb_ir_terms_from, 2);
3432
- rb_define_method(cIndexReader, "term_count", frb_ir_term_count, 1);
3433
- rb_define_method(cIndexReader, "fields", frb_ir_fields, 0);
3434
- rb_define_method(cIndexReader, "field_names", frb_ir_fields, 0);
3435
- rb_define_method(cIndexReader, "field_infos", frb_ir_field_infos, 0);
3436
- rb_define_method(cIndexReader, "tokenized_fields", frb_ir_tk_fields, 0);
3437
- rb_define_method(cIndexReader, "version", frb_ir_version, 0);
3288
+ rb_define_method(cIndexReader, "doc_freq", frb_ir_doc_freq, 2);
3289
+ rb_define_method(cIndexReader, "terms", frb_ir_terms, 1);
3290
+ rb_define_method(cIndexReader, "terms_from", frb_ir_terms_from, 2);
3291
+ rb_define_method(cIndexReader, "term_count", frb_ir_term_count, 1);
3292
+ rb_define_method(cIndexReader, "fields", frb_ir_fields, 0);
3293
+ rb_define_method(cIndexReader, "field_names", frb_ir_fields, 0);
3294
+ rb_define_method(cIndexReader, "field_infos", frb_ir_field_infos, 0);
3295
+ rb_define_method(cIndexReader, "tokenized_fields", frb_ir_tk_fields, 0);
3296
+ rb_define_method(cIndexReader, "version", frb_ir_version, 0);
3297
+ rb_define_method(cIndexReader, "each", frb_ir_each, 0);
3298
+ rb_define_method(cIndexReader, "to_enum", frb_ir_to_enum, 0);
3438
3299
  }
3439
3300
 
3440
3301
  /* rdoc hack