isomorfeus-ferret 0.13.10 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +40 -19
- data/ext/isomorfeus_ferret_ext/bzlib.h +83 -82
- data/ext/isomorfeus_ferret_ext/frb_index.c +55 -194
- data/ext/isomorfeus_ferret_ext/frb_lazy_doc.c +705 -0
- data/ext/isomorfeus_ferret_ext/frb_store.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_config.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_hash.h +6 -8
- data/ext/isomorfeus_ferret_ext/frt_hashset.c +5 -5
- data/ext/isomorfeus_ferret_ext/frt_index.c +8 -3
- data/ext/isomorfeus_ferret_ext/frt_index.h +3 -1
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_store.h +1 -1
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +8 -6
- data/lib/isomorfeus/ferret/index/index.rb +11 -8
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +3 -2
@@ -58,10 +58,10 @@ typedef struct FrtHash {
|
|
58
58
|
* used outside of the Hash methods */
|
59
59
|
FrtHashEntry *(*lookup_i)(struct FrtHash *self,
|
60
60
|
register const void *key);
|
61
|
-
unsigned long
|
62
|
-
int
|
63
|
-
void
|
64
|
-
void
|
61
|
+
unsigned long (*hash_i)(const void *key);
|
62
|
+
int (*eq_i)(const void *key1, const void *key2);
|
63
|
+
void (*free_key_i)(void *p);
|
64
|
+
void (*free_value_i)(void *p);
|
65
65
|
} FrtHash;
|
66
66
|
|
67
67
|
/**
|
@@ -140,8 +140,7 @@ extern FrtHash *frt_h_new(frt_hash_ft hash,
|
|
140
140
|
* pass NULL in place of this parameter the value will not be destroyed.
|
141
141
|
* @return A newly allocated Hash
|
142
142
|
*/
|
143
|
-
extern FrtHash *frt_h_new_str(frt_free_ft free_key,
|
144
|
-
frt_free_ft free_value);
|
143
|
+
extern FrtHash *frt_h_new_str(frt_free_ft free_key, frt_free_ft free_value);
|
145
144
|
|
146
145
|
/**
|
147
146
|
* Create a new Hash that uses integers as its keys. The Hash will store all
|
@@ -258,8 +257,7 @@ extern void *frt_h_rem(FrtHash *self, const void *key, bool del_key);
|
|
258
257
|
* the existing key so no key was freed
|
259
258
|
* </pre>
|
260
259
|
*/
|
261
|
-
extern FrtHashKeyStatus frt_h_set(FrtHash *self,
|
262
|
-
const void *key, void *value);
|
260
|
+
extern FrtHashKeyStatus frt_h_set(FrtHash *self, const void *key, void *value);
|
263
261
|
|
264
262
|
/**
|
265
263
|
* Add the value +value+ to the Hash referencing it with key +key+. If
|
@@ -3,7 +3,7 @@
|
|
3
3
|
|
4
4
|
/*
|
5
5
|
* The HashSet contains an array +elems+ of the elements that have been added.
|
6
|
-
* It always has +size+ elements so +size+
|
6
|
+
* It always has +size+ elements so +size+ and +elems+ can be used to iterate
|
7
7
|
* over all alements in the HashSet. It also uses a Hash to keep track of
|
8
8
|
* which elements have been added and their index in the +elems+ array.
|
9
9
|
*/
|
@@ -37,7 +37,7 @@ FrtHashSet *frt_hs_new_ptr(frt_free_ft free_func)
|
|
37
37
|
return hs;
|
38
38
|
}
|
39
39
|
|
40
|
-
static void clear(FrtHashSet *hs, bool destroy)
|
40
|
+
static void hs_clear(FrtHashSet *hs, bool destroy)
|
41
41
|
{
|
42
42
|
FrtHashSetEntry *curr, *next = hs->first;
|
43
43
|
frt_free_ft do_free = destroy ? hs->free_elem_i : &frt_dummy_free;
|
@@ -52,20 +52,20 @@ static void clear(FrtHashSet *hs, bool destroy)
|
|
52
52
|
|
53
53
|
void frt_hs_clear(FrtHashSet *hs)
|
54
54
|
{
|
55
|
-
|
55
|
+
hs_clear(hs, true);
|
56
56
|
frt_h_clear(hs->ht);
|
57
57
|
}
|
58
58
|
|
59
59
|
void frt_hs_free(FrtHashSet *hs)
|
60
60
|
{
|
61
|
-
|
61
|
+
hs_clear(hs, false);
|
62
62
|
frt_h_destroy(hs->ht);
|
63
63
|
free(hs);
|
64
64
|
}
|
65
65
|
|
66
66
|
void frt_hs_destroy(FrtHashSet *hs)
|
67
67
|
{
|
68
|
-
|
68
|
+
hs_clear(hs, true);
|
69
69
|
frt_h_destroy(hs->ht);
|
70
70
|
free(hs);
|
71
71
|
}
|
@@ -116,7 +116,7 @@ char *frt_fn_for_generation(char *buf, const char *base, const char *ext, frt_i6
|
|
116
116
|
} else {
|
117
117
|
char b[FRT_SEGMENT_NAME_MAX_LENGTH];
|
118
118
|
char *u = u64_to_str36(b, FRT_SEGMENT_NAME_MAX_LENGTH, (frt_u64)gen);
|
119
|
-
if (ext == NULL) {
|
119
|
+
if (ext == NULL || (strcmp(ext, "") == 0)) {
|
120
120
|
sprintf(buf, "%s_%s", base, u);
|
121
121
|
} else {
|
122
122
|
sprintf(buf, "%s_%s.%s", base, u, ext);
|
@@ -170,9 +170,9 @@ static char *fn_for_gen_field(char *buf,
|
|
170
170
|
*
|
171
171
|
***************************************************************************/
|
172
172
|
|
173
|
-
static unsigned long
|
173
|
+
static unsigned long co_hash(const void *key)
|
174
174
|
{
|
175
|
-
return (unsigned long
|
175
|
+
return (unsigned long)key;
|
176
176
|
}
|
177
177
|
|
178
178
|
static int co_eq(const void *key1, const void *key2)
|
@@ -303,6 +303,7 @@ FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name, FrtStoreValue store, FrtCom
|
|
303
303
|
fi_set_compression(fi, compression);
|
304
304
|
fi_set_index(fi, index);
|
305
305
|
fi_set_term_vector(fi, term_vector);
|
306
|
+
fi->number = 0;
|
306
307
|
fi->ref_cnt = 1;
|
307
308
|
fi->rfi = Qnil;
|
308
309
|
return fi;
|
@@ -394,6 +395,7 @@ FrtFieldInfo *frt_fis_add_field(FrtFieldInfos *fis, FrtFieldInfo *fi) {
|
|
394
395
|
if (!frt_h_set_safe(fis->field_dict, (void *)fi->name, fi)) {
|
395
396
|
FRT_RAISE(FRT_ARG_ERROR, "Field :%s already exists", rb_id2name(fi->name));
|
396
397
|
}
|
398
|
+
FRT_REF(fi);
|
397
399
|
fi->number = fis->size;
|
398
400
|
fis->fields[fis->size] = fi;
|
399
401
|
fis->size++;
|
@@ -1161,6 +1163,7 @@ static FrtLazyDocField *lazy_df_new(ID name, const int size, FrtCompressionType
|
|
1161
1163
|
self->data = FRT_ALLOC_AND_ZERO_N(FrtLazyDocFieldData, size);
|
1162
1164
|
self->compression = compression;
|
1163
1165
|
self->decompressed = false;
|
1166
|
+
self->loaded = false;
|
1164
1167
|
return self;
|
1165
1168
|
}
|
1166
1169
|
|
@@ -1398,6 +1401,7 @@ char *frt_lazy_df_get_data(FrtLazyDocField *self, int i) {
|
|
1398
1401
|
frt_is_read_bytes(self->doc->fields_in, (frt_uchar *)text, read_len);
|
1399
1402
|
text[read_len - 1] = '\0';
|
1400
1403
|
}
|
1404
|
+
self->loaded = true;
|
1401
1405
|
}
|
1402
1406
|
}
|
1403
1407
|
|
@@ -1471,6 +1475,7 @@ static FrtLazyDoc *lazy_doc_new(int size, FrtInStream *fdt_in)
|
|
1471
1475
|
self->size = size;
|
1472
1476
|
self->fields = FRT_ALLOC_AND_ZERO_N(FrtLazyDocField *, size);
|
1473
1477
|
self->fields_in = frt_is_clone(fdt_in);
|
1478
|
+
self->loaded = false;
|
1474
1479
|
return self;
|
1475
1480
|
}
|
1476
1481
|
|
@@ -529,7 +529,7 @@ extern FrtTVTerm *frt_tv_get_tv_term(FrtTermVector *tv, const char *term);
|
|
529
529
|
|
530
530
|
/* * * FrtLazyDocField * * */
|
531
531
|
typedef struct FrtLazyDocFieldData {
|
532
|
-
frt_off_t
|
532
|
+
frt_off_t start;
|
533
533
|
int length;
|
534
534
|
rb_encoding *encoding;
|
535
535
|
FrtCompressionType compression; /* as stored */
|
@@ -545,6 +545,7 @@ typedef struct FrtLazyDocField {
|
|
545
545
|
int len; /* length of data elements concatenated */
|
546
546
|
FrtCompressionType compression; /* as configured */
|
547
547
|
bool decompressed;
|
548
|
+
bool loaded;
|
548
549
|
} FrtLazyDocField;
|
549
550
|
|
550
551
|
extern char *frt_lazy_df_get_data(FrtLazyDocField *self, int i);
|
@@ -556,6 +557,7 @@ struct FrtLazyDoc {
|
|
556
557
|
int size;
|
557
558
|
FrtLazyDocField **fields;
|
558
559
|
FrtInStream *fields_in;
|
560
|
+
bool loaded;
|
559
561
|
};
|
560
562
|
|
561
563
|
extern void frt_lazy_doc_close(FrtLazyDoc *self);
|
@@ -2801,6 +2801,7 @@ FrtQParser *frt_qp_init(FrtQParser *self, FrtAnalyzer *analyzer) {
|
|
2801
2801
|
qp_push_fields(self, self->def_fields, false);
|
2802
2802
|
|
2803
2803
|
self->analyzer = analyzer;
|
2804
|
+
if (analyzer) FRT_REF(analyzer);
|
2804
2805
|
self->ts_cache = frt_h_new_ptr((frt_free_ft)&frt_ts_deref);
|
2805
2806
|
self->buf_index = 0;
|
2806
2807
|
self->dynbuf = NULL;
|
@@ -44,7 +44,7 @@ struct FrtOutStreamMethods {
|
|
44
44
|
* Close any resources used by the output stream +os+
|
45
45
|
*
|
46
46
|
* @param os self
|
47
|
-
* @raise FRT_IO_ERROR if there is an closing the file
|
47
|
+
* @raise FRT_IO_ERROR if there is an error closing the file
|
48
48
|
*/
|
49
49
|
void (*close_i)(struct FrtOutStream *os);
|
50
50
|
};
|
@@ -29,6 +29,7 @@ VALUE sym_true;
|
|
29
29
|
VALUE sym_false;
|
30
30
|
VALUE sym_path;
|
31
31
|
VALUE sym_dir;
|
32
|
+
VALUE sym_each;
|
32
33
|
|
33
34
|
/* Modules */
|
34
35
|
VALUE mIsomorfeus;
|
@@ -272,12 +273,13 @@ void Init_isomorfeus_ferret_ext(void) {
|
|
272
273
|
id_data = rb_intern("@data");
|
273
274
|
|
274
275
|
/* Symbols */
|
275
|
-
sym_yes = ID2SYM(rb_intern("yes"))
|
276
|
-
sym_no = ID2SYM(rb_intern("no"))
|
277
|
-
sym_true = ID2SYM(rb_intern("true"))
|
278
|
-
sym_false = ID2SYM(rb_intern("false"))
|
279
|
-
sym_path = ID2SYM(rb_intern("path"))
|
280
|
-
sym_dir = ID2SYM(rb_intern("dir"))
|
276
|
+
sym_yes = ID2SYM(rb_intern("yes"));
|
277
|
+
sym_no = ID2SYM(rb_intern("no"));
|
278
|
+
sym_true = ID2SYM(rb_intern("true"));
|
279
|
+
sym_false = ID2SYM(rb_intern("false"));
|
280
|
+
sym_path = ID2SYM(rb_intern("path"));
|
281
|
+
sym_dir = ID2SYM(rb_intern("dir"));
|
282
|
+
sym_each = ID2SYM(rb_intern("each"));
|
281
283
|
|
282
284
|
mIsomorfeus = rb_define_module("Isomorfeus");
|
283
285
|
mFerret = rb_define_module_under(mIsomorfeus, "Ferret");
|
@@ -5,6 +5,7 @@ module Isomorfeus
|
|
5
5
|
# information on how to use this class.
|
6
6
|
class Index
|
7
7
|
include MonitorMixin
|
8
|
+
include Enumerable
|
8
9
|
include Isomorfeus::Ferret::Store
|
9
10
|
include Isomorfeus::Ferret::Search
|
10
11
|
|
@@ -485,15 +486,11 @@ module Isomorfeus
|
|
485
486
|
end
|
486
487
|
end
|
487
488
|
|
488
|
-
# iterate through all documents in the index.
|
489
|
-
|
490
|
-
# fields.
|
491
|
-
def each
|
489
|
+
# iterate through all documents in the index.
|
490
|
+
def each(&block)
|
492
491
|
@dir.synchronize do
|
493
492
|
ensure_reader_open
|
494
|
-
|
495
|
-
yield @reader[i].load unless @reader.deleted?(i)
|
496
|
-
end
|
493
|
+
@reader.each(&block)
|
497
494
|
end
|
498
495
|
end
|
499
496
|
|
@@ -679,7 +676,7 @@ module Isomorfeus
|
|
679
676
|
docs_to_add = []
|
680
677
|
query = do_process_query(query)
|
681
678
|
@searcher.search_each(query, :limit => :all) do |id, score|
|
682
|
-
document = @searcher[id].
|
679
|
+
document = @searcher[id].to_h
|
683
680
|
if new_val.is_a?(Hash)
|
684
681
|
document.merge!(new_val)
|
685
682
|
else new_val.is_a?(String) or new_val.is_a?(Symbol)
|
@@ -850,6 +847,12 @@ module Isomorfeus
|
|
850
847
|
end
|
851
848
|
end
|
852
849
|
|
850
|
+
def to_enum
|
851
|
+
@dir.synchronize do
|
852
|
+
ensure_reader_open
|
853
|
+
@reader.to_enum
|
854
|
+
end
|
855
|
+
end
|
853
856
|
|
854
857
|
protected
|
855
858
|
def ensure_writer_open()
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: isomorfeus-ferret
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.10
|
4
|
+
version: 0.14.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Biedermann
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-05-
|
11
|
+
date: 2022-05-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: oj
|
@@ -181,6 +181,7 @@ files:
|
|
181
181
|
- ext/isomorfeus_ferret_ext/fio_tmpfile.h
|
182
182
|
- ext/isomorfeus_ferret_ext/frb_analysis.c
|
183
183
|
- ext/isomorfeus_ferret_ext/frb_index.c
|
184
|
+
- ext/isomorfeus_ferret_ext/frb_lazy_doc.c
|
184
185
|
- ext/isomorfeus_ferret_ext/frb_qparser.c
|
185
186
|
- ext/isomorfeus_ferret_ext/frb_search.c
|
186
187
|
- ext/isomorfeus_ferret_ext/frb_store.c
|