isomorfeus-ferret 0.13.5 → 0.13.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +26 -34
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +7 -4
- data/ext/isomorfeus_ferret_ext/frb_index.c +3 -3
- data/ext/isomorfeus_ferret_ext/frb_qparser.c +2 -1
- data/ext/isomorfeus_ferret_ext/frb_search.c +22 -15
- data/ext/isomorfeus_ferret_ext/frb_store.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_compound_io.c +4 -1
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +2 -4
- data/ext/isomorfeus_ferret_ext/frt_global.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_ind.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_index.c +34 -64
- data/ext/isomorfeus_ferret_ext/frt_index.h +2 -2
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_q_term.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_search.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_sort.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_store.c +5 -8
- data/ext/isomorfeus_ferret_ext/test.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_compound_io.c +3 -3
- data/ext/isomorfeus_ferret_ext/test_filter.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_hash.c +1 -0
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_index.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_q_span.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_search.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_sort.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_store.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_term.c +1 -1
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1e6a7a9c17cc344699bee0c8116188f86da73e11fa2a4eed02eea1c4647b6a99
|
4
|
+
data.tar.gz: c94c9a2735b010c02c8d5a793057b94abf8950117153415a731347c21fadf66e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 549a2d3b09dc535278f6c9735f55a47203166df046e5a75ff55589085336bcf2cb7c25cdf491c07eca68da25682316c4bb15e5079f09df95f8a494558fe22260
|
7
|
+
data.tar.gz: e188f428fd881a0285ece69807f13c07dfd1eee131edcad3e38a713ffa820364992bb9068d5f94f567bd7769c7882441ce8eb67199b95b491528fecdd34503f4
|
data/README.md
CHANGED
@@ -11,12 +11,12 @@ At the [Isomorfeus Framework Project](https://isomorfeus.com)
|
|
11
11
|
|
12
12
|
## About this project
|
13
13
|
|
14
|
-
Isomorfeus-Ferret is a revived version of the original ferret gem created by Dave Balmain.
|
14
|
+
Isomorfeus-Ferret is a revived version of the original ferret gem created by Dave Balmain, [https://github.com/dbalmain/ferret](https://github.com/dbalmain/ferret).
|
15
15
|
During revival many things have been fixed, now all tests pass, no crashes and it
|
16
16
|
successfully compiles and runs with Rubies >3. It's no longer a goal to have
|
17
17
|
a c library available, but instead the usage is meant as ruby gem with a c extension only.
|
18
18
|
|
19
|
-
It
|
19
|
+
It works on *nixes, *nuxes, *BSDs and also works on Windows and RaspberryPi.
|
20
20
|
|
21
21
|
## Improvements and Changes in Version 0.13
|
22
22
|
|
@@ -63,14 +63,8 @@ fis.add_field(:compressed_field, :store => :yes, :compression => :brotli, :term_
|
|
63
63
|
|
64
64
|
### Performance
|
65
65
|
|
66
|
-
|
67
|
-
On Windows
|
68
|
-
|
69
|
-
Search performance is still excellent and multiple times faster than Lucene.
|
70
|
-
|
71
|
-
Lucene achieves roughly double the indexing performance. This seems to be because of the different way strings and
|
72
|
-
encodings are handled in Java. For example, the Java WhitespaceTokenizer code requires only one method call per character (check for whitespace), but for Ruby, to support all the different encodings, several method calls are required per character (retrieve character according to encoding, check character for whitespace).
|
73
|
-
Ferret is internally using the standard Ruby string encoding methods.
|
66
|
+
For version 0.13.7 the performance bottleneck has been identified and removed; ferret now delivers excellent indexing performance on all platforms, see numbers below.
|
67
|
+
On Windows performance is still not as good as on Linux, but that is equally true for Lucene and is because of how the Windows filesystem works.
|
74
68
|
|
75
69
|
## Documentation
|
76
70
|
|
@@ -105,43 +99,41 @@ Ensure your locale is set to C.UTF-8, because the internal c tests don't know ho
|
|
105
99
|
|
106
100
|
A recent Java JDK must be installed to compile and run lucene benchmarks.
|
107
101
|
|
108
|
-
Results
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
JVM 11.0.14.1 (Ubuntu)
|
125
|
-
```
|
102
|
+
Results, Ferret 0.13.7 vs. Lucene 9.1.0, WhitespaceAnalyzer,
|
103
|
+
Linux Ubuntu 20.04, FreeBSD 13.0 and Windows 10 on old Intel Core i5 from 2015,
|
104
|
+
LinuxPi on RaspberryPi 400:
|
105
|
+
|
106
|
+
| OS | Task | Ferret | Lucene* |
|
107
|
+
|---------|------------|-----------------|----------------|
|
108
|
+
| Linux | Indexing | 4905 docs/s | 4785 docs/s |
|
109
|
+
| FreeBSD | Indexing | 4516 docs/s | - |
|
110
|
+
| Windows | Indexing | 2361 docs/s | 2395 docs/s |
|
111
|
+
| LinuxPi | Indexing | 1161 docs/s | 707 docs/s |
|
112
|
+
| Linux | Searching | 25664 queries/s | 4708 queries/s |
|
113
|
+
| FreeBSD | Searching | 25073 queries/s | - |
|
114
|
+
| Windows | Searching | 3646 queries/s | 935 queries/s |
|
115
|
+
| LinuxPi | Searching | 5768 queries/s | 680 queries/s |
|
116
|
+
| | Index Size | 28 MB | 35 MB |
|
117
|
+
|
118
|
+
*Lucene 9.1.0 on JVM 11.0.14.1 (Ubuntu)
|
126
119
|
|
127
120
|
### Storing Fields with Compression, Indexing and Retrieval
|
128
121
|
- clone repo
|
129
122
|
- bundle install
|
130
123
|
- rake ferret_compression_benchmark
|
131
124
|
|
132
|
-
Results on Linux, 0.13.
|
125
|
+
Results on Linux, 0.13.7, on old Intel Core i5 from 2015:
|
133
126
|
|
134
127
|
| Compression | Index & Store | Retrieve | Index size |
|
135
128
|
|-------------|---------------|---------------|------------|
|
136
|
-
| none |
|
137
|
-
| brotli |
|
138
|
-
| bzip2 |
|
139
|
-
| lz4 |
|
129
|
+
| none | 4866 docs/s | 153853 docs/s | 43 MB |
|
130
|
+
| brotli | 3539 docs/s | 58315 docs/s | 36 MB |
|
131
|
+
| bzip2 | 2624 docs/s | 15382 docs/s | 38 MB |
|
132
|
+
| lz4 | 4639 docs/s | 127100 docs/s | 41 MB |
|
140
133
|
|
141
134
|
## Future
|
142
135
|
|
143
136
|
Lots of things to do:
|
144
|
-
- Improve indexing performance on Windows (WriteFile is terribly slow, maybe use mapping, see libuv)
|
145
137
|
- Bring documentation in order in a docs directory
|
146
138
|
- Review code (especially for memory/stack issues, typical c issues)
|
147
139
|
- Take care of ruby GVL and threading
|
@@ -458,7 +458,7 @@ static VALUE frb_ts_next(VALUE self) {
|
|
458
458
|
|
459
459
|
static void frb_tf_mark(void *p) {
|
460
460
|
FrtTokenStream *ts = (FrtTokenStream *)p;
|
461
|
-
if (TkFilt(ts)->sub_ts->rts)
|
461
|
+
if (TkFilt(ts)->sub_ts && TkFilt(ts)->sub_ts->rts)
|
462
462
|
rb_gc_mark(TkFilt(ts)->sub_ts->rts);
|
463
463
|
}
|
464
464
|
|
@@ -1331,9 +1331,10 @@ static void frb_h_mark_values_i(void *key, void *value, void *arg) {
|
|
1331
1331
|
}
|
1332
1332
|
|
1333
1333
|
static void frb_pfa_mark(void *p) {
|
1334
|
-
if (PFA(p)->default_a->ranalyzer)
|
1334
|
+
if (PFA(p)->default_a && PFA(p)->default_a->ranalyzer)
|
1335
1335
|
rb_gc_mark(PFA(p)->default_a->ranalyzer);
|
1336
|
-
|
1336
|
+
if (PFA(p)->dict)
|
1337
|
+
frt_h_each(PFA(p)->dict, &frb_h_mark_values_i, NULL);
|
1337
1338
|
}
|
1338
1339
|
|
1339
1340
|
/*** PerFieldAnalyzer ***/
|
@@ -1370,6 +1371,8 @@ const rb_data_type_t frb_per_field_analyzer_t = {
|
|
1370
1371
|
|
1371
1372
|
static VALUE frb_per_field_analyzer_alloc(VALUE rclass) {
|
1372
1373
|
FrtAnalyzer *a = frt_per_field_analyzer_alloc();
|
1374
|
+
PFA(a)->default_a = NULL;
|
1375
|
+
PFA(a)->dict = NULL;
|
1373
1376
|
return TypedData_Wrap_Struct(rclass, &frb_per_field_analyzer_t, a);
|
1374
1377
|
}
|
1375
1378
|
|
@@ -1435,7 +1438,7 @@ static VALUE frb_pfa_analyzer_token_stream(VALUE self, VALUE rfield, VALUE rstri
|
|
1435
1438
|
/*** RegExpAnalyzer ***/
|
1436
1439
|
|
1437
1440
|
static void frb_re_analyzer_mark(void *p) {
|
1438
|
-
if (((FrtAnalyzer *)p)->current_ts->rts)
|
1441
|
+
if (((FrtAnalyzer *)p)->current_ts && ((FrtAnalyzer *)p)->current_ts->rts)
|
1439
1442
|
rb_gc_mark(((FrtAnalyzer *)p)->current_ts->rts);
|
1440
1443
|
}
|
1441
1444
|
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#include "isomorfeus_ferret.h"
|
3
3
|
#include <ruby.h>
|
4
4
|
|
5
|
-
#undef close
|
5
|
+
// #undef close
|
6
6
|
|
7
7
|
VALUE mIndex;
|
8
8
|
|
@@ -429,6 +429,7 @@ static VALUE frb_get_field_infos(FrtFieldInfos *fis) {
|
|
429
429
|
|
430
430
|
static VALUE frb_fis_alloc(VALUE rclass) {
|
431
431
|
FrtFieldInfos *fis = frt_fis_alloc();
|
432
|
+
fis->size = 0;
|
432
433
|
return TypedData_Wrap_Struct(rclass, &frb_field_infos_t, fis);
|
433
434
|
}
|
434
435
|
|
@@ -674,8 +675,7 @@ frb_fis_get_tk_fields(VALUE self)
|
|
674
675
|
****************************************************************************/
|
675
676
|
|
676
677
|
static void frb_te_free(void *p) {
|
677
|
-
FrtTermEnum *
|
678
|
-
te->close(te);
|
678
|
+
((FrtTermEnum *)p)->close((FrtTermEnum *)p);
|
679
679
|
}
|
680
680
|
|
681
681
|
static size_t frb_te_size(const void *p) {
|
@@ -34,7 +34,7 @@ static void frb_qp_free(void *p) {
|
|
34
34
|
}
|
35
35
|
|
36
36
|
static void frb_qp_mark(void *p) {
|
37
|
-
if (((FrtQParser *)p)->analyzer->ranalyzer)
|
37
|
+
if (((FrtQParser *)p)->analyzer && ((FrtQParser *)p)->analyzer->ranalyzer)
|
38
38
|
rb_gc_mark(((FrtQParser *)p)->analyzer->ranalyzer);
|
39
39
|
}
|
40
40
|
|
@@ -59,6 +59,7 @@ const rb_data_type_t frb_qp_t = {
|
|
59
59
|
|
60
60
|
static VALUE frb_qp_alloc(VALUE rclass) {
|
61
61
|
FrtQParser *qp = frt_qp_alloc();
|
62
|
+
qp->analyzer = NULL;
|
62
63
|
return TypedData_Wrap_Struct(rclass, &frb_qp_t, qp);
|
63
64
|
}
|
64
65
|
|
@@ -4,7 +4,7 @@
|
|
4
4
|
#include "isomorfeus_ferret.h"
|
5
5
|
#include <ruby.h>
|
6
6
|
|
7
|
-
#undef close
|
7
|
+
// #undef close
|
8
8
|
|
9
9
|
VALUE mSearch;
|
10
10
|
|
@@ -713,7 +713,7 @@ static size_t frb_boolean_clause_t_size(const void *p) {
|
|
713
713
|
}
|
714
714
|
|
715
715
|
static void frb_bc_mark(void *p) {
|
716
|
-
if (((FrtBooleanClause *)p)->query->rquery)
|
716
|
+
if (((FrtBooleanClause *)p)->query && ((FrtBooleanClause *)p)->query->rquery)
|
717
717
|
rb_gc_mark(((FrtBooleanClause *)p)->query->rquery);
|
718
718
|
}
|
719
719
|
|
@@ -1837,11 +1837,11 @@ static size_t frb_filtered_query_size(const void *p) {
|
|
1837
1837
|
}
|
1838
1838
|
|
1839
1839
|
static void frb_fqq_mark(void *p) {
|
1840
|
-
FrtFilteredQuery *
|
1841
|
-
if (
|
1842
|
-
rb_gc_mark(
|
1843
|
-
if (
|
1844
|
-
rb_gc_mark(
|
1840
|
+
FrtFilteredQuery *fqq = (FrtFilteredQuery *)p;
|
1841
|
+
if (fqq->query && fqq->query->rquery)
|
1842
|
+
rb_gc_mark(fqq->query->rquery);
|
1843
|
+
if (fqq->filter && fqq->filter->rfilter)
|
1844
|
+
rb_gc_mark(fqq->filter->rfilter);
|
1845
1845
|
}
|
1846
1846
|
|
1847
1847
|
const rb_data_type_t frb_filtered_query_t = {
|
@@ -1860,6 +1860,8 @@ const rb_data_type_t frb_filtered_query_t = {
|
|
1860
1860
|
|
1861
1861
|
static VALUE frb_fqq_alloc(VALUE rclass) {
|
1862
1862
|
FrtQuery *fqq = frt_fq_alloc();
|
1863
|
+
((FrtFilteredQuery *)fqq)->query = NULL;
|
1864
|
+
((FrtFilteredQuery *)fqq)->filter = NULL;
|
1863
1865
|
return TypedData_Wrap_Struct(rclass, &frb_filtered_query_t, fqq);
|
1864
1866
|
}
|
1865
1867
|
|
@@ -2172,6 +2174,7 @@ const rb_data_type_t frb_span_near_query_t = {
|
|
2172
2174
|
|
2173
2175
|
static VALUE frb_spannq_alloc(VALUE rclass) {
|
2174
2176
|
FrtQuery *snq = frt_spannq_alloc();
|
2177
|
+
((FrtSpanNearQuery *)snq)->c_cnt = 0;
|
2175
2178
|
return TypedData_Wrap_Struct(rclass, &frb_span_near_query_t, snq);
|
2176
2179
|
}
|
2177
2180
|
|
@@ -2352,9 +2355,9 @@ static size_t frb_span_not_query_size(const void *p) {
|
|
2352
2355
|
|
2353
2356
|
static void frb_spanxq_mark(void *p) {
|
2354
2357
|
FrtSpanNotQuery *sxq = (FrtSpanNotQuery *)p;
|
2355
|
-
if (sxq->inc->rquery)
|
2358
|
+
if (sxq->inc && sxq->inc->rquery)
|
2356
2359
|
rb_gc_mark(sxq->inc->rquery);
|
2357
|
-
if (sxq->exc->rquery)
|
2360
|
+
if (sxq->exc && sxq->exc->rquery)
|
2358
2361
|
rb_gc_mark(sxq->exc->rquery);
|
2359
2362
|
}
|
2360
2363
|
|
@@ -3557,10 +3560,12 @@ static size_t frb_index_searcher_size(const void *p) {
|
|
3557
3560
|
|
3558
3561
|
static void frb_sea_mark(void *p) {
|
3559
3562
|
FrtIndexSearcher *isea = (FrtIndexSearcher *)p;
|
3560
|
-
if (isea->ir
|
3561
|
-
|
3562
|
-
|
3563
|
-
|
3563
|
+
if (isea->ir) {
|
3564
|
+
if (isea->ir->rir)
|
3565
|
+
rb_gc_mark(isea->ir->rir);
|
3566
|
+
if (isea->ir->store && isea->ir->store->rstore)
|
3567
|
+
rb_gc_mark(isea->ir->store->rstore);
|
3568
|
+
}
|
3564
3569
|
}
|
3565
3570
|
|
3566
3571
|
const rb_data_type_t frb_index_searcher_t = {
|
@@ -3578,8 +3583,9 @@ const rb_data_type_t frb_index_searcher_t = {
|
|
3578
3583
|
};
|
3579
3584
|
|
3580
3585
|
static VALUE frb_sea_alloc(VALUE rclass) {
|
3581
|
-
FrtSearcher *
|
3582
|
-
|
3586
|
+
FrtSearcher *sea = frt_isea_alloc();
|
3587
|
+
((FrtIndexSearcher *)sea)->ir = NULL;
|
3588
|
+
return TypedData_Wrap_Struct(rclass, &frb_index_searcher_t, sea);
|
3583
3589
|
}
|
3584
3590
|
|
3585
3591
|
/*
|
@@ -3665,6 +3671,7 @@ const rb_data_type_t frb_multi_searcher_t = {
|
|
3665
3671
|
|
3666
3672
|
static VALUE frb_ms_alloc(VALUE rclass) {
|
3667
3673
|
FrtSearcher *s = frt_msea_alloc();
|
3674
|
+
((FrtMultiSearcher *)s)->s_cnt = 0;
|
3668
3675
|
return TypedData_Wrap_Struct(rclass, &frb_multi_searcher_t, s);
|
3669
3676
|
}
|
3670
3677
|
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#include "isomorfeus_ferret.h"
|
3
3
|
#include <time.h>
|
4
4
|
|
5
|
-
#undef rename
|
5
|
+
// #undef rename
|
6
6
|
|
7
7
|
static ID id_ref_cnt;
|
8
8
|
VALUE cLock;
|
@@ -34,7 +34,7 @@ void frb_lock_free(void *p) {
|
|
34
34
|
|
35
35
|
void frb_lock_mark(void *p) {
|
36
36
|
FrtLock *lock = (FrtLock *)p;
|
37
|
-
if (lock->store->rstore)
|
37
|
+
if (lock->store && lock->store->rstore)
|
38
38
|
rb_gc_mark(lock->store->rstore);
|
39
39
|
}
|
40
40
|
|
@@ -329,7 +329,7 @@ static void cw_copy_file(FrtCompoundWriter *cw, FrtCWFileEntry *src, FrtOutStrea
|
|
329
329
|
frt_is_close(is);
|
330
330
|
}
|
331
331
|
|
332
|
-
void frt_cw_close(FrtCompoundWriter *cw)
|
332
|
+
void frt_cw_close(FrtCompoundWriter *cw, FrtDeleter *dlr)
|
333
333
|
{
|
334
334
|
FrtOutStream *os = NULL;
|
335
335
|
int i;
|
@@ -356,6 +356,9 @@ void frt_cw_close(FrtCompoundWriter *cw)
|
|
356
356
|
for (i = 0; i < frt_ary_size(cw->file_entries); i++) {
|
357
357
|
cw->file_entries[i].data_offset = frt_os_pos(os);
|
358
358
|
cw_copy_file(cw, &cw->file_entries[i], os);
|
359
|
+
if (dlr) {
|
360
|
+
frt_deleter_queue_file(dlr, cw->file_entries[i].name);
|
361
|
+
}
|
359
362
|
}
|
360
363
|
|
361
364
|
/* Write the data offsets into the directory of the compound stream */
|
@@ -272,15 +272,13 @@ static const struct FrtOutStreamMethods FS_OUT_STREAM_METHODS = {
|
|
272
272
|
static FrtOutStream *fs_new_output(FrtStore *store, const char *filename)
|
273
273
|
{
|
274
274
|
char path[FRT_MAX_FILE_PATH];
|
275
|
-
int fd = open(join_path(path, store->dir.path, filename),
|
276
|
-
O_WRONLY | O_CREAT | O_BINARY, store->file_mode);
|
277
|
-
FrtOutStream *os;
|
275
|
+
int fd = open(join_path(path, store->dir.path, filename), O_WRONLY | O_CREAT | O_BINARY, store->file_mode);
|
278
276
|
if (fd < 0) {
|
279
277
|
FRT_RAISE(FRT_IO_ERROR, "couldn't create OutStream %s: <%s>",
|
280
278
|
path, strerror(errno));
|
281
279
|
}
|
282
280
|
|
283
|
-
os = frt_os_new();
|
281
|
+
FrtOutStream *os = frt_os_new();
|
284
282
|
os->file.fd = fd;
|
285
283
|
os->m = &FS_OUT_STREAM_METHODS;
|
286
284
|
return os;
|
@@ -11,8 +11,8 @@
|
|
11
11
|
#include "bzlib.h"
|
12
12
|
#include "lz4frame.h"
|
13
13
|
|
14
|
-
#undef close
|
15
|
-
#undef read
|
14
|
+
// #undef close
|
15
|
+
// #undef read
|
16
16
|
|
17
17
|
extern rb_encoding *utf8_encoding;
|
18
18
|
extern void frt_micro_sleep(const int micro_seconds);
|
@@ -681,15 +681,13 @@ static char *si_norm_file_name(FrtSegmentInfo *si, char *buf, int field_num)
|
|
681
681
|
if (field_num >= si->norm_gens_size
|
682
682
|
|| 0 > (norm_gen = si->norm_gens[field_num])) {
|
683
683
|
return NULL;
|
684
|
-
}
|
685
|
-
else {
|
684
|
+
} else {
|
686
685
|
const char *ext = (si->use_compound_file && norm_gen > 0) ? "s" : "f";
|
687
686
|
return fn_for_gen_field(buf, si->name, ext, norm_gen, field_num);
|
688
687
|
}
|
689
688
|
}
|
690
689
|
|
691
|
-
|
692
|
-
#define DEL(file_name) deleter_queue_file(dlr, file_name)
|
690
|
+
void frt_deleter_queue_file(FrtDeleter *dlr, const char *file_name);
|
693
691
|
|
694
692
|
static void si_delete_files(FrtSegmentInfo *si, FrtFieldInfos *fis, FrtDeleter *dlr)
|
695
693
|
{
|
@@ -700,7 +698,7 @@ static void si_delete_files(FrtSegmentInfo *si, FrtFieldInfos *fis, FrtDeleter *
|
|
700
698
|
|
701
699
|
for (i = si->norm_gens_size - 1; i >= 0; i--) {
|
702
700
|
if (0 <= si->norm_gens[i]) {
|
703
|
-
|
701
|
+
frt_deleter_queue_file(dlr, si_norm_file_name(si, file_name, fis->fields[i]->number));
|
704
702
|
}
|
705
703
|
}
|
706
704
|
|
@@ -710,15 +708,14 @@ static void si_delete_files(FrtSegmentInfo *si, FrtFieldInfos *fis, FrtDeleter *
|
|
710
708
|
|
711
709
|
if (si->use_compound_file) {
|
712
710
|
memcpy(ext, "cfs", 4);
|
713
|
-
|
711
|
+
frt_deleter_queue_file(dlr, file_name);
|
714
712
|
if (0 <= si->del_gen) {
|
715
|
-
|
713
|
+
frt_deleter_queue_file(dlr, frt_fn_for_generation(file_name, si->name, "del", si->del_gen));
|
716
714
|
}
|
717
|
-
}
|
718
|
-
else {
|
715
|
+
} else {
|
719
716
|
for (i = FRT_NELEMS(INDEX_EXTENSIONS) - 1; i >= 0; i--) {
|
720
717
|
memcpy(ext, INDEX_EXTENSIONS[i], 4);
|
721
|
-
|
718
|
+
frt_deleter_queue_file(dlr, file_name);
|
722
719
|
}
|
723
720
|
}
|
724
721
|
}
|
@@ -3695,8 +3692,7 @@ static bool file_name_filter_is_cfs_file(const char *file_name) {
|
|
3695
3692
|
****************************************************************************/
|
3696
3693
|
|
3697
3694
|
#define DELETABLE_START_CAPA 8
|
3698
|
-
FrtDeleter *frt_deleter_new(FrtSegmentInfos *sis, FrtStore *store)
|
3699
|
-
{
|
3695
|
+
FrtDeleter *frt_deleter_new(FrtSegmentInfos *sis, FrtStore *store) {
|
3700
3696
|
FrtDeleter *dlr = FRT_ALLOC(FrtDeleter);
|
3701
3697
|
dlr->sis = sis;
|
3702
3698
|
dlr->store = store;
|
@@ -3704,19 +3700,16 @@ FrtDeleter *frt_deleter_new(FrtSegmentInfos *sis, FrtStore *store)
|
|
3704
3700
|
return dlr;
|
3705
3701
|
}
|
3706
3702
|
|
3707
|
-
void frt_deleter_destroy(FrtDeleter *dlr)
|
3708
|
-
{
|
3703
|
+
void frt_deleter_destroy(FrtDeleter *dlr) {
|
3709
3704
|
frt_hs_destroy(dlr->pending);
|
3710
3705
|
free(dlr);
|
3711
3706
|
}
|
3712
3707
|
|
3713
|
-
|
3714
|
-
{
|
3708
|
+
void frt_deleter_queue_file(FrtDeleter *dlr, const char *file_name) {
|
3715
3709
|
frt_hs_add(dlr->pending, frt_estrdup(file_name));
|
3716
3710
|
}
|
3717
3711
|
|
3718
|
-
void frt_deleter_delete_file(FrtDeleter *dlr, char *file_name)
|
3719
|
-
{
|
3712
|
+
void frt_deleter_delete_file(FrtDeleter *dlr, char *file_name) {
|
3720
3713
|
FrtStore *store = dlr->store;
|
3721
3714
|
FRT_TRY
|
3722
3715
|
if (store->exists(store, file_name)) {
|
@@ -3728,8 +3721,7 @@ void frt_deleter_delete_file(FrtDeleter *dlr, char *file_name)
|
|
3728
3721
|
FRT_XENDTRY
|
3729
3722
|
}
|
3730
3723
|
|
3731
|
-
static void deleter_commit_pending_deletions(FrtDeleter *dlr)
|
3732
|
-
{
|
3724
|
+
static void deleter_commit_pending_deletions(FrtDeleter *dlr) {
|
3733
3725
|
FrtHashSetEntry *hse, *hse_next = dlr->pending->first;
|
3734
3726
|
while ((hse = hse_next) != NULL) {
|
3735
3727
|
hse_next = hse->next;
|
@@ -3737,23 +3729,13 @@ static void deleter_commit_pending_deletions(FrtDeleter *dlr)
|
|
3737
3729
|
}
|
3738
3730
|
}
|
3739
3731
|
|
3740
|
-
void frt_deleter_delete_files(FrtDeleter *dlr, char **files, int file_cnt)
|
3741
|
-
{
|
3742
|
-
int i;
|
3743
|
-
for (i = file_cnt - 1; i >= 0; i--) {
|
3744
|
-
deleter_queue_file(dlr, files[i]);
|
3745
|
-
}
|
3746
|
-
deleter_commit_pending_deletions(dlr);
|
3747
|
-
}
|
3748
|
-
|
3749
3732
|
struct DelFilesArg {
|
3750
3733
|
char curr_seg_file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
3751
3734
|
FrtDeleter *dlr;
|
3752
3735
|
FrtHash *current;
|
3753
3736
|
};
|
3754
3737
|
|
3755
|
-
static void frt_deleter_find_deletable_files_i(const char *file_name, void *arg)
|
3756
|
-
{
|
3738
|
+
static void frt_deleter_find_deletable_files_i(const char *file_name, void *arg) {
|
3757
3739
|
struct DelFilesArg *dfa = (struct DelFilesArg *)arg;
|
3758
3740
|
FrtDeleter *dlr = dfa->dlr;
|
3759
3741
|
|
@@ -3773,8 +3755,7 @@ static void frt_deleter_find_deletable_files_i(const char *file_name, void *arg)
|
|
3773
3755
|
if (NULL != p) {
|
3774
3756
|
*p = '\0';
|
3775
3757
|
extension = p + 1;
|
3776
|
-
}
|
3777
|
-
else {
|
3758
|
+
} else {
|
3778
3759
|
extension = NULL;
|
3779
3760
|
}
|
3780
3761
|
|
@@ -3789,16 +3770,14 @@ static void frt_deleter_find_deletable_files_i(const char *file_name, void *arg)
|
|
3789
3770
|
if (NULL == (si = (FrtSegmentInfo *)frt_h_get(dfa->current, segment_name))) {
|
3790
3771
|
/* Delete if segment is not referenced: */
|
3791
3772
|
do_delete = true;
|
3792
|
-
}
|
3793
|
-
else {
|
3773
|
+
} else {
|
3794
3774
|
char tmp_fn[FRT_SEGMENT_NAME_MAX_LENGTH];
|
3795
3775
|
/* OK, segment is referenced, but file may still be orphan'd: */
|
3796
3776
|
if (file_name_filter_is_cfs_file(file_name)
|
3797
3777
|
&& si->use_compound_file) {
|
3798
3778
|
/* This file is stored in a CFS file for this segment: */
|
3799
3779
|
do_delete = true;
|
3800
|
-
}
|
3801
|
-
else if (0 == strcmp("del", extension)) {
|
3780
|
+
} else if (0 == strcmp("del", extension)) {
|
3802
3781
|
/* This is a _segmentName_N.del file: */
|
3803
3782
|
if (!frt_fn_for_generation(tmp_fn, segment_name, "del", si->del_gen)
|
3804
3783
|
|| 0 != strcmp(file_name, tmp_fn)) {
|
@@ -3807,8 +3786,7 @@ static void frt_deleter_find_deletable_files_i(const char *file_name, void *arg)
|
|
3807
3786
|
* this segment, then delete it: */
|
3808
3787
|
do_delete = true;
|
3809
3788
|
}
|
3810
|
-
}
|
3811
|
-
else if (NULL != extension
|
3789
|
+
} else if (NULL != extension
|
3812
3790
|
&& ('s' == *extension || 'f' == *extension)
|
3813
3791
|
&& isdigit(extension[1])) {
|
3814
3792
|
si_norm_file_name(si, tmp_fn, atoi(extension + 1));
|
@@ -3817,15 +3795,14 @@ static void frt_deleter_find_deletable_files_i(const char *file_name, void *arg)
|
|
3817
3795
|
/* This is an orphan'd norms file: */
|
3818
3796
|
do_delete = true;
|
3819
3797
|
}
|
3820
|
-
}
|
3821
|
-
else if (0 == strcmp("cfs", extension) && !si->use_compound_file) {
|
3798
|
+
} else if (0 == strcmp("cfs", extension) && !si->use_compound_file) {
|
3822
3799
|
/* This is a partially written _segmentName.cfs: */
|
3823
3800
|
do_delete = true;
|
3824
3801
|
}
|
3825
3802
|
}
|
3826
3803
|
|
3827
3804
|
if (do_delete) {
|
3828
|
-
|
3805
|
+
frt_deleter_queue_file(dlr, file_name);
|
3829
3806
|
}
|
3830
3807
|
}
|
3831
3808
|
}
|
@@ -3837,8 +3814,7 @@ static void frt_deleter_find_deletable_files_i(const char *file_name, void *arg)
|
|
3837
3814
|
* create the unused file (eg when merging segments), and we only remove from
|
3838
3815
|
* deletable when a file is successfully deleted.
|
3839
3816
|
*/
|
3840
|
-
void frt_deleter_find_deletable_files(FrtDeleter *dlr)
|
3841
|
-
{
|
3817
|
+
void frt_deleter_find_deletable_files(FrtDeleter *dlr) {
|
3842
3818
|
/* Gather all "current" segments: */
|
3843
3819
|
int i;
|
3844
3820
|
FrtSegmentInfos *sis = dlr->sis;
|
@@ -3937,9 +3913,10 @@ static FrtIndexReader *ir_setup(FrtIndexReader *ir, FrtStore *store, FrtSegmentI
|
|
3937
3913
|
ir->store = store;
|
3938
3914
|
FRT_REF(store);
|
3939
3915
|
}
|
3940
|
-
ir->sis
|
3941
|
-
ir->fis
|
3916
|
+
ir->sis = sis;
|
3917
|
+
ir->fis = fis;
|
3942
3918
|
ir->ref_cnt = 1;
|
3919
|
+
ir->rir = Qnil;
|
3943
3920
|
|
3944
3921
|
ir->is_owner = is_owner;
|
3945
3922
|
if (is_owner) {
|
@@ -4207,7 +4184,7 @@ static void norm_rewrite(Norm *norm, FrtStore *store, FrtDeleter *dlr,
|
|
4207
4184
|
const int field_num = norm->field_num;
|
4208
4185
|
|
4209
4186
|
if (si_norm_file_name(si, norm_file_name, field_num)) {
|
4210
|
-
|
4187
|
+
frt_deleter_queue_file(dlr, norm_file_name);
|
4211
4188
|
}
|
4212
4189
|
frt_si_advance_norm_gen(si, field_num);
|
4213
4190
|
si_norm_file_name(si, norm_file_name, field_num);
|
@@ -4363,7 +4340,7 @@ static void sr_commit_i(FrtIndexReader *ir)
|
|
4363
4340
|
if (SR(ir)->undelete_all || SR(ir)->deleted_docs_dirty) {
|
4364
4341
|
if (si->del_gen >= 0) {
|
4365
4342
|
frt_fn_for_generation(tmp_file_name, segment, "del", si->del_gen);
|
4366
|
-
|
4343
|
+
frt_deleter_queue_file(ir->deleter, tmp_file_name);
|
4367
4344
|
}
|
4368
4345
|
if (SR(ir)->undelete_all) {
|
4369
4346
|
si->del_gen = -1;
|
@@ -5687,7 +5664,6 @@ static SegmentMergeInfo *smi_new(int base, FrtStore *store, FrtSegmentInfo *si)
|
|
5687
5664
|
smi->store = frt_open_cmpd_store(store, file_name);
|
5688
5665
|
}
|
5689
5666
|
|
5690
|
-
|
5691
5667
|
sprintf(file_name, "%s.fdx", segment);
|
5692
5668
|
smi->doc_cnt = smi->max_doc
|
5693
5669
|
= smi->store->length(smi->store, file_name) / FIELDS_IDX_PTR_SIZE;
|
@@ -6116,14 +6092,7 @@ int frt_iw_doc_count(FrtIndexWriter *iw)
|
|
6116
6092
|
return doc_cnt;
|
6117
6093
|
}
|
6118
6094
|
|
6119
|
-
|
6120
|
-
deleter_queue_file(dlr, file_name);\
|
6121
|
-
frt_cw_add_file(cw, file_name)
|
6122
|
-
|
6123
|
-
static void iw_create_compound_file(FrtStore *store, FrtFieldInfos *fis,
|
6124
|
-
FrtSegmentInfo *si, char *cfs_file_name,
|
6125
|
-
FrtDeleter *dlr)
|
6126
|
-
{
|
6095
|
+
static void iw_create_compound_file(FrtStore *store, FrtFieldInfos *fis, FrtSegmentInfo *si, char *cfs_file_name, FrtDeleter *dlr) {
|
6127
6096
|
int i;
|
6128
6097
|
FrtCompoundWriter *cw;
|
6129
6098
|
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
@@ -6137,19 +6106,18 @@ static void iw_create_compound_file(FrtStore *store, FrtFieldInfos *fis,
|
|
6137
6106
|
cw = frt_open_cw(store, cfs_file_name);
|
6138
6107
|
for (i = 0; i < FRT_NELEMS(COMPOUND_EXTENSIONS); i++) {
|
6139
6108
|
memcpy(ext, COMPOUND_EXTENSIONS[i], 4);
|
6140
|
-
|
6109
|
+
frt_cw_add_file(cw, file_name);
|
6141
6110
|
}
|
6142
6111
|
|
6143
6112
|
/* Field norm file_names */
|
6144
6113
|
for (i = fis->size - 1; i >= 0; i--) {
|
6145
|
-
if (fi_has_norms(fis->fields[i])
|
6146
|
-
|
6147
|
-
MOVE_TO_COMPOUND_DIR(file_name);
|
6114
|
+
if (fi_has_norms(fis->fields[i]) && si_norm_file_name(si, file_name, i)) {
|
6115
|
+
frt_cw_add_file(cw, file_name);
|
6148
6116
|
}
|
6149
6117
|
}
|
6150
6118
|
|
6151
6119
|
/* Perform the merge */
|
6152
|
-
frt_cw_close(cw);
|
6120
|
+
frt_cw_close(cw, dlr);
|
6153
6121
|
}
|
6154
6122
|
|
6155
6123
|
static void iw_commit_compound_file(FrtIndexWriter *iw, FrtSegmentInfo *si)
|
@@ -6464,6 +6432,8 @@ static void iw_cp_fields(FrtIndexWriter *iw, FrtSegmentReader *sr, const char *s
|
|
6464
6432
|
sprintf(file_name, "%s.del", segment);
|
6465
6433
|
del_out = store_out->new_output(store_out, file_name);
|
6466
6434
|
frt_is2os_copy_bytes(del_in, del_out, frt_is_length(del_in));
|
6435
|
+
frt_os_close(del_out);
|
6436
|
+
frt_is_close(del_in);
|
6467
6437
|
}
|
6468
6438
|
|
6469
6439
|
if (map) {
|
@@ -627,9 +627,9 @@ struct FrtDeleter {
|
|
627
627
|
|
628
628
|
extern FrtDeleter *frt_deleter_new(FrtSegmentInfos *sis, FrtStore *store);
|
629
629
|
extern void frt_deleter_destroy(FrtDeleter *dlr);
|
630
|
+
extern void frt_deleter_queue_file(FrtDeleter *dlr, const char *file_name);
|
630
631
|
extern void frt_deleter_delete_file(FrtDeleter *dlr, char *file_name);
|
631
632
|
extern void frt_deleter_find_deletable_files(FrtDeleter *dlr);
|
632
|
-
extern void frt_deleter_delete_files(FrtDeleter *dlr, char **files, int file_cnt);
|
633
633
|
|
634
634
|
/****************************************************************************
|
635
635
|
*
|
@@ -859,6 +859,6 @@ typedef struct FrtCompoundWriter {
|
|
859
859
|
|
860
860
|
extern FrtCompoundWriter *frt_open_cw(FrtStore *store, char *name);
|
861
861
|
extern void frt_cw_add_file(FrtCompoundWriter *cw, char *id);
|
862
|
-
extern void frt_cw_close(FrtCompoundWriter *cw);
|
862
|
+
extern void frt_cw_close(FrtCompoundWriter *cw, FrtDeleter *dlr);
|
863
863
|
|
864
864
|
#endif
|
@@ -1280,7 +1280,7 @@ static int bc_eq(FrtBooleanClause *self, FrtBooleanClause *o) {
|
|
1280
1280
|
}
|
1281
1281
|
|
1282
1282
|
FrtBooleanClause *frt_bc_alloc(void) {
|
1283
|
-
return
|
1283
|
+
return FRT_ALLOC_AND_ZERO(FrtBooleanClause);
|
1284
1284
|
}
|
1285
1285
|
|
1286
1286
|
FrtBooleanClause *frt_bc_init(FrtBooleanClause *self, FrtQuery *query, FrtBCType occur) {
|
@@ -149,7 +149,7 @@ void frt_os_seek(FrtOutStream *os, off_t new_pos)
|
|
149
149
|
*/
|
150
150
|
void frt_os_write_byte(FrtOutStream *os, frt_uchar b)
|
151
151
|
{
|
152
|
-
if (os->buf.pos >= FRT_BUFFER_SIZE) {
|
152
|
+
if (os->buf.pos >= (FRT_BUFFER_SIZE - 1)) {
|
153
153
|
frt_os_flush(os);
|
154
154
|
}
|
155
155
|
write_byte(os, b);
|
@@ -157,15 +157,12 @@ void frt_os_write_byte(FrtOutStream *os, frt_uchar b)
|
|
157
157
|
|
158
158
|
void frt_os_write_bytes(FrtOutStream *os, const frt_uchar *buf, int len)
|
159
159
|
{
|
160
|
-
if (os->buf.pos
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
if (len < FRT_BUFFER_SIZE) {
|
165
|
-
os->m->flush_i(os, buf, len);
|
166
|
-
os->buf.start += len;
|
160
|
+
if (len < (FRT_BUFFER_SIZE - os->buf.pos)) {
|
161
|
+
memcpy(os->buf.buf + os->buf.pos, buf, len);
|
162
|
+
os->buf.pos += len;
|
167
163
|
}
|
168
164
|
else {
|
165
|
+
frt_os_flush(os);
|
169
166
|
int pos = 0;
|
170
167
|
int size;
|
171
168
|
while (pos < len) {
|
@@ -810,7 +810,7 @@ void Init_Test(void) {
|
|
810
810
|
rb_define_singleton_method(mTest, "filter", frb_ts_filter, 0);
|
811
811
|
rb_define_singleton_method(mTest, "fs_store", frb_ts_fs_store, 0);
|
812
812
|
rb_define_singleton_method(mTest, "global", frb_ts_global, 0);
|
813
|
-
rb_define_singleton_method(mTest, "
|
813
|
+
rb_define_singleton_method(mTest, "test_hash", frb_ts_hash, 0);
|
814
814
|
rb_define_singleton_method(mTest, "hashset", frb_ts_hashset, 0);
|
815
815
|
rb_define_singleton_method(mTest, "helper", frb_ts_helper, 0);
|
816
816
|
rb_define_singleton_method(mTest, "highlighter", frb_ts_highlighter, 0);
|
@@ -48,7 +48,7 @@ void test_compound_writer(TestCase *tc, void *data)
|
|
48
48
|
cw = frt_open_cw(store, (char *)"cfile");
|
49
49
|
frt_cw_add_file(cw, (char *)"file1");
|
50
50
|
frt_cw_add_file(cw, (char *)"file2");
|
51
|
-
frt_cw_close(cw);
|
51
|
+
frt_cw_close(cw, NULL);
|
52
52
|
|
53
53
|
is = store->open_input(store, "cfile");
|
54
54
|
Aiequal(2, frt_is_read_vint(is));
|
@@ -94,7 +94,7 @@ void test_compound_io(TestCase *tc, void *data)
|
|
94
94
|
frt_cw_add_file(cw, (char *)"file1");
|
95
95
|
frt_cw_add_file(cw, (char *)"file2");
|
96
96
|
frt_cw_add_file(cw, (char *)"file3");
|
97
|
-
frt_cw_close(cw);
|
97
|
+
frt_cw_close(cw, NULL);
|
98
98
|
|
99
99
|
c_reader = frt_open_cmpd_store(store, "cfile");
|
100
100
|
is1 = c_reader->open_input(c_reader, "file1");
|
@@ -137,7 +137,7 @@ void test_compound_io_many_files(TestCase *tc, void *data)
|
|
137
137
|
frt_os_write_vint(os, MAGIC);
|
138
138
|
frt_os_close(os);
|
139
139
|
}
|
140
|
-
frt_cw_close(cw);
|
140
|
+
frt_cw_close(cw, NULL);
|
141
141
|
|
142
142
|
c_reader = frt_open_cmpd_store(store, "_.cfs");
|
143
143
|
for (i = 0; i < TEST_FILE_CNT; i++) {
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: isomorfeus-ferret
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.
|
4
|
+
version: 0.13.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Biedermann
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-04-
|
11
|
+
date: 2022-04-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|