isomorfeus-ferret 0.13.4 → 0.13.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +18 -32
- data/ext/isomorfeus_ferret_ext/benchmark.c +2 -2
- data/ext/isomorfeus_ferret_ext/frb_index.c +3 -3
- data/ext/isomorfeus_ferret_ext/frt_compound_io.c +4 -1
- data/ext/isomorfeus_ferret_ext/frt_except.c +0 -1
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +2 -6
- data/ext/isomorfeus_ferret_ext/frt_global.h +1 -8
- data/ext/isomorfeus_ferret_ext/frt_index.c +27 -60
- data/ext/isomorfeus_ferret_ext/frt_index.h +2 -2
- data/ext/isomorfeus_ferret_ext/frt_lang.c +8 -1
- data/ext/isomorfeus_ferret_ext/frt_store.c +5 -8
- data/ext/isomorfeus_ferret_ext/frt_store.h +1 -1
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +2 -2
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +1 -2
- data/ext/isomorfeus_ferret_ext/test.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_compound_io.c +3 -3
- data/ext/isomorfeus_ferret_ext/test_hash.c +1 -0
- data/ext/isomorfeus_ferret_ext/test_threading.c +1 -1
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +2 -5
- data/ext/isomorfeus_ferret_ext/frb_lang.c +0 -9
- data/ext/isomorfeus_ferret_ext/frb_lang.h +0 -17
- data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -29
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 479f3df28144a3ca25194afc36590058e027af38ba43cc42d24ffd83a35a6da7
|
4
|
+
data.tar.gz: 95c80530e4012e0f7219e3b60cc82748f4e6d1824f0958cfda6cd4735c92a82a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0a72ddf5c974896ac0e5d11cdab105c02894f12ffac43db12b7651393f0bfd5ebd5f99f4e03e7714a8d230c6e77f71a1f5ee1c243b01608b23555a170eabe5c9
|
7
|
+
data.tar.gz: 58a803a8de3afae0b2bdf6e0eeda41f6a2e29dc649d8e219d2d7d5c667bd27bea3fa8dd80f16af934456e9414cd0f6a2ec9b998d303593db94f43de208a3eaff
|
data/README.md
CHANGED
@@ -63,14 +63,8 @@ fis.add_field(:compressed_field, :store => :yes, :compression => :brotli, :term_
|
|
63
63
|
|
64
64
|
### Performance
|
65
65
|
|
66
|
-
|
67
|
-
On Windows
|
68
|
-
|
69
|
-
Search performance is still excellent and multiple times faster than Lucene.
|
70
|
-
|
71
|
-
Lucene achieves roughly double the indexing performance. This seems to be because of the different way strings and
|
72
|
-
encodings are handled in Java. For example, the Java WhitespaceTokenizer code requires only one method call per character (check for whitespace), but for Ruby, to support all the different encodings, several method calls are required per character (retrieve character according to encoding, check character for whitespace).
|
73
|
-
Ferret is internally using the standard Ruby string encoding methods.
|
66
|
+
As of version 0.13.7 the performance bottleneck has been identified and removed; Ferret now delivers excellent indexing performance on all platforms (see the numbers below).
|
67
|
+
On Windows, performance is still not as good as on Linux, but that is equally true for Lucene and is due to how the Windows filesystem works.
|
74
68
|
|
75
69
|
## Documentation
|
76
70
|
|
@@ -105,43 +99,35 @@ Ensure your locale is set to C.UTF-8, because the internal c tests don't know ho
|
|
105
99
|
|
106
100
|
A recent Java JDK must be installed to compile and run lucene benchmarks.
|
107
101
|
|
108
|
-
Results on
|
109
|
-
|
110
|
-
Ferret
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
Searching took: 1.64s for 8000 queries
|
120
|
-
thats 4875 q/s
|
121
|
-
Total found: 41000
|
122
|
-
index size: 35Mb
|
123
|
-
|
124
|
-
JVM 11.0.14.1 (Ubuntu)
|
125
|
-
```
|
102
|
+
Results, Ferret 0.13.7 vs. Lucene 9.1.0, WhitespaceAnalyzer, on an old Intel Core i5 from 2015:
|
103
|
+
|
104
|
+
| OS | Task | Ferret | Lucene* |
|
105
|
+
|---------|------------|-----------------|----------------|
|
106
|
+
| Linux | Indexing | 4905 docs/s | 4785 docs/s |
|
107
|
+
| Windows | Indexing | 2361 docs/s | 2395 docs/s |
|
108
|
+
| Linux | Searching | 25664 queries/s | 4708 queries/s |
|
109
|
+
| Windows | Searching | 3646 queries/s | 935 queries/s |
|
110
|
+
| | Index Size | 28 MB | 35 MB |
|
111
|
+
|
112
|
+
*Lucene 9.1.0 on JVM 11.0.14.1 (Ubuntu)
|
126
113
|
|
127
114
|
### Storing Fields with Compression, Indexing and Retrieval
|
128
115
|
- clone repo
|
129
116
|
- bundle install
|
130
117
|
- rake ferret_compression_benchmark
|
131
118
|
|
132
|
-
Results on Linux, 0.13.
|
119
|
+
Results on Linux, 0.13.7, on an old Intel Core i5 from 2015:
|
133
120
|
|
134
121
|
| Compression | Index & Store | Retrieve | Index size |
|
135
122
|
|-------------|---------------|---------------|------------|
|
136
|
-
| none |
|
137
|
-
| brotli |
|
138
|
-
| bzip2 |
|
139
|
-
| lz4 |
|
123
|
+
| none | 4866 docs/s | 153853 docs/s | 43 MB |
|
124
|
+
| brotli | 3539 docs/s | 58315 docs/s | 36 MB |
|
125
|
+
| bzip2 | 2624 docs/s | 15382 docs/s | 38 MB |
|
126
|
+
| lz4 | 4639 docs/s | 127100 docs/s | 41 MB |
|
140
127
|
|
141
128
|
## Future
|
142
129
|
|
143
130
|
Lots of things to do:
|
144
|
-
- Improve indexing performance on Windows (WriteFile is terribly slow, maybe use mapping, see libuv)
|
145
131
|
- Bring documentation in order in a docs directory
|
146
132
|
- Review code (especially for memory/stack issues, typical c issues)
|
147
133
|
- Take care of ruby GVL and threading
|
@@ -117,13 +117,13 @@ static void bm_single_run(BenchMarkUnit *unit, BenchMarkTimes *bm_times)
|
|
117
117
|
struct rusage ru_before, ru_after;
|
118
118
|
|
119
119
|
if (gettimeofday(&tv_before, NULL) == -1)
|
120
|
-
|
120
|
+
rb_raise(rb_eRuntimeError, "gettimeofday failed\n");
|
121
121
|
getrusage(RUSAGE_SELF, &ru_before);
|
122
122
|
|
123
123
|
unit->run();
|
124
124
|
|
125
125
|
if (gettimeofday(&tv_after, NULL) == -1)
|
126
|
-
|
126
|
+
rb_raise(rb_eRuntimeError, "gettimeofday failed\n");
|
127
127
|
getrusage(RUSAGE_SELF, &ru_after);
|
128
128
|
|
129
129
|
bm_times->rtime = TVAL_TO_SEC(tv_before, tv_after);
|
@@ -1271,11 +1271,11 @@ void frb_iw_free(void *p) {
|
|
1271
1271
|
|
1272
1272
|
void frb_iw_mark(void *p) {
|
1273
1273
|
FrtIndexWriter *iw = (FrtIndexWriter *)p;
|
1274
|
-
if (iw->analyzer->ranalyzer)
|
1274
|
+
if (iw->analyzer && iw->analyzer->ranalyzer)
|
1275
1275
|
rb_gc_mark(iw->analyzer->ranalyzer);
|
1276
|
-
if (iw->store->rstore)
|
1276
|
+
if (iw->store && iw->store->rstore)
|
1277
1277
|
rb_gc_mark(iw->store->rstore);
|
1278
|
-
if (iw->fis->rfis)
|
1278
|
+
if (iw->fis && iw->fis->rfis)
|
1279
1279
|
rb_gc_mark(iw->fis->rfis);
|
1280
1280
|
}
|
1281
1281
|
|
@@ -329,7 +329,7 @@ static void cw_copy_file(FrtCompoundWriter *cw, FrtCWFileEntry *src, FrtOutStrea
|
|
329
329
|
frt_is_close(is);
|
330
330
|
}
|
331
331
|
|
332
|
-
void frt_cw_close(FrtCompoundWriter *cw)
|
332
|
+
void frt_cw_close(FrtCompoundWriter *cw, FrtDeleter *dlr)
|
333
333
|
{
|
334
334
|
FrtOutStream *os = NULL;
|
335
335
|
int i;
|
@@ -356,6 +356,9 @@ void frt_cw_close(FrtCompoundWriter *cw)
|
|
356
356
|
for (i = 0; i < frt_ary_size(cw->file_entries); i++) {
|
357
357
|
cw->file_entries[i].data_offset = frt_os_pos(os);
|
358
358
|
cw_copy_file(cw, &cw->file_entries[i], os);
|
359
|
+
if (dlr) {
|
360
|
+
frt_deleter_queue_file(dlr, cw->file_entries[i].name);
|
361
|
+
}
|
359
362
|
}
|
360
363
|
|
361
364
|
/* Write the data offsets into the directory of the compound stream */
|
@@ -272,15 +272,13 @@ static const struct FrtOutStreamMethods FS_OUT_STREAM_METHODS = {
|
|
272
272
|
static FrtOutStream *fs_new_output(FrtStore *store, const char *filename)
|
273
273
|
{
|
274
274
|
char path[FRT_MAX_FILE_PATH];
|
275
|
-
int fd = open(join_path(path, store->dir.path, filename),
|
276
|
-
O_WRONLY | O_CREAT | O_BINARY, store->file_mode);
|
277
|
-
FrtOutStream *os;
|
275
|
+
int fd = open(join_path(path, store->dir.path, filename), O_WRONLY | O_CREAT | O_BINARY, store->file_mode);
|
278
276
|
if (fd < 0) {
|
279
277
|
FRT_RAISE(FRT_IO_ERROR, "couldn't create OutStream %s: <%s>",
|
280
278
|
path, strerror(errno));
|
281
279
|
}
|
282
280
|
|
283
|
-
os = frt_os_new();
|
281
|
+
FrtOutStream *os = frt_os_new();
|
284
282
|
os->file.fd = fd;
|
285
283
|
os->m = &FS_OUT_STREAM_METHODS;
|
286
284
|
return os;
|
@@ -418,9 +416,7 @@ static void fs_close_lock_i(FrtLock *lock)
|
|
418
416
|
|
419
417
|
static FrtHash *stores = NULL;
|
420
418
|
|
421
|
-
#ifndef UNTHREADED
|
422
419
|
static frt_mutex_t stores_mutex = FRT_MUTEX_INITIALIZER;
|
423
|
-
#endif
|
424
420
|
|
425
421
|
static void fs_close_i(FrtStore *store)
|
426
422
|
{
|
@@ -13,7 +13,7 @@
|
|
13
13
|
|
14
14
|
#define FRT_MAX_WORD_SIZE 255
|
15
15
|
#define FRT_MAX_FILE_PATH 1024
|
16
|
-
#define FRT_BUFFER_SIZE
|
16
|
+
#define FRT_BUFFER_SIZE 16384
|
17
17
|
|
18
18
|
typedef enum {
|
19
19
|
FRT_COMPRESSION_NONE = 0,
|
@@ -266,13 +266,6 @@ extern FILE *frt_x_exception_stream;
|
|
266
266
|
#define EXCEPTION 2
|
267
267
|
#define EXCEPTION_STREAM (frt_x_exception_stream ? frt_x_exception_stream : stderr)
|
268
268
|
|
269
|
-
#ifdef DEBUG
|
270
|
-
extern bool frt_x_do_logging;
|
271
|
-
#define xlog if (frt_x_do_logging) printf
|
272
|
-
#else
|
273
|
-
#define xlog()
|
274
|
-
#endif
|
275
|
-
|
276
269
|
extern void frt_init(int arc, const char *const argv[]);
|
277
270
|
extern void frt_micro_sleep(const int micro_seconds);
|
278
271
|
|
@@ -681,15 +681,13 @@ static char *si_norm_file_name(FrtSegmentInfo *si, char *buf, int field_num)
|
|
681
681
|
if (field_num >= si->norm_gens_size
|
682
682
|
|| 0 > (norm_gen = si->norm_gens[field_num])) {
|
683
683
|
return NULL;
|
684
|
-
}
|
685
|
-
else {
|
684
|
+
} else {
|
686
685
|
const char *ext = (si->use_compound_file && norm_gen > 0) ? "s" : "f";
|
687
686
|
return fn_for_gen_field(buf, si->name, ext, norm_gen, field_num);
|
688
687
|
}
|
689
688
|
}
|
690
689
|
|
691
|
-
|
692
|
-
#define DEL(file_name) deleter_queue_file(dlr, file_name)
|
690
|
+
void frt_deleter_queue_file(FrtDeleter *dlr, const char *file_name);
|
693
691
|
|
694
692
|
static void si_delete_files(FrtSegmentInfo *si, FrtFieldInfos *fis, FrtDeleter *dlr)
|
695
693
|
{
|
@@ -700,7 +698,7 @@ static void si_delete_files(FrtSegmentInfo *si, FrtFieldInfos *fis, FrtDeleter *
|
|
700
698
|
|
701
699
|
for (i = si->norm_gens_size - 1; i >= 0; i--) {
|
702
700
|
if (0 <= si->norm_gens[i]) {
|
703
|
-
|
701
|
+
frt_deleter_queue_file(dlr, si_norm_file_name(si, file_name, fis->fields[i]->number));
|
704
702
|
}
|
705
703
|
}
|
706
704
|
|
@@ -710,15 +708,14 @@ static void si_delete_files(FrtSegmentInfo *si, FrtFieldInfos *fis, FrtDeleter *
|
|
710
708
|
|
711
709
|
if (si->use_compound_file) {
|
712
710
|
memcpy(ext, "cfs", 4);
|
713
|
-
|
711
|
+
frt_deleter_queue_file(dlr, file_name);
|
714
712
|
if (0 <= si->del_gen) {
|
715
|
-
|
713
|
+
frt_deleter_queue_file(dlr, frt_fn_for_generation(file_name, si->name, "del", si->del_gen));
|
716
714
|
}
|
717
|
-
}
|
718
|
-
else {
|
715
|
+
} else {
|
719
716
|
for (i = FRT_NELEMS(INDEX_EXTENSIONS) - 1; i >= 0; i--) {
|
720
717
|
memcpy(ext, INDEX_EXTENSIONS[i], 4);
|
721
|
-
|
718
|
+
frt_deleter_queue_file(dlr, file_name);
|
722
719
|
}
|
723
720
|
}
|
724
721
|
}
|
@@ -3695,8 +3692,7 @@ static bool file_name_filter_is_cfs_file(const char *file_name) {
|
|
3695
3692
|
****************************************************************************/
|
3696
3693
|
|
3697
3694
|
#define DELETABLE_START_CAPA 8
|
3698
|
-
FrtDeleter *frt_deleter_new(FrtSegmentInfos *sis, FrtStore *store)
|
3699
|
-
{
|
3695
|
+
FrtDeleter *frt_deleter_new(FrtSegmentInfos *sis, FrtStore *store) {
|
3700
3696
|
FrtDeleter *dlr = FRT_ALLOC(FrtDeleter);
|
3701
3697
|
dlr->sis = sis;
|
3702
3698
|
dlr->store = store;
|
@@ -3704,19 +3700,16 @@ FrtDeleter *frt_deleter_new(FrtSegmentInfos *sis, FrtStore *store)
|
|
3704
3700
|
return dlr;
|
3705
3701
|
}
|
3706
3702
|
|
3707
|
-
void frt_deleter_destroy(FrtDeleter *dlr)
|
3708
|
-
{
|
3703
|
+
void frt_deleter_destroy(FrtDeleter *dlr) {
|
3709
3704
|
frt_hs_destroy(dlr->pending);
|
3710
3705
|
free(dlr);
|
3711
3706
|
}
|
3712
3707
|
|
3713
|
-
|
3714
|
-
{
|
3708
|
+
void frt_deleter_queue_file(FrtDeleter *dlr, const char *file_name) {
|
3715
3709
|
frt_hs_add(dlr->pending, frt_estrdup(file_name));
|
3716
3710
|
}
|
3717
3711
|
|
3718
|
-
void frt_deleter_delete_file(FrtDeleter *dlr, char *file_name)
|
3719
|
-
{
|
3712
|
+
void frt_deleter_delete_file(FrtDeleter *dlr, char *file_name) {
|
3720
3713
|
FrtStore *store = dlr->store;
|
3721
3714
|
FRT_TRY
|
3722
3715
|
if (store->exists(store, file_name)) {
|
@@ -3728,8 +3721,7 @@ void frt_deleter_delete_file(FrtDeleter *dlr, char *file_name)
|
|
3728
3721
|
FRT_XENDTRY
|
3729
3722
|
}
|
3730
3723
|
|
3731
|
-
static void deleter_commit_pending_deletions(FrtDeleter *dlr)
|
3732
|
-
{
|
3724
|
+
static void deleter_commit_pending_deletions(FrtDeleter *dlr) {
|
3733
3725
|
FrtHashSetEntry *hse, *hse_next = dlr->pending->first;
|
3734
3726
|
while ((hse = hse_next) != NULL) {
|
3735
3727
|
hse_next = hse->next;
|
@@ -3737,23 +3729,13 @@ static void deleter_commit_pending_deletions(FrtDeleter *dlr)
|
|
3737
3729
|
}
|
3738
3730
|
}
|
3739
3731
|
|
3740
|
-
void frt_deleter_delete_files(FrtDeleter *dlr, char **files, int file_cnt)
|
3741
|
-
{
|
3742
|
-
int i;
|
3743
|
-
for (i = file_cnt - 1; i >= 0; i--) {
|
3744
|
-
deleter_queue_file(dlr, files[i]);
|
3745
|
-
}
|
3746
|
-
deleter_commit_pending_deletions(dlr);
|
3747
|
-
}
|
3748
|
-
|
3749
3732
|
struct DelFilesArg {
|
3750
3733
|
char curr_seg_file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
3751
3734
|
FrtDeleter *dlr;
|
3752
3735
|
FrtHash *current;
|
3753
3736
|
};
|
3754
3737
|
|
3755
|
-
static void frt_deleter_find_deletable_files_i(const char *file_name, void *arg)
|
3756
|
-
{
|
3738
|
+
static void frt_deleter_find_deletable_files_i(const char *file_name, void *arg) {
|
3757
3739
|
struct DelFilesArg *dfa = (struct DelFilesArg *)arg;
|
3758
3740
|
FrtDeleter *dlr = dfa->dlr;
|
3759
3741
|
|
@@ -3773,8 +3755,7 @@ static void frt_deleter_find_deletable_files_i(const char *file_name, void *arg)
|
|
3773
3755
|
if (NULL != p) {
|
3774
3756
|
*p = '\0';
|
3775
3757
|
extension = p + 1;
|
3776
|
-
}
|
3777
|
-
else {
|
3758
|
+
} else {
|
3778
3759
|
extension = NULL;
|
3779
3760
|
}
|
3780
3761
|
|
@@ -3789,16 +3770,14 @@ static void frt_deleter_find_deletable_files_i(const char *file_name, void *arg)
|
|
3789
3770
|
if (NULL == (si = (FrtSegmentInfo *)frt_h_get(dfa->current, segment_name))) {
|
3790
3771
|
/* Delete if segment is not referenced: */
|
3791
3772
|
do_delete = true;
|
3792
|
-
}
|
3793
|
-
else {
|
3773
|
+
} else {
|
3794
3774
|
char tmp_fn[FRT_SEGMENT_NAME_MAX_LENGTH];
|
3795
3775
|
/* OK, segment is referenced, but file may still be orphan'd: */
|
3796
3776
|
if (file_name_filter_is_cfs_file(file_name)
|
3797
3777
|
&& si->use_compound_file) {
|
3798
3778
|
/* This file is stored in a CFS file for this segment: */
|
3799
3779
|
do_delete = true;
|
3800
|
-
}
|
3801
|
-
else if (0 == strcmp("del", extension)) {
|
3780
|
+
} else if (0 == strcmp("del", extension)) {
|
3802
3781
|
/* This is a _segmentName_N.del file: */
|
3803
3782
|
if (!frt_fn_for_generation(tmp_fn, segment_name, "del", si->del_gen)
|
3804
3783
|
|| 0 != strcmp(file_name, tmp_fn)) {
|
@@ -3807,8 +3786,7 @@ static void frt_deleter_find_deletable_files_i(const char *file_name, void *arg)
|
|
3807
3786
|
* this segment, then delete it: */
|
3808
3787
|
do_delete = true;
|
3809
3788
|
}
|
3810
|
-
}
|
3811
|
-
else if (NULL != extension
|
3789
|
+
} else if (NULL != extension
|
3812
3790
|
&& ('s' == *extension || 'f' == *extension)
|
3813
3791
|
&& isdigit(extension[1])) {
|
3814
3792
|
si_norm_file_name(si, tmp_fn, atoi(extension + 1));
|
@@ -3817,15 +3795,14 @@ static void frt_deleter_find_deletable_files_i(const char *file_name, void *arg)
|
|
3817
3795
|
/* This is an orphan'd norms file: */
|
3818
3796
|
do_delete = true;
|
3819
3797
|
}
|
3820
|
-
}
|
3821
|
-
else if (0 == strcmp("cfs", extension) && !si->use_compound_file) {
|
3798
|
+
} else if (0 == strcmp("cfs", extension) && !si->use_compound_file) {
|
3822
3799
|
/* This is a partially written _segmentName.cfs: */
|
3823
3800
|
do_delete = true;
|
3824
3801
|
}
|
3825
3802
|
}
|
3826
3803
|
|
3827
3804
|
if (do_delete) {
|
3828
|
-
|
3805
|
+
frt_deleter_queue_file(dlr, file_name);
|
3829
3806
|
}
|
3830
3807
|
}
|
3831
3808
|
}
|
@@ -3837,8 +3814,7 @@ static void frt_deleter_find_deletable_files_i(const char *file_name, void *arg)
|
|
3837
3814
|
* create the unused file (eg when merging segments), and we only remove from
|
3838
3815
|
* deletable when a file is successfully deleted.
|
3839
3816
|
*/
|
3840
|
-
void frt_deleter_find_deletable_files(FrtDeleter *dlr)
|
3841
|
-
{
|
3817
|
+
void frt_deleter_find_deletable_files(FrtDeleter *dlr) {
|
3842
3818
|
/* Gather all "current" segments: */
|
3843
3819
|
int i;
|
3844
3820
|
FrtSegmentInfos *sis = dlr->sis;
|
@@ -4207,7 +4183,7 @@ static void norm_rewrite(Norm *norm, FrtStore *store, FrtDeleter *dlr,
|
|
4207
4183
|
const int field_num = norm->field_num;
|
4208
4184
|
|
4209
4185
|
if (si_norm_file_name(si, norm_file_name, field_num)) {
|
4210
|
-
|
4186
|
+
frt_deleter_queue_file(dlr, norm_file_name);
|
4211
4187
|
}
|
4212
4188
|
frt_si_advance_norm_gen(si, field_num);
|
4213
4189
|
si_norm_file_name(si, norm_file_name, field_num);
|
@@ -4363,7 +4339,7 @@ static void sr_commit_i(FrtIndexReader *ir)
|
|
4363
4339
|
if (SR(ir)->undelete_all || SR(ir)->deleted_docs_dirty) {
|
4364
4340
|
if (si->del_gen >= 0) {
|
4365
4341
|
frt_fn_for_generation(tmp_file_name, segment, "del", si->del_gen);
|
4366
|
-
|
4342
|
+
frt_deleter_queue_file(ir->deleter, tmp_file_name);
|
4367
4343
|
}
|
4368
4344
|
if (SR(ir)->undelete_all) {
|
4369
4345
|
si->del_gen = -1;
|
@@ -5687,7 +5663,6 @@ static SegmentMergeInfo *smi_new(int base, FrtStore *store, FrtSegmentInfo *si)
|
|
5687
5663
|
smi->store = frt_open_cmpd_store(store, file_name);
|
5688
5664
|
}
|
5689
5665
|
|
5690
|
-
|
5691
5666
|
sprintf(file_name, "%s.fdx", segment);
|
5692
5667
|
smi->doc_cnt = smi->max_doc
|
5693
5668
|
= smi->store->length(smi->store, file_name) / FIELDS_IDX_PTR_SIZE;
|
@@ -6116,14 +6091,7 @@ int frt_iw_doc_count(FrtIndexWriter *iw)
|
|
6116
6091
|
return doc_cnt;
|
6117
6092
|
}
|
6118
6093
|
|
6119
|
-
|
6120
|
-
deleter_queue_file(dlr, file_name);\
|
6121
|
-
frt_cw_add_file(cw, file_name)
|
6122
|
-
|
6123
|
-
static void iw_create_compound_file(FrtStore *store, FrtFieldInfos *fis,
|
6124
|
-
FrtSegmentInfo *si, char *cfs_file_name,
|
6125
|
-
FrtDeleter *dlr)
|
6126
|
-
{
|
6094
|
+
static void iw_create_compound_file(FrtStore *store, FrtFieldInfos *fis, FrtSegmentInfo *si, char *cfs_file_name, FrtDeleter *dlr) {
|
6127
6095
|
int i;
|
6128
6096
|
FrtCompoundWriter *cw;
|
6129
6097
|
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
@@ -6137,19 +6105,18 @@ static void iw_create_compound_file(FrtStore *store, FrtFieldInfos *fis,
|
|
6137
6105
|
cw = frt_open_cw(store, cfs_file_name);
|
6138
6106
|
for (i = 0; i < FRT_NELEMS(COMPOUND_EXTENSIONS); i++) {
|
6139
6107
|
memcpy(ext, COMPOUND_EXTENSIONS[i], 4);
|
6140
|
-
|
6108
|
+
frt_cw_add_file(cw, file_name);
|
6141
6109
|
}
|
6142
6110
|
|
6143
6111
|
/* Field norm file_names */
|
6144
6112
|
for (i = fis->size - 1; i >= 0; i--) {
|
6145
|
-
if (fi_has_norms(fis->fields[i])
|
6146
|
-
|
6147
|
-
MOVE_TO_COMPOUND_DIR(file_name);
|
6113
|
+
if (fi_has_norms(fis->fields[i]) && si_norm_file_name(si, file_name, i)) {
|
6114
|
+
frt_cw_add_file(cw, file_name);
|
6148
6115
|
}
|
6149
6116
|
}
|
6150
6117
|
|
6151
6118
|
/* Perform the merge */
|
6152
|
-
frt_cw_close(cw);
|
6119
|
+
frt_cw_close(cw, dlr);
|
6153
6120
|
}
|
6154
6121
|
|
6155
6122
|
static void iw_commit_compound_file(FrtIndexWriter *iw, FrtSegmentInfo *si)
|
@@ -627,9 +627,9 @@ struct FrtDeleter {
|
|
627
627
|
|
628
628
|
extern FrtDeleter *frt_deleter_new(FrtSegmentInfos *sis, FrtStore *store);
|
629
629
|
extern void frt_deleter_destroy(FrtDeleter *dlr);
|
630
|
+
extern void frt_deleter_queue_file(FrtDeleter *dlr, const char *file_name);
|
630
631
|
extern void frt_deleter_delete_file(FrtDeleter *dlr, char *file_name);
|
631
632
|
extern void frt_deleter_find_deletable_files(FrtDeleter *dlr);
|
632
|
-
extern void frt_deleter_delete_files(FrtDeleter *dlr, char **files, int file_cnt);
|
633
633
|
|
634
634
|
/****************************************************************************
|
635
635
|
*
|
@@ -859,6 +859,6 @@ typedef struct FrtCompoundWriter {
|
|
859
859
|
|
860
860
|
extern FrtCompoundWriter *frt_open_cw(FrtStore *store, char *name);
|
861
861
|
extern void frt_cw_add_file(FrtCompoundWriter *cw, char *id);
|
862
|
-
extern void frt_cw_close(FrtCompoundWriter *cw);
|
862
|
+
extern void frt_cw_close(FrtCompoundWriter *cw, FrtDeleter *dlr);
|
863
863
|
|
864
864
|
#endif
|
@@ -43,7 +43,14 @@ void *frt_erealloc(void *ptr, size_t size)
|
|
43
43
|
return p;
|
44
44
|
}
|
45
45
|
|
46
|
-
|
46
|
+
|
47
|
+
struct timeval rb_time_interval _((VALUE));
|
48
|
+
|
49
|
+
extern void frt_micro_sleep(const int micro_seconds) {
|
50
|
+
rb_thread_wait_for(rb_time_interval(rb_float_new((double)micro_seconds/1000000.0)));
|
51
|
+
}
|
52
|
+
|
53
|
+
/* void frt_micro_sleep(const int micro_seconds)
|
47
54
|
{
|
48
55
|
#if (defined POSH_OS_WIN32 || defined POSH_OS_WIN64) && !defined __MINGW32__
|
49
56
|
Sleep(micro_seconds / 1000);
|
@@ -149,7 +149,7 @@ void frt_os_seek(FrtOutStream *os, off_t new_pos)
|
|
149
149
|
*/
|
150
150
|
void frt_os_write_byte(FrtOutStream *os, frt_uchar b)
|
151
151
|
{
|
152
|
-
if (os->buf.pos >= FRT_BUFFER_SIZE) {
|
152
|
+
if (os->buf.pos >= (FRT_BUFFER_SIZE - 1)) {
|
153
153
|
frt_os_flush(os);
|
154
154
|
}
|
155
155
|
write_byte(os, b);
|
@@ -157,15 +157,12 @@ void frt_os_write_byte(FrtOutStream *os, frt_uchar b)
|
|
157
157
|
|
158
158
|
void frt_os_write_bytes(FrtOutStream *os, const frt_uchar *buf, int len)
|
159
159
|
{
|
160
|
-
if (os->buf.pos
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
if (len < FRT_BUFFER_SIZE) {
|
165
|
-
os->m->flush_i(os, buf, len);
|
166
|
-
os->buf.start += len;
|
160
|
+
if (len < (FRT_BUFFER_SIZE - os->buf.pos)) {
|
161
|
+
memcpy(os->buf.buf + os->buf.pos, buf, len);
|
162
|
+
os->buf.pos += len;
|
167
163
|
}
|
168
164
|
else {
|
165
|
+
frt_os_flush(os);
|
169
166
|
int pos = 0;
|
170
167
|
int size;
|
171
168
|
while (pos < len) {
|
@@ -810,7 +810,7 @@ void Init_Test(void) {
|
|
810
810
|
rb_define_singleton_method(mTest, "filter", frb_ts_filter, 0);
|
811
811
|
rb_define_singleton_method(mTest, "fs_store", frb_ts_fs_store, 0);
|
812
812
|
rb_define_singleton_method(mTest, "global", frb_ts_global, 0);
|
813
|
-
rb_define_singleton_method(mTest, "
|
813
|
+
rb_define_singleton_method(mTest, "test_hash", frb_ts_hash, 0);
|
814
814
|
rb_define_singleton_method(mTest, "hashset", frb_ts_hashset, 0);
|
815
815
|
rb_define_singleton_method(mTest, "helper", frb_ts_helper, 0);
|
816
816
|
rb_define_singleton_method(mTest, "highlighter", frb_ts_highlighter, 0);
|
@@ -48,7 +48,7 @@ void test_compound_writer(TestCase *tc, void *data)
|
|
48
48
|
cw = frt_open_cw(store, (char *)"cfile");
|
49
49
|
frt_cw_add_file(cw, (char *)"file1");
|
50
50
|
frt_cw_add_file(cw, (char *)"file2");
|
51
|
-
frt_cw_close(cw);
|
51
|
+
frt_cw_close(cw, NULL);
|
52
52
|
|
53
53
|
is = store->open_input(store, "cfile");
|
54
54
|
Aiequal(2, frt_is_read_vint(is));
|
@@ -94,7 +94,7 @@ void test_compound_io(TestCase *tc, void *data)
|
|
94
94
|
frt_cw_add_file(cw, (char *)"file1");
|
95
95
|
frt_cw_add_file(cw, (char *)"file2");
|
96
96
|
frt_cw_add_file(cw, (char *)"file3");
|
97
|
-
frt_cw_close(cw);
|
97
|
+
frt_cw_close(cw, NULL);
|
98
98
|
|
99
99
|
c_reader = frt_open_cmpd_store(store, "cfile");
|
100
100
|
is1 = c_reader->open_input(c_reader, "file1");
|
@@ -137,7 +137,7 @@ void test_compound_io_many_files(TestCase *tc, void *data)
|
|
137
137
|
frt_os_write_vint(os, MAGIC);
|
138
138
|
frt_os_close(os);
|
139
139
|
}
|
140
|
-
frt_cw_close(cw);
|
140
|
+
frt_cw_close(cw, NULL);
|
141
141
|
|
142
142
|
c_reader = frt_open_cmpd_store(store, "_.cfs");
|
143
143
|
for (i = 0; i < TEST_FILE_CNT; i++) {
|
@@ -175,7 +175,7 @@ TestSuite *ts_threading(TestSuite *suite)
|
|
175
175
|
|
176
176
|
tst_run_test(suite, test_number_to_str, NULL);
|
177
177
|
tst_run_test(suite, test_threading_test, index);
|
178
|
-
|
178
|
+
tst_run_test(suite, test_threading, index);
|
179
179
|
|
180
180
|
frt_index_destroy(index);
|
181
181
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: isomorfeus-ferret
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.
|
4
|
+
version: 0.13.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Biedermann
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-04-
|
11
|
+
date: 2022-04-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -167,12 +167,9 @@ files:
|
|
167
167
|
- ext/isomorfeus_ferret_ext/fio_tmpfile.h
|
168
168
|
- ext/isomorfeus_ferret_ext/frb_analysis.c
|
169
169
|
- ext/isomorfeus_ferret_ext/frb_index.c
|
170
|
-
- ext/isomorfeus_ferret_ext/frb_lang.c
|
171
|
-
- ext/isomorfeus_ferret_ext/frb_lang.h
|
172
170
|
- ext/isomorfeus_ferret_ext/frb_qparser.c
|
173
171
|
- ext/isomorfeus_ferret_ext/frb_search.c
|
174
172
|
- ext/isomorfeus_ferret_ext/frb_store.c
|
175
|
-
- ext/isomorfeus_ferret_ext/frb_threading.h
|
176
173
|
- ext/isomorfeus_ferret_ext/frb_utils.c
|
177
174
|
- ext/isomorfeus_ferret_ext/frt_analysis.c
|
178
175
|
- ext/isomorfeus_ferret_ext/frt_analysis.h
|
@@ -1,17 +0,0 @@
|
|
1
|
-
#ifndef FRT_LANG_H
|
2
|
-
#define FRT_LANG_H
|
3
|
-
|
4
|
-
#define RUBY_BINDINGS 1
|
5
|
-
|
6
|
-
#include <stdarg.h>
|
7
|
-
#include <ruby.h>
|
8
|
-
|
9
|
-
#undef close
|
10
|
-
#undef rename
|
11
|
-
#undef read
|
12
|
-
|
13
|
-
#define frt_emalloc xmalloc
|
14
|
-
#define frt_ecalloc(n) xcalloc(n, 1)
|
15
|
-
#define frt_erealloc xrealloc
|
16
|
-
|
17
|
-
#endif
|
@@ -1,29 +0,0 @@
|
|
1
|
-
#ifndef FRT_THREADING_H
|
2
|
-
#define FRT_THREADING_H
|
3
|
-
|
4
|
-
#include "frt_hash.h"
|
5
|
-
#define UNTHREADED 1
|
6
|
-
|
7
|
-
typedef void * frt_mutex_t;
|
8
|
-
typedef struct FrtHash *frt_thread_key_t;
|
9
|
-
typedef int frt_thread_once_t;
|
10
|
-
#define FRT_MUTEX_INITIALIZER NULL
|
11
|
-
#define FRT_THREAD_ONCE_INIT 1;
|
12
|
-
#define frt_mutex_init(a, b)
|
13
|
-
#define frt_mutex_lock(a)
|
14
|
-
#define frt_mutex_trylock(a)
|
15
|
-
#define frt_mutex_unlock(a)
|
16
|
-
#define frt_mutex_destroy(a)
|
17
|
-
#define frt_thread_key_create(a, b) frb_thread_key_create(a, b)
|
18
|
-
#define frt_thread_key_delete(a) frb_thread_key_delete(a)
|
19
|
-
#define frt_thread_setspecific(a, b) frb_thread_setspecific(a, b)
|
20
|
-
#define frt_thread_getspecific(a) frb_thread_getspecific(a)
|
21
|
-
#define frt_thread_once(a, b) frb_thread_once(a, b)
|
22
|
-
|
23
|
-
void frb_thread_once(int *once_control, void (*init_routine)(void));
|
24
|
-
void frb_thread_key_create(frt_thread_key_t *key, frt_free_ft destroy);
|
25
|
-
void frb_thread_key_delete(frt_thread_key_t key);
|
26
|
-
void frb_thread_setspecific(frt_thread_key_t key, const void *pointer);
|
27
|
-
void *frb_thread_getspecific(frt_thread_key_t key);
|
28
|
-
|
29
|
-
#endif
|