isomorfeus-ferret 0.13.7 → 0.13.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +10 -4
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +7 -4
- data/ext/isomorfeus_ferret_ext/frb_index.c +3 -3
- data/ext/isomorfeus_ferret_ext/frb_qparser.c +2 -1
- data/ext/isomorfeus_ferret_ext/frb_search.c +22 -15
- data/ext/isomorfeus_ferret_ext/frb_store.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_ind.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_index.c +7 -4
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_q_term.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_search.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_sort.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_filter.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_index.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_q_span.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_search.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_sort.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_store.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_term.c +1 -1
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1e6a7a9c17cc344699bee0c8116188f86da73e11fa2a4eed02eea1c4647b6a99
|
4
|
+
data.tar.gz: c94c9a2735b010c02c8d5a793057b94abf8950117153415a731347c21fadf66e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 549a2d3b09dc535278f6c9735f55a47203166df046e5a75ff55589085336bcf2cb7c25cdf491c07eca68da25682316c4bb15e5079f09df95f8a494558fe22260
|
7
|
+
data.tar.gz: e188f428fd881a0285ece69807f13c07dfd1eee131edcad3e38a713ffa820364992bb9068d5f94f567bd7769c7882441ce8eb67199b95b491528fecdd34503f4
|
data/README.md
CHANGED
@@ -11,12 +11,12 @@ At the [Isomorfeus Framework Project](https://isomorfeus.com)
|
|
11
11
|
|
12
12
|
## About this project
|
13
13
|
|
14
|
-
Isomorfeus-Ferret is a revived version of the original ferret gem created by Dave Balmain.
|
14
|
+
Isomorfeus-Ferret is a revived version of the original ferret gem created by Dave Balmain, [https://github.com/dbalmain/ferret](https://github.com/dbalmain/ferret).
|
15
15
|
During revival many things have been fixed, now all tests pass, no crashes and it
|
16
16
|
successfully compiles and runs with Ruby versions > 3. It's no longer a goal to have
|
17
17
|
a C library available, but instead the usage is meant as a Ruby gem with a C extension only.
|
18
18
|
|
19
|
-
It
|
19
|
+
It works on *nixes, *nuxes, *BSDs and also works on Windows and RaspberryPi.
|
20
20
|
|
21
21
|
## Improvements and Changes in Version 0.13
|
22
22
|
|
@@ -63,7 +63,7 @@ fis.add_field(:compressed_field, :store => :yes, :compression => :brotli, :term_
|
|
63
63
|
|
64
64
|
### Performance
|
65
65
|
|
66
|
-
For version 0.13.7 the performance bottle
|
66
|
+
For version 0.13.7 the performance bottleneck has been identified and removed, ferret now delivers excellent indexing performance on all platforms, see numbers below.
|
67
67
|
On Windows, performance is still not as good as on Linux, but that is equally true for Lucene and is due to how the Windows filesystem works.
|
68
68
|
|
69
69
|
## Documentation
|
@@ -99,14 +99,20 @@ Ensure your locale is set to C.UTF-8, because the internal c tests don't know ho
|
|
99
99
|
|
100
100
|
A recent Java JDK must be installed to compile and run lucene benchmarks.
|
101
101
|
|
102
|
-
Results, Ferret 0.13.7 vs. Lucene 9.1.0, WhitespaceAnalyzer,
|
102
|
+
Results, Ferret 0.13.7 vs. Lucene 9.1.0, WhitespaceAnalyzer,
|
103
|
+
Linux Ubuntu 20.04, FreeBSD 13.0 and Windows 10 on old Intel Core i5 from 2015,
|
104
|
+
LinuxPi on RaspberryPi 400:
|
103
105
|
|
104
106
|
| OS | Task | Ferret | Lucene* |
|
105
107
|
|---------|------------|-----------------|----------------|
|
106
108
|
| Linux | Indexing | 4905 docs/s | 4785 docs/s |
|
109
|
+
| FreeBSD | Indexing | 4516 docs/s | - |
|
107
110
|
| Windows | Indexing | 2361 docs/s | 2395 docs/s |
|
111
|
+
| LinuxPi | Indexing | 1161 docs/s | 707 docs/s |
|
108
112
|
| Linux | Searching | 25664 queries/s | 4708 queries/s |
|
113
|
+
| FreeBSD | Searching | 25073 queries/s | - |
|
109
114
|
| Windows | Searching | 3646 queries/s | 935 queries/s |
|
115
|
+
| LinuxPi | Searching | 5768 queries/s | 680 queries/s |
|
110
116
|
| | Index Size | 28 MB | 35 MB |
|
111
117
|
|
112
118
|
*Lucene 9.1.0 on JVM 11.0.14.1 (Ubuntu)
|
@@ -458,7 +458,7 @@ static VALUE frb_ts_next(VALUE self) {
|
|
458
458
|
|
459
459
|
static void frb_tf_mark(void *p) {
|
460
460
|
FrtTokenStream *ts = (FrtTokenStream *)p;
|
461
|
-
if (TkFilt(ts)->sub_ts->rts)
|
461
|
+
if (TkFilt(ts)->sub_ts && TkFilt(ts)->sub_ts->rts)
|
462
462
|
rb_gc_mark(TkFilt(ts)->sub_ts->rts);
|
463
463
|
}
|
464
464
|
|
@@ -1331,9 +1331,10 @@ static void frb_h_mark_values_i(void *key, void *value, void *arg) {
|
|
1331
1331
|
}
|
1332
1332
|
|
1333
1333
|
static void frb_pfa_mark(void *p) {
|
1334
|
-
if (PFA(p)->default_a->ranalyzer)
|
1334
|
+
if (PFA(p)->default_a && PFA(p)->default_a->ranalyzer)
|
1335
1335
|
rb_gc_mark(PFA(p)->default_a->ranalyzer);
|
1336
|
-
|
1336
|
+
if (PFA(p)->dict)
|
1337
|
+
frt_h_each(PFA(p)->dict, &frb_h_mark_values_i, NULL);
|
1337
1338
|
}
|
1338
1339
|
|
1339
1340
|
/*** PerFieldAnalyzer ***/
|
@@ -1370,6 +1371,8 @@ const rb_data_type_t frb_per_field_analyzer_t = {
|
|
1370
1371
|
|
1371
1372
|
static VALUE frb_per_field_analyzer_alloc(VALUE rclass) {
|
1372
1373
|
FrtAnalyzer *a = frt_per_field_analyzer_alloc();
|
1374
|
+
PFA(a)->default_a = NULL;
|
1375
|
+
PFA(a)->dict = NULL;
|
1373
1376
|
return TypedData_Wrap_Struct(rclass, &frb_per_field_analyzer_t, a);
|
1374
1377
|
}
|
1375
1378
|
|
@@ -1435,7 +1438,7 @@ static VALUE frb_pfa_analyzer_token_stream(VALUE self, VALUE rfield, VALUE rstri
|
|
1435
1438
|
/*** RegExpAnalyzer ***/
|
1436
1439
|
|
1437
1440
|
static void frb_re_analyzer_mark(void *p) {
|
1438
|
-
if (((FrtAnalyzer *)p)->current_ts->rts)
|
1441
|
+
if (((FrtAnalyzer *)p)->current_ts && ((FrtAnalyzer *)p)->current_ts->rts)
|
1439
1442
|
rb_gc_mark(((FrtAnalyzer *)p)->current_ts->rts);
|
1440
1443
|
}
|
1441
1444
|
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#include "isomorfeus_ferret.h"
|
3
3
|
#include <ruby.h>
|
4
4
|
|
5
|
-
#undef close
|
5
|
+
// #undef close
|
6
6
|
|
7
7
|
VALUE mIndex;
|
8
8
|
|
@@ -429,6 +429,7 @@ static VALUE frb_get_field_infos(FrtFieldInfos *fis) {
|
|
429
429
|
|
430
430
|
static VALUE frb_fis_alloc(VALUE rclass) {
|
431
431
|
FrtFieldInfos *fis = frt_fis_alloc();
|
432
|
+
fis->size = 0;
|
432
433
|
return TypedData_Wrap_Struct(rclass, &frb_field_infos_t, fis);
|
433
434
|
}
|
434
435
|
|
@@ -674,8 +675,7 @@ frb_fis_get_tk_fields(VALUE self)
|
|
674
675
|
****************************************************************************/
|
675
676
|
|
676
677
|
static void frb_te_free(void *p) {
|
677
|
-
FrtTermEnum *
|
678
|
-
te->close(te);
|
678
|
+
((FrtTermEnum *)p)->close((FrtTermEnum *)p);
|
679
679
|
}
|
680
680
|
|
681
681
|
static size_t frb_te_size(const void *p) {
|
@@ -34,7 +34,7 @@ static void frb_qp_free(void *p) {
|
|
34
34
|
}
|
35
35
|
|
36
36
|
static void frb_qp_mark(void *p) {
|
37
|
-
if (((FrtQParser *)p)->analyzer->ranalyzer)
|
37
|
+
if (((FrtQParser *)p)->analyzer && ((FrtQParser *)p)->analyzer->ranalyzer)
|
38
38
|
rb_gc_mark(((FrtQParser *)p)->analyzer->ranalyzer);
|
39
39
|
}
|
40
40
|
|
@@ -59,6 +59,7 @@ const rb_data_type_t frb_qp_t = {
|
|
59
59
|
|
60
60
|
static VALUE frb_qp_alloc(VALUE rclass) {
|
61
61
|
FrtQParser *qp = frt_qp_alloc();
|
62
|
+
qp->analyzer = NULL;
|
62
63
|
return TypedData_Wrap_Struct(rclass, &frb_qp_t, qp);
|
63
64
|
}
|
64
65
|
|
@@ -4,7 +4,7 @@
|
|
4
4
|
#include "isomorfeus_ferret.h"
|
5
5
|
#include <ruby.h>
|
6
6
|
|
7
|
-
#undef close
|
7
|
+
// #undef close
|
8
8
|
|
9
9
|
VALUE mSearch;
|
10
10
|
|
@@ -713,7 +713,7 @@ static size_t frb_boolean_clause_t_size(const void *p) {
|
|
713
713
|
}
|
714
714
|
|
715
715
|
static void frb_bc_mark(void *p) {
|
716
|
-
if (((FrtBooleanClause *)p)->query->rquery)
|
716
|
+
if (((FrtBooleanClause *)p)->query && ((FrtBooleanClause *)p)->query->rquery)
|
717
717
|
rb_gc_mark(((FrtBooleanClause *)p)->query->rquery);
|
718
718
|
}
|
719
719
|
|
@@ -1837,11 +1837,11 @@ static size_t frb_filtered_query_size(const void *p) {
|
|
1837
1837
|
}
|
1838
1838
|
|
1839
1839
|
static void frb_fqq_mark(void *p) {
|
1840
|
-
FrtFilteredQuery *
|
1841
|
-
if (
|
1842
|
-
rb_gc_mark(
|
1843
|
-
if (
|
1844
|
-
rb_gc_mark(
|
1840
|
+
FrtFilteredQuery *fqq = (FrtFilteredQuery *)p;
|
1841
|
+
if (fqq->query && fqq->query->rquery)
|
1842
|
+
rb_gc_mark(fqq->query->rquery);
|
1843
|
+
if (fqq->filter && fqq->filter->rfilter)
|
1844
|
+
rb_gc_mark(fqq->filter->rfilter);
|
1845
1845
|
}
|
1846
1846
|
|
1847
1847
|
const rb_data_type_t frb_filtered_query_t = {
|
@@ -1860,6 +1860,8 @@ const rb_data_type_t frb_filtered_query_t = {
|
|
1860
1860
|
|
1861
1861
|
static VALUE frb_fqq_alloc(VALUE rclass) {
|
1862
1862
|
FrtQuery *fqq = frt_fq_alloc();
|
1863
|
+
((FrtFilteredQuery *)fqq)->query = NULL;
|
1864
|
+
((FrtFilteredQuery *)fqq)->filter = NULL;
|
1863
1865
|
return TypedData_Wrap_Struct(rclass, &frb_filtered_query_t, fqq);
|
1864
1866
|
}
|
1865
1867
|
|
@@ -2172,6 +2174,7 @@ const rb_data_type_t frb_span_near_query_t = {
|
|
2172
2174
|
|
2173
2175
|
static VALUE frb_spannq_alloc(VALUE rclass) {
|
2174
2176
|
FrtQuery *snq = frt_spannq_alloc();
|
2177
|
+
((FrtSpanNearQuery *)snq)->c_cnt = 0;
|
2175
2178
|
return TypedData_Wrap_Struct(rclass, &frb_span_near_query_t, snq);
|
2176
2179
|
}
|
2177
2180
|
|
@@ -2352,9 +2355,9 @@ static size_t frb_span_not_query_size(const void *p) {
|
|
2352
2355
|
|
2353
2356
|
static void frb_spanxq_mark(void *p) {
|
2354
2357
|
FrtSpanNotQuery *sxq = (FrtSpanNotQuery *)p;
|
2355
|
-
if (sxq->inc->rquery)
|
2358
|
+
if (sxq->inc && sxq->inc->rquery)
|
2356
2359
|
rb_gc_mark(sxq->inc->rquery);
|
2357
|
-
if (sxq->exc->rquery)
|
2360
|
+
if (sxq->exc && sxq->exc->rquery)
|
2358
2361
|
rb_gc_mark(sxq->exc->rquery);
|
2359
2362
|
}
|
2360
2363
|
|
@@ -3557,10 +3560,12 @@ static size_t frb_index_searcher_size(const void *p) {
|
|
3557
3560
|
|
3558
3561
|
static void frb_sea_mark(void *p) {
|
3559
3562
|
FrtIndexSearcher *isea = (FrtIndexSearcher *)p;
|
3560
|
-
if (isea->ir
|
3561
|
-
|
3562
|
-
|
3563
|
-
|
3563
|
+
if (isea->ir) {
|
3564
|
+
if (isea->ir->rir)
|
3565
|
+
rb_gc_mark(isea->ir->rir);
|
3566
|
+
if (isea->ir->store && isea->ir->store->rstore)
|
3567
|
+
rb_gc_mark(isea->ir->store->rstore);
|
3568
|
+
}
|
3564
3569
|
}
|
3565
3570
|
|
3566
3571
|
const rb_data_type_t frb_index_searcher_t = {
|
@@ -3578,8 +3583,9 @@ const rb_data_type_t frb_index_searcher_t = {
|
|
3578
3583
|
};
|
3579
3584
|
|
3580
3585
|
static VALUE frb_sea_alloc(VALUE rclass) {
|
3581
|
-
FrtSearcher *
|
3582
|
-
|
3586
|
+
FrtSearcher *sea = frt_isea_alloc();
|
3587
|
+
((FrtIndexSearcher *)sea)->ir = NULL;
|
3588
|
+
return TypedData_Wrap_Struct(rclass, &frb_index_searcher_t, sea);
|
3583
3589
|
}
|
3584
3590
|
|
3585
3591
|
/*
|
@@ -3665,6 +3671,7 @@ const rb_data_type_t frb_multi_searcher_t = {
|
|
3665
3671
|
|
3666
3672
|
static VALUE frb_ms_alloc(VALUE rclass) {
|
3667
3673
|
FrtSearcher *s = frt_msea_alloc();
|
3674
|
+
((FrtMultiSearcher *)s)->s_cnt = 0;
|
3668
3675
|
return TypedData_Wrap_Struct(rclass, &frb_multi_searcher_t, s);
|
3669
3676
|
}
|
3670
3677
|
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#include "isomorfeus_ferret.h"
|
3
3
|
#include <time.h>
|
4
4
|
|
5
|
-
#undef rename
|
5
|
+
// #undef rename
|
6
6
|
|
7
7
|
static ID id_ref_cnt;
|
8
8
|
VALUE cLock;
|
@@ -34,7 +34,7 @@ void frb_lock_free(void *p) {
|
|
34
34
|
|
35
35
|
void frb_lock_mark(void *p) {
|
36
36
|
FrtLock *lock = (FrtLock *)p;
|
37
|
-
if (lock->store->rstore)
|
37
|
+
if (lock->store && lock->store->rstore)
|
38
38
|
rb_gc_mark(lock->store->rstore);
|
39
39
|
}
|
40
40
|
|
@@ -11,8 +11,8 @@
|
|
11
11
|
#include "bzlib.h"
|
12
12
|
#include "lz4frame.h"
|
13
13
|
|
14
|
-
#undef close
|
15
|
-
#undef read
|
14
|
+
// #undef close
|
15
|
+
// #undef read
|
16
16
|
|
17
17
|
extern rb_encoding *utf8_encoding;
|
18
18
|
extern void frt_micro_sleep(const int micro_seconds);
|
@@ -3913,9 +3913,10 @@ static FrtIndexReader *ir_setup(FrtIndexReader *ir, FrtStore *store, FrtSegmentI
|
|
3913
3913
|
ir->store = store;
|
3914
3914
|
FRT_REF(store);
|
3915
3915
|
}
|
3916
|
-
ir->sis
|
3917
|
-
ir->fis
|
3916
|
+
ir->sis = sis;
|
3917
|
+
ir->fis = fis;
|
3918
3918
|
ir->ref_cnt = 1;
|
3919
|
+
ir->rir = Qnil;
|
3919
3920
|
|
3920
3921
|
ir->is_owner = is_owner;
|
3921
3922
|
if (is_owner) {
|
@@ -6431,6 +6432,8 @@ static void iw_cp_fields(FrtIndexWriter *iw, FrtSegmentReader *sr, const char *s
|
|
6431
6432
|
sprintf(file_name, "%s.del", segment);
|
6432
6433
|
del_out = store_out->new_output(store_out, file_name);
|
6433
6434
|
frt_is2os_copy_bytes(del_in, del_out, frt_is_length(del_in));
|
6435
|
+
frt_os_close(del_out);
|
6436
|
+
frt_is_close(del_in);
|
6434
6437
|
}
|
6435
6438
|
|
6436
6439
|
if (map) {
|
@@ -1280,7 +1280,7 @@ static int bc_eq(FrtBooleanClause *self, FrtBooleanClause *o) {
|
|
1280
1280
|
}
|
1281
1281
|
|
1282
1282
|
FrtBooleanClause *frt_bc_alloc(void) {
|
1283
|
-
return
|
1283
|
+
return FRT_ALLOC_AND_ZERO(FrtBooleanClause);
|
1284
1284
|
}
|
1285
1285
|
|
1286
1286
|
FrtBooleanClause *frt_bc_init(FrtBooleanClause *self, FrtQuery *query, FrtBCType occur) {
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: isomorfeus-ferret
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.
|
4
|
+
version: 0.13.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Biedermann
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-04-
|
11
|
+
date: 2022-04-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|