isomorfeus-ferret 0.13.7 → 0.13.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +10 -4
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +7 -4
- data/ext/isomorfeus_ferret_ext/frb_index.c +3 -3
- data/ext/isomorfeus_ferret_ext/frb_qparser.c +2 -1
- data/ext/isomorfeus_ferret_ext/frb_search.c +22 -15
- data/ext/isomorfeus_ferret_ext/frb_store.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_ind.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_index.c +7 -4
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_q_term.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_search.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_sort.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_filter.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_index.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_q_span.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_search.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_sort.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_store.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_term.c +1 -1
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1e6a7a9c17cc344699bee0c8116188f86da73e11fa2a4eed02eea1c4647b6a99
|
4
|
+
data.tar.gz: c94c9a2735b010c02c8d5a793057b94abf8950117153415a731347c21fadf66e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 549a2d3b09dc535278f6c9735f55a47203166df046e5a75ff55589085336bcf2cb7c25cdf491c07eca68da25682316c4bb15e5079f09df95f8a494558fe22260
|
7
|
+
data.tar.gz: e188f428fd881a0285ece69807f13c07dfd1eee131edcad3e38a713ffa820364992bb9068d5f94f567bd7769c7882441ce8eb67199b95b491528fecdd34503f4
|
data/README.md
CHANGED
@@ -11,12 +11,12 @@ At the [Isomorfeus Framework Project](https://isomorfeus.com)
|
|
11
11
|
|
12
12
|
## About this project
|
13
13
|
|
14
|
-
Isomorfeus-Ferret is a revived version of the original ferret gem created by Dave Balmain.
|
14
|
+
Isomorfeus-Ferret is a revived version of the original ferret gem created by Dave Balmain, [https://github.com/dbalmain/ferret](https://github.com/dbalmain/ferret).
|
15
15
|
During revival many things have been fixed, now all tests pass, no crashes and it
|
16
16
|
successfully compiles and runs with rubys >3. It's no longer a goal to have
|
17
17
|
a c library available, but instead the usage is meant as ruby gem with a c extension only.
|
18
18
|
|
19
|
-
It
|
19
|
+
It works on *nixes, *nuxes, *BSDs and also works on Windows and RaspberryPi.
|
20
20
|
|
21
21
|
## Improvements and Changes in Version 0.13
|
22
22
|
|
@@ -63,7 +63,7 @@ fis.add_field(:compressed_field, :store => :yes, :compression => :brotli, :term_
|
|
63
63
|
|
64
64
|
### Performance
|
65
65
|
|
66
|
-
For version 0.13.7 the performance bottle
|
66
|
+
For version 0.13.7 the performance bottleneck has been identified and removed, ferret now delivers excellent indexing performance on all platforms, see numbers below.
|
67
67
|
On Windows performance is still not as good as on Linux, but that is equally true for Lucene and because of how the Windows filesystem works.
|
68
68
|
|
69
69
|
## Documentation
|
@@ -99,14 +99,20 @@ Ensure your locale is set to C.UTF-8, because the internal c tests don't know ho
|
|
99
99
|
|
100
100
|
A recent Java JDK must be installed to compile and run lucene benchmarks.
|
101
101
|
|
102
|
-
Results, Ferret 0.13.7 vs. Lucene 9.1.0, WhitespaceAnalyzer,
|
102
|
+
Results, Ferret 0.13.7 vs. Lucene 9.1.0, WhitespaceAnalyzer,
|
103
|
+
Linux Ubuntu 20.04, FreeBSD 13.0 and Windows 10 on old Intel Core i5 from 2015,
|
104
|
+
LinuxPi on RaspberryPi 400:
|
103
105
|
|
104
106
|
| OS | Task | Ferret | Lucene* |
|
105
107
|
|---------|------------|-----------------|----------------|
|
106
108
|
| Linux | Indexing | 4905 docs/s | 4785 docs/s |
|
109
|
+
| FreeBSD | Indexing | 4516 docs/s | - |
|
107
110
|
| Windows | Indexing | 2361 docs/s | 2395 docs/s |
|
111
|
+
| LinuxPi | Indexing | 1161 docs/s | 707 docs/s |
|
108
112
|
| Linux | Searching | 25664 queries/s | 4708 queries/s |
|
113
|
+
| FreeBSD | Searching | 25073 queries/s | - |
|
109
114
|
| Windows | Searching | 3646 queries/s | 935 queries/s |
|
115
|
+
| LinuxPi | Searching | 5768 queries/s | 680 queries/s |
|
110
116
|
| | Index Size | 28 MB | 35 MB |
|
111
117
|
|
112
118
|
*Lucene 9.1.0 on JVM 11.0.14.1 (Ubuntu)
|
@@ -458,7 +458,7 @@ static VALUE frb_ts_next(VALUE self) {
|
|
458
458
|
|
459
459
|
static void frb_tf_mark(void *p) {
|
460
460
|
FrtTokenStream *ts = (FrtTokenStream *)p;
|
461
|
-
if (TkFilt(ts)->sub_ts->rts)
|
461
|
+
if (TkFilt(ts)->sub_ts && TkFilt(ts)->sub_ts->rts)
|
462
462
|
rb_gc_mark(TkFilt(ts)->sub_ts->rts);
|
463
463
|
}
|
464
464
|
|
@@ -1331,9 +1331,10 @@ static void frb_h_mark_values_i(void *key, void *value, void *arg) {
|
|
1331
1331
|
}
|
1332
1332
|
|
1333
1333
|
static void frb_pfa_mark(void *p) {
|
1334
|
-
if (PFA(p)->default_a->ranalyzer)
|
1334
|
+
if (PFA(p)->default_a && PFA(p)->default_a->ranalyzer)
|
1335
1335
|
rb_gc_mark(PFA(p)->default_a->ranalyzer);
|
1336
|
-
|
1336
|
+
if (PFA(p)->dict)
|
1337
|
+
frt_h_each(PFA(p)->dict, &frb_h_mark_values_i, NULL);
|
1337
1338
|
}
|
1338
1339
|
|
1339
1340
|
/*** PerFieldAnalyzer ***/
|
@@ -1370,6 +1371,8 @@ const rb_data_type_t frb_per_field_analyzer_t = {
|
|
1370
1371
|
|
1371
1372
|
static VALUE frb_per_field_analyzer_alloc(VALUE rclass) {
|
1372
1373
|
FrtAnalyzer *a = frt_per_field_analyzer_alloc();
|
1374
|
+
PFA(a)->default_a = NULL;
|
1375
|
+
PFA(a)->dict = NULL;
|
1373
1376
|
return TypedData_Wrap_Struct(rclass, &frb_per_field_analyzer_t, a);
|
1374
1377
|
}
|
1375
1378
|
|
@@ -1435,7 +1438,7 @@ static VALUE frb_pfa_analyzer_token_stream(VALUE self, VALUE rfield, VALUE rstri
|
|
1435
1438
|
/*** RegExpAnalyzer ***/
|
1436
1439
|
|
1437
1440
|
static void frb_re_analyzer_mark(void *p) {
|
1438
|
-
if (((FrtAnalyzer *)p)->current_ts->rts)
|
1441
|
+
if (((FrtAnalyzer *)p)->current_ts && ((FrtAnalyzer *)p)->current_ts->rts)
|
1439
1442
|
rb_gc_mark(((FrtAnalyzer *)p)->current_ts->rts);
|
1440
1443
|
}
|
1441
1444
|
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#include "isomorfeus_ferret.h"
|
3
3
|
#include <ruby.h>
|
4
4
|
|
5
|
-
#undef close
|
5
|
+
// #undef close
|
6
6
|
|
7
7
|
VALUE mIndex;
|
8
8
|
|
@@ -429,6 +429,7 @@ static VALUE frb_get_field_infos(FrtFieldInfos *fis) {
|
|
429
429
|
|
430
430
|
static VALUE frb_fis_alloc(VALUE rclass) {
|
431
431
|
FrtFieldInfos *fis = frt_fis_alloc();
|
432
|
+
fis->size = 0;
|
432
433
|
return TypedData_Wrap_Struct(rclass, &frb_field_infos_t, fis);
|
433
434
|
}
|
434
435
|
|
@@ -674,8 +675,7 @@ frb_fis_get_tk_fields(VALUE self)
|
|
674
675
|
****************************************************************************/
|
675
676
|
|
676
677
|
static void frb_te_free(void *p) {
|
677
|
-
FrtTermEnum *
|
678
|
-
te->close(te);
|
678
|
+
((FrtTermEnum *)p)->close((FrtTermEnum *)p);
|
679
679
|
}
|
680
680
|
|
681
681
|
static size_t frb_te_size(const void *p) {
|
@@ -34,7 +34,7 @@ static void frb_qp_free(void *p) {
|
|
34
34
|
}
|
35
35
|
|
36
36
|
static void frb_qp_mark(void *p) {
|
37
|
-
if (((FrtQParser *)p)->analyzer->ranalyzer)
|
37
|
+
if (((FrtQParser *)p)->analyzer && ((FrtQParser *)p)->analyzer->ranalyzer)
|
38
38
|
rb_gc_mark(((FrtQParser *)p)->analyzer->ranalyzer);
|
39
39
|
}
|
40
40
|
|
@@ -59,6 +59,7 @@ const rb_data_type_t frb_qp_t = {
|
|
59
59
|
|
60
60
|
static VALUE frb_qp_alloc(VALUE rclass) {
|
61
61
|
FrtQParser *qp = frt_qp_alloc();
|
62
|
+
qp->analyzer = NULL;
|
62
63
|
return TypedData_Wrap_Struct(rclass, &frb_qp_t, qp);
|
63
64
|
}
|
64
65
|
|
@@ -4,7 +4,7 @@
|
|
4
4
|
#include "isomorfeus_ferret.h"
|
5
5
|
#include <ruby.h>
|
6
6
|
|
7
|
-
#undef close
|
7
|
+
// #undef close
|
8
8
|
|
9
9
|
VALUE mSearch;
|
10
10
|
|
@@ -713,7 +713,7 @@ static size_t frb_boolean_clause_t_size(const void *p) {
|
|
713
713
|
}
|
714
714
|
|
715
715
|
static void frb_bc_mark(void *p) {
|
716
|
-
if (((FrtBooleanClause *)p)->query->rquery)
|
716
|
+
if (((FrtBooleanClause *)p)->query && ((FrtBooleanClause *)p)->query->rquery)
|
717
717
|
rb_gc_mark(((FrtBooleanClause *)p)->query->rquery);
|
718
718
|
}
|
719
719
|
|
@@ -1837,11 +1837,11 @@ static size_t frb_filtered_query_size(const void *p) {
|
|
1837
1837
|
}
|
1838
1838
|
|
1839
1839
|
static void frb_fqq_mark(void *p) {
|
1840
|
-
FrtFilteredQuery *
|
1841
|
-
if (
|
1842
|
-
rb_gc_mark(
|
1843
|
-
if (
|
1844
|
-
rb_gc_mark(
|
1840
|
+
FrtFilteredQuery *fqq = (FrtFilteredQuery *)p;
|
1841
|
+
if (fqq->query && fqq->query->rquery)
|
1842
|
+
rb_gc_mark(fqq->query->rquery);
|
1843
|
+
if (fqq->filter && fqq->filter->rfilter)
|
1844
|
+
rb_gc_mark(fqq->filter->rfilter);
|
1845
1845
|
}
|
1846
1846
|
|
1847
1847
|
const rb_data_type_t frb_filtered_query_t = {
|
@@ -1860,6 +1860,8 @@ const rb_data_type_t frb_filtered_query_t = {
|
|
1860
1860
|
|
1861
1861
|
static VALUE frb_fqq_alloc(VALUE rclass) {
|
1862
1862
|
FrtQuery *fqq = frt_fq_alloc();
|
1863
|
+
((FrtFilteredQuery *)fqq)->query = NULL;
|
1864
|
+
((FrtFilteredQuery *)fqq)->filter = NULL;
|
1863
1865
|
return TypedData_Wrap_Struct(rclass, &frb_filtered_query_t, fqq);
|
1864
1866
|
}
|
1865
1867
|
|
@@ -2172,6 +2174,7 @@ const rb_data_type_t frb_span_near_query_t = {
|
|
2172
2174
|
|
2173
2175
|
static VALUE frb_spannq_alloc(VALUE rclass) {
|
2174
2176
|
FrtQuery *snq = frt_spannq_alloc();
|
2177
|
+
((FrtSpanNearQuery *)snq)->c_cnt = 0;
|
2175
2178
|
return TypedData_Wrap_Struct(rclass, &frb_span_near_query_t, snq);
|
2176
2179
|
}
|
2177
2180
|
|
@@ -2352,9 +2355,9 @@ static size_t frb_span_not_query_size(const void *p) {
|
|
2352
2355
|
|
2353
2356
|
static void frb_spanxq_mark(void *p) {
|
2354
2357
|
FrtSpanNotQuery *sxq = (FrtSpanNotQuery *)p;
|
2355
|
-
if (sxq->inc->rquery)
|
2358
|
+
if (sxq->inc && sxq->inc->rquery)
|
2356
2359
|
rb_gc_mark(sxq->inc->rquery);
|
2357
|
-
if (sxq->exc->rquery)
|
2360
|
+
if (sxq->exc && sxq->exc->rquery)
|
2358
2361
|
rb_gc_mark(sxq->exc->rquery);
|
2359
2362
|
}
|
2360
2363
|
|
@@ -3557,10 +3560,12 @@ static size_t frb_index_searcher_size(const void *p) {
|
|
3557
3560
|
|
3558
3561
|
static void frb_sea_mark(void *p) {
|
3559
3562
|
FrtIndexSearcher *isea = (FrtIndexSearcher *)p;
|
3560
|
-
if (isea->ir
|
3561
|
-
|
3562
|
-
|
3563
|
-
|
3563
|
+
if (isea->ir) {
|
3564
|
+
if (isea->ir->rir)
|
3565
|
+
rb_gc_mark(isea->ir->rir);
|
3566
|
+
if (isea->ir->store && isea->ir->store->rstore)
|
3567
|
+
rb_gc_mark(isea->ir->store->rstore);
|
3568
|
+
}
|
3564
3569
|
}
|
3565
3570
|
|
3566
3571
|
const rb_data_type_t frb_index_searcher_t = {
|
@@ -3578,8 +3583,9 @@ const rb_data_type_t frb_index_searcher_t = {
|
|
3578
3583
|
};
|
3579
3584
|
|
3580
3585
|
static VALUE frb_sea_alloc(VALUE rclass) {
|
3581
|
-
FrtSearcher *
|
3582
|
-
|
3586
|
+
FrtSearcher *sea = frt_isea_alloc();
|
3587
|
+
((FrtIndexSearcher *)sea)->ir = NULL;
|
3588
|
+
return TypedData_Wrap_Struct(rclass, &frb_index_searcher_t, sea);
|
3583
3589
|
}
|
3584
3590
|
|
3585
3591
|
/*
|
@@ -3665,6 +3671,7 @@ const rb_data_type_t frb_multi_searcher_t = {
|
|
3665
3671
|
|
3666
3672
|
static VALUE frb_ms_alloc(VALUE rclass) {
|
3667
3673
|
FrtSearcher *s = frt_msea_alloc();
|
3674
|
+
((FrtMultiSearcher *)s)->s_cnt = 0;
|
3668
3675
|
return TypedData_Wrap_Struct(rclass, &frb_multi_searcher_t, s);
|
3669
3676
|
}
|
3670
3677
|
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#include "isomorfeus_ferret.h"
|
3
3
|
#include <time.h>
|
4
4
|
|
5
|
-
#undef rename
|
5
|
+
// #undef rename
|
6
6
|
|
7
7
|
static ID id_ref_cnt;
|
8
8
|
VALUE cLock;
|
@@ -34,7 +34,7 @@ void frb_lock_free(void *p) {
|
|
34
34
|
|
35
35
|
void frb_lock_mark(void *p) {
|
36
36
|
FrtLock *lock = (FrtLock *)p;
|
37
|
-
if (lock->store->rstore)
|
37
|
+
if (lock->store && lock->store->rstore)
|
38
38
|
rb_gc_mark(lock->store->rstore);
|
39
39
|
}
|
40
40
|
|
@@ -11,8 +11,8 @@
|
|
11
11
|
#include "bzlib.h"
|
12
12
|
#include "lz4frame.h"
|
13
13
|
|
14
|
-
#undef close
|
15
|
-
#undef read
|
14
|
+
// #undef close
|
15
|
+
// #undef read
|
16
16
|
|
17
17
|
extern rb_encoding *utf8_encoding;
|
18
18
|
extern void frt_micro_sleep(const int micro_seconds);
|
@@ -3913,9 +3913,10 @@ static FrtIndexReader *ir_setup(FrtIndexReader *ir, FrtStore *store, FrtSegmentI
|
|
3913
3913
|
ir->store = store;
|
3914
3914
|
FRT_REF(store);
|
3915
3915
|
}
|
3916
|
-
ir->sis
|
3917
|
-
ir->fis
|
3916
|
+
ir->sis = sis;
|
3917
|
+
ir->fis = fis;
|
3918
3918
|
ir->ref_cnt = 1;
|
3919
|
+
ir->rir = Qnil;
|
3919
3920
|
|
3920
3921
|
ir->is_owner = is_owner;
|
3921
3922
|
if (is_owner) {
|
@@ -6431,6 +6432,8 @@ static void iw_cp_fields(FrtIndexWriter *iw, FrtSegmentReader *sr, const char *s
|
|
6431
6432
|
sprintf(file_name, "%s.del", segment);
|
6432
6433
|
del_out = store_out->new_output(store_out, file_name);
|
6433
6434
|
frt_is2os_copy_bytes(del_in, del_out, frt_is_length(del_in));
|
6435
|
+
frt_os_close(del_out);
|
6436
|
+
frt_is_close(del_in);
|
6434
6437
|
}
|
6435
6438
|
|
6436
6439
|
if (map) {
|
@@ -1280,7 +1280,7 @@ static int bc_eq(FrtBooleanClause *self, FrtBooleanClause *o) {
|
|
1280
1280
|
}
|
1281
1281
|
|
1282
1282
|
FrtBooleanClause *frt_bc_alloc(void) {
|
1283
|
-
return
|
1283
|
+
return FRT_ALLOC_AND_ZERO(FrtBooleanClause);
|
1284
1284
|
}
|
1285
1285
|
|
1286
1286
|
FrtBooleanClause *frt_bc_init(FrtBooleanClause *self, FrtQuery *query, FrtBCType occur) {
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: isomorfeus-ferret
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.
|
4
|
+
version: 0.13.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Biedermann
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-04-
|
11
|
+
date: 2022-04-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|