isomorfeus-ferret 0.13.6 → 0.13.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +26 -34
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +7 -4
- data/ext/isomorfeus_ferret_ext/frb_index.c +18 -24
- data/ext/isomorfeus_ferret_ext/frb_qparser.c +2 -1
- data/ext/isomorfeus_ferret_ext/frb_search.c +23 -19
- data/ext/isomorfeus_ferret_ext/frb_store.c +34 -36
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +5 -5
- data/ext/isomorfeus_ferret_ext/frt_bitvector.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_compound_io.c +34 -59
- data/ext/isomorfeus_ferret_ext/frt_except.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_filter.c +2 -4
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +11 -12
- data/ext/isomorfeus_ferret_ext/frt_global.c +6 -4
- data/ext/isomorfeus_ferret_ext/frt_global.h +2 -2
- data/ext/isomorfeus_ferret_ext/frt_hash.c +40 -48
- data/ext/isomorfeus_ferret_ext/frt_hash.h +14 -16
- data/ext/isomorfeus_ferret_ext/frt_ind.c +3 -4
- data/ext/isomorfeus_ferret_ext/frt_index.c +127 -197
- data/ext/isomorfeus_ferret_ext/frt_index.h +13 -13
- data/ext/isomorfeus_ferret_ext/frt_lang.c +1 -4
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +8 -9
- data/ext/isomorfeus_ferret_ext/frt_multimapper.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +7 -7
- data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +8 -3
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +1742 -1742
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +2 -3
- data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +12 -11
- data/ext/isomorfeus_ferret_ext/frt_q_term.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +18 -34
- data/ext/isomorfeus_ferret_ext/frt_search.c +30 -29
- data/ext/isomorfeus_ferret_ext/frt_search.h +18 -19
- data/ext/isomorfeus_ferret_ext/frt_sort.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_store.c +45 -41
- data/ext/isomorfeus_ferret_ext/frt_store.h +24 -26
- data/ext/isomorfeus_ferret_ext/frt_threading.h +12 -5
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +4 -3
- data/ext/isomorfeus_ferret_ext/test_1710.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_compound_io.c +8 -8
- data/ext/isomorfeus_ferret_ext/test_fields.c +7 -7
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_filter.c +5 -4
- data/ext/isomorfeus_ferret_ext/test_fs_store.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +4 -2
- data/ext/isomorfeus_ferret_ext/test_index.c +61 -61
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +3 -2
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +4 -3
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +4 -2
- data/ext/isomorfeus_ferret_ext/test_q_span.c +9 -2
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +4 -4
- data/ext/isomorfeus_ferret_ext/test_search.c +10 -5
- data/ext/isomorfeus_ferret_ext/test_segments.c +4 -3
- data/ext/isomorfeus_ferret_ext/test_sort.c +18 -10
- data/ext/isomorfeus_ferret_ext/test_store.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_term.c +5 -3
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_threading.c +5 -4
- data/lib/isomorfeus/ferret/index/index.rb +8 -3
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 744efe9c78deef0c563e39ae29eb4e755f8afb493be0fe80416a55987db97ce7
|
4
|
+
data.tar.gz: d1f814f14b5a6ef18f612e545b54b0a113544f994c22901e7204ae191ebb644e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3fd11e3b3f0f04625c14b9b709d69ef6f1a1abdfdf5c50ad5e151a3fd87e300595e29639ac9455c66f8db5452d9569bb1585f6b2f9f57ac5c3103f2ff09bb8d1
|
7
|
+
data.tar.gz: 27ace1a41fcc7557490865e740dfbe9906ce999288bb31306efa9b02f519552f9092391ff64a3ededc6a2285a0e78533a80d43ad48e85abc97c1a154e0060a67
|
data/README.md
CHANGED
@@ -11,12 +11,12 @@ At the [Isomorfeus Framework Project](https://isomorfeus.com)
|
|
11
11
|
|
12
12
|
## About this project
|
13
13
|
|
14
|
-
Isomorfeus-Ferret is a revived version of the original ferret gem created by Dave Balmain.
|
14
|
+
Isomorfeus-Ferret is a revived version of the original ferret gem created by Dave Balmain, [https://github.com/dbalmain/ferret](https://github.com/dbalmain/ferret).
|
15
15
|
During revival many things have been fixed: now all tests pass, there are no crashes, and it
|
16
16
|
successfully compiles and runs with Ruby versions >3. It's no longer a goal to have
|
17
17
|
a C library available; instead, it is meant to be used as a Ruby gem with a C extension only.
|
18
18
|
|
19
|
-
It
|
19
|
+
It works on *nixes, *nuxes, *BSDs and also works on Windows and RaspberryPi.
|
20
20
|
|
21
21
|
## Improvements and Changes in Version 0.13
|
22
22
|
|
@@ -63,14 +63,8 @@ fis.add_field(:compressed_field, :store => :yes, :compression => :brotli, :term_
|
|
63
63
|
|
64
64
|
### Performance
|
65
65
|
|
66
|
-
|
67
|
-
On Windows
|
68
|
-
|
69
|
-
Search performance is still excellent and multiple times faster than Lucene.
|
70
|
-
|
71
|
-
Lucene achieves roughly double the indexing performance. This seems to be because of the different way strings and
|
72
|
-
encodings are handled in Java. For example, the Java WhitespaceTokenizer code requires only one method call per character (check for whitespace), but for Ruby, to support all the different encodings, several method calls are required per character (retrieve character according to encoding, check character for whitespace).
|
73
|
-
Ferret is internally using the standard Ruby string encoding methods.
|
66
|
+
For version 0.13.7 the performance bottleneck has been identified and removed; Ferret now delivers excellent indexing performance on all platforms, see numbers below.
|
67
|
+
On Windows, performance is still not as good as on Linux, but that is equally true for Lucene and is due to how the Windows filesystem works.
|
74
68
|
|
75
69
|
## Documentation
|
76
70
|
|
@@ -105,43 +99,41 @@ Ensure your locale is set to C.UTF-8, because the internal c tests don't know ho
|
|
105
99
|
|
106
100
|
A recent Java JDK must be installed to compile and run lucene benchmarks.
|
107
101
|
|
108
|
-
Results
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
JVM 11.0.14.1 (Ubuntu)
|
125
|
-
```
|
102
|
+
Results, Ferret 0.13.7 vs. Lucene 9.1.0, WhitespaceAnalyzer,
|
103
|
+
Linux Ubuntu 20.04, FreeBSD 13.0 and Windows 10 on old Intel Core i5 from 2015,
|
104
|
+
LinuxPi on RaspberryPi 400:
|
105
|
+
|
106
|
+
| OS | Task | Ferret | Lucene* |
|
107
|
+
|---------|------------|-----------------|----------------|
|
108
|
+
| Linux | Indexing | 4905 docs/s | 4785 docs/s |
|
109
|
+
| FreeBSD | Indexing | 4516 docs/s | - |
|
110
|
+
| Windows | Indexing | 2361 docs/s | 2395 docs/s |
|
111
|
+
| LinuxPi | Indexing | 1161 docs/s | 707 docs/s |
|
112
|
+
| Linux | Searching | 25664 queries/s | 4708 queries/s |
|
113
|
+
| FreeBSD | Searching | 25073 queries/s | - |
|
114
|
+
| Windows | Searching | 3646 queries/s | 935 queries/s |
|
115
|
+
| LinuxPi | Searching | 5768 queries/s | 680 queries/s |
|
116
|
+
| | Index Size | 28 MB | 35 MB |
|
117
|
+
|
118
|
+
*Lucene 9.1.0 on JVM 11.0.14.1 (Ubuntu)
|
126
119
|
|
127
120
|
### Storing Fields with Compression, Indexing and Retrieval
|
128
121
|
- clone repo
|
129
122
|
- bundle install
|
130
123
|
- rake ferret_compression_benchmark
|
131
124
|
|
132
|
-
Results on Linux, 0.13.
|
125
|
+
Results on Linux, 0.13.7, on old Intel Core i5 from 2015:
|
133
126
|
|
134
127
|
| Compression | Index & Store | Retrieve | Index size |
|
135
128
|
|-------------|---------------|---------------|------------|
|
136
|
-
| none |
|
137
|
-
| brotli |
|
138
|
-
| bzip2 |
|
139
|
-
| lz4 |
|
129
|
+
| none | 4866 docs/s | 153853 docs/s | 43 MB |
|
130
|
+
| brotli | 3539 docs/s | 58315 docs/s | 36 MB |
|
131
|
+
| bzip2 | 2624 docs/s | 15382 docs/s | 38 MB |
|
132
|
+
| lz4 | 4639 docs/s | 127100 docs/s | 41 MB |
|
140
133
|
|
141
134
|
## Future
|
142
135
|
|
143
136
|
Lots of things to do:
|
144
|
-
- Improve indexing performance on Windows (WriteFile is terribly slow, maybe use mapping, see libuv)
|
145
137
|
- Bring documentation in order in a docs directory
|
146
138
|
- Review code (especially for memory/stack issues, typical c issues)
|
147
139
|
- Take care of ruby GVL and threading
|
@@ -458,7 +458,7 @@ static VALUE frb_ts_next(VALUE self) {
|
|
458
458
|
|
459
459
|
static void frb_tf_mark(void *p) {
|
460
460
|
FrtTokenStream *ts = (FrtTokenStream *)p;
|
461
|
-
if (TkFilt(ts)->sub_ts->rts)
|
461
|
+
if (TkFilt(ts)->sub_ts && TkFilt(ts)->sub_ts->rts)
|
462
462
|
rb_gc_mark(TkFilt(ts)->sub_ts->rts);
|
463
463
|
}
|
464
464
|
|
@@ -1331,9 +1331,10 @@ static void frb_h_mark_values_i(void *key, void *value, void *arg) {
|
|
1331
1331
|
}
|
1332
1332
|
|
1333
1333
|
static void frb_pfa_mark(void *p) {
|
1334
|
-
if (PFA(p)->default_a->ranalyzer)
|
1334
|
+
if (PFA(p)->default_a && PFA(p)->default_a->ranalyzer)
|
1335
1335
|
rb_gc_mark(PFA(p)->default_a->ranalyzer);
|
1336
|
-
|
1336
|
+
if (PFA(p)->dict)
|
1337
|
+
frt_h_each(PFA(p)->dict, &frb_h_mark_values_i, NULL);
|
1337
1338
|
}
|
1338
1339
|
|
1339
1340
|
/*** PerFieldAnalyzer ***/
|
@@ -1370,6 +1371,8 @@ const rb_data_type_t frb_per_field_analyzer_t = {
|
|
1370
1371
|
|
1371
1372
|
static VALUE frb_per_field_analyzer_alloc(VALUE rclass) {
|
1372
1373
|
FrtAnalyzer *a = frt_per_field_analyzer_alloc();
|
1374
|
+
PFA(a)->default_a = NULL;
|
1375
|
+
PFA(a)->dict = NULL;
|
1373
1376
|
return TypedData_Wrap_Struct(rclass, &frb_per_field_analyzer_t, a);
|
1374
1377
|
}
|
1375
1378
|
|
@@ -1435,7 +1438,7 @@ static VALUE frb_pfa_analyzer_token_stream(VALUE self, VALUE rfield, VALUE rstri
|
|
1435
1438
|
/*** RegExpAnalyzer ***/
|
1436
1439
|
|
1437
1440
|
static void frb_re_analyzer_mark(void *p) {
|
1438
|
-
if (((FrtAnalyzer *)p)->current_ts->rts)
|
1441
|
+
if (((FrtAnalyzer *)p)->current_ts && ((FrtAnalyzer *)p)->current_ts->rts)
|
1439
1442
|
rb_gc_mark(((FrtAnalyzer *)p)->current_ts->rts);
|
1440
1443
|
}
|
1441
1444
|
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#include "isomorfeus_ferret.h"
|
3
3
|
#include <ruby.h>
|
4
4
|
|
5
|
-
#undef close
|
5
|
+
// #undef close
|
6
6
|
|
7
7
|
VALUE mIndex;
|
8
8
|
|
@@ -429,6 +429,7 @@ static VALUE frb_get_field_infos(FrtFieldInfos *fis) {
|
|
429
429
|
|
430
430
|
static VALUE frb_fis_alloc(VALUE rclass) {
|
431
431
|
FrtFieldInfos *fis = frt_fis_alloc();
|
432
|
+
fis->size = 0;
|
432
433
|
return TypedData_Wrap_Struct(rclass, &frb_field_infos_t, fis);
|
433
434
|
}
|
434
435
|
|
@@ -535,7 +536,7 @@ frb_fis_add_field(int argc, VALUE *argv, VALUE self)
|
|
535
536
|
{
|
536
537
|
FrtFieldInfos *fis = (FrtFieldInfos *)DATA_PTR(self);
|
537
538
|
FrtFieldInfo *fi;
|
538
|
-
FrtStoreValue
|
539
|
+
FrtStoreValue store_val = fis->store_val;
|
539
540
|
FrtCompressionType compression = fis->compression;
|
540
541
|
FrtIndexValue index = fis->index;
|
541
542
|
FrtTermVectorValue term_vector = fis->term_vector;
|
@@ -544,9 +545,9 @@ frb_fis_add_field(int argc, VALUE *argv, VALUE self)
|
|
544
545
|
|
545
546
|
rb_scan_args(argc, argv, "11", &rname, &roptions);
|
546
547
|
if (argc > 1) {
|
547
|
-
frb_fi_get_params(roptions, &
|
548
|
+
frb_fi_get_params(roptions, &store_val, &compression, &index, &term_vector, &boost);
|
548
549
|
}
|
549
|
-
fi = frt_fi_new(frb_field(rname),
|
550
|
+
fi = frt_fi_new(frb_field(rname), store_val, compression, index, term_vector);
|
550
551
|
fi->boost = boost;
|
551
552
|
frt_fis_add_field(fis, fi);
|
552
553
|
return self;
|
@@ -592,9 +593,7 @@ frb_fis_to_s(VALUE self)
|
|
592
593
|
*
|
593
594
|
* Return the number of fields in the FieldInfos object.
|
594
595
|
*/
|
595
|
-
static VALUE
|
596
|
-
frb_fis_size(VALUE self)
|
597
|
-
{
|
596
|
+
static VALUE frb_fis_size(VALUE self) {
|
598
597
|
FrtFieldInfos *fis = (FrtFieldInfos *)DATA_PTR(self);
|
599
598
|
return INT2FIX(fis->size);
|
600
599
|
}
|
@@ -609,21 +608,19 @@ frb_fis_size(VALUE self)
|
|
609
608
|
* existing index (or other files for that matter) will be deleted from the
|
610
609
|
* directory and overwritten by the new index.
|
611
610
|
*/
|
612
|
-
static VALUE
|
613
|
-
frb_fis_create_index(VALUE self, VALUE rdir)
|
614
|
-
{
|
611
|
+
static VALUE frb_fis_create_index(VALUE self, VALUE rdir) {
|
615
612
|
FrtFieldInfos *fis = (FrtFieldInfos *)DATA_PTR(self);
|
616
613
|
FrtStore *store = NULL;
|
617
614
|
if (TYPE(rdir) == T_DATA) {
|
618
615
|
store = DATA_PTR(rdir);
|
619
|
-
|
616
|
+
frt_index_create(store, fis);
|
620
617
|
} else {
|
621
618
|
StringValue(rdir);
|
622
619
|
frb_create_dir(rdir);
|
623
620
|
store = frt_open_fs_store(rs2s(rdir));
|
621
|
+
frt_index_create(store, fis);
|
622
|
+
frt_store_close(store);
|
624
623
|
}
|
625
|
-
frt_index_create(store, fis);
|
626
|
-
frt_store_deref(store);
|
627
624
|
return self;
|
628
625
|
}
|
629
626
|
|
@@ -674,8 +671,7 @@ frb_fis_get_tk_fields(VALUE self)
|
|
674
671
|
****************************************************************************/
|
675
672
|
|
676
673
|
static void frb_te_free(void *p) {
|
677
|
-
FrtTermEnum *
|
678
|
-
te->close(te);
|
674
|
+
((FrtTermEnum *)p)->close((FrtTermEnum *)p);
|
679
675
|
}
|
680
676
|
|
681
677
|
static size_t frb_te_size(const void *p) {
|
@@ -1344,6 +1340,8 @@ static VALUE frb_iw_alloc(VALUE rclass) {
|
|
1344
1340
|
return TypedData_Wrap_Struct(rclass, &frb_index_writer_t, iw);
|
1345
1341
|
}
|
1346
1342
|
|
1343
|
+
extern rb_data_type_t frb_store_t;
|
1344
|
+
|
1347
1345
|
static VALUE frb_iw_init(int argc, VALUE *argv, VALUE self) {
|
1348
1346
|
VALUE roptions, rval;
|
1349
1347
|
bool create = false;
|
@@ -1363,12 +1361,11 @@ static VALUE frb_iw_init(int argc, VALUE *argv, VALUE self) {
|
|
1363
1361
|
|
1364
1362
|
if ((rval = rb_hash_aref(roptions, sym_dir)) != Qnil) {
|
1365
1363
|
// Check_Type(rval, T_DATA);
|
1366
|
-
|
1364
|
+
TypedData_Get_Struct(rval, FrtStore, &frb_store_t, store);
|
1367
1365
|
} else if ((rval = rb_hash_aref(roptions, sym_path)) != Qnil) {
|
1368
1366
|
StringValue(rval);
|
1369
1367
|
frb_create_dir(rval);
|
1370
1368
|
store = frt_open_fs_store(rs2s(rval));
|
1371
|
-
FRT_DEREF(store);
|
1372
1369
|
}
|
1373
1370
|
/* use_compound_file defaults to true */
|
1374
1371
|
config.use_compound_file =
|
@@ -1393,7 +1390,6 @@ static VALUE frb_iw_init(int argc, VALUE *argv, VALUE self) {
|
|
1393
1390
|
}
|
1394
1391
|
if (NULL == store) {
|
1395
1392
|
store = frt_open_ram_store(NULL);
|
1396
|
-
FRT_DEREF(store);
|
1397
1393
|
}
|
1398
1394
|
if (!create && create_if_missing && !store->exists(store, "segments")) {
|
1399
1395
|
create = true;
|
@@ -1411,7 +1407,7 @@ static VALUE frb_iw_init(int argc, VALUE *argv, VALUE self) {
|
|
1411
1407
|
}
|
1412
1408
|
|
1413
1409
|
TypedData_Get_Struct(self, FrtIndexWriter, &frb_index_writer_t, iw);
|
1414
|
-
|
1410
|
+
frt_iw_open(iw, store, analyzer, &config);
|
1415
1411
|
FRT_XCATCHALL
|
1416
1412
|
ex_code = xcontext.excode;
|
1417
1413
|
msg = xcontext.msg;
|
@@ -1598,9 +1594,9 @@ void frb_ir_free(void *p) {
|
|
1598
1594
|
|
1599
1595
|
void frb_ir_mark(void *p) {
|
1600
1596
|
FrtIndexReader *ir = (FrtIndexReader *)p;
|
1601
|
-
FrtMultiReader *mr = (FrtMultiReader *)p;
|
1602
1597
|
|
1603
1598
|
if (ir->type == FRT_MULTI_READER) {
|
1599
|
+
FrtMultiReader *mr = (FrtMultiReader *)p;
|
1604
1600
|
int i;
|
1605
1601
|
for (i = 0; i < mr->r_cnt; i++) {
|
1606
1602
|
if (mr->sub_readers[i]->rir)
|
@@ -1652,6 +1648,7 @@ static VALUE frb_iw_add_readers(VALUE self, VALUE rreaders) {
|
|
1652
1648
|
while (i-- > 0) {
|
1653
1649
|
FrtIndexReader *ir;
|
1654
1650
|
TypedData_Get_Struct(RARRAY_PTR(rreaders)[i], FrtIndexReader, &frb_index_reader_t, ir);
|
1651
|
+
FRT_REF(ir);
|
1655
1652
|
irs[i] = ir;
|
1656
1653
|
}
|
1657
1654
|
frt_iw_add_readers(iw, irs, RARRAY_LEN(rreaders));
|
@@ -2157,7 +2154,6 @@ static VALUE frb_ir_init(VALUE self, VALUE rdir) {
|
|
2157
2154
|
VALUE rfield_num_map = rb_hash_new();
|
2158
2155
|
int ex_code = 0;
|
2159
2156
|
const char *msg = NULL;
|
2160
|
-
|
2161
2157
|
FRT_TRY
|
2162
2158
|
if (TYPE(rdir) == T_ARRAY) {
|
2163
2159
|
VALUE rdirs = rdir;
|
@@ -2170,7 +2166,6 @@ static VALUE frb_ir_init(VALUE self, VALUE rdir) {
|
|
2170
2166
|
case T_DATA:
|
2171
2167
|
if (CLASS_OF(rdir) == cIndexReader) {
|
2172
2168
|
TypedData_Get_Struct(rdir, FrtIndexReader, &frb_index_reader_t, sub_readers[i]);
|
2173
|
-
FRT_REF(sub_readers[i]);
|
2174
2169
|
continue;
|
2175
2170
|
} else if (RTEST(rb_obj_is_kind_of(rdir, cDirectory))) {
|
2176
2171
|
store = DATA_PTR(rdir);
|
@@ -2185,7 +2180,6 @@ static VALUE frb_ir_init(VALUE self, VALUE rdir) {
|
|
2185
2180
|
case T_STRING:
|
2186
2181
|
frb_create_dir(rdir);
|
2187
2182
|
store = frt_open_fs_store(rs2s(rdir));
|
2188
|
-
FRT_DEREF(store);
|
2189
2183
|
break;
|
2190
2184
|
default:
|
2191
2185
|
FRT_RAISE(FRT_ARG_ERROR, "%s isn't a valid directory "
|
@@ -2195,6 +2189,7 @@ static VALUE frb_ir_init(VALUE self, VALUE rdir) {
|
|
2195
2189
|
break;
|
2196
2190
|
}
|
2197
2191
|
sub_readers[i] = frt_ir_open(NULL, store);
|
2192
|
+
FRT_DEREF(sub_readers[i]);
|
2198
2193
|
}
|
2199
2194
|
TypedData_Get_Struct(self, FrtIndexReader, &frb_index_reader_t, ir);
|
2200
2195
|
ir = frt_mr_open(ir, sub_readers, reader_cnt);
|
@@ -2206,7 +2201,6 @@ static VALUE frb_ir_init(VALUE self, VALUE rdir) {
|
|
2206
2201
|
case T_STRING:
|
2207
2202
|
frb_create_dir(rdir);
|
2208
2203
|
store = frt_open_fs_store(rs2s(rdir));
|
2209
|
-
FRT_DEREF(store);
|
2210
2204
|
break;
|
2211
2205
|
default:
|
2212
2206
|
FRT_RAISE(FRT_ARG_ERROR, "%s isn't a valid directory argument. "
|
@@ -34,7 +34,7 @@ static void frb_qp_free(void *p) {
|
|
34
34
|
}
|
35
35
|
|
36
36
|
static void frb_qp_mark(void *p) {
|
37
|
-
if (((FrtQParser *)p)->analyzer->ranalyzer)
|
37
|
+
if (((FrtQParser *)p)->analyzer && ((FrtQParser *)p)->analyzer->ranalyzer)
|
38
38
|
rb_gc_mark(((FrtQParser *)p)->analyzer->ranalyzer);
|
39
39
|
}
|
40
40
|
|
@@ -59,6 +59,7 @@ const rb_data_type_t frb_qp_t = {
|
|
59
59
|
|
60
60
|
static VALUE frb_qp_alloc(VALUE rclass) {
|
61
61
|
FrtQParser *qp = frt_qp_alloc();
|
62
|
+
qp->analyzer = NULL;
|
62
63
|
return TypedData_Wrap_Struct(rclass, &frb_qp_t, qp);
|
63
64
|
}
|
64
65
|
|
@@ -4,7 +4,7 @@
|
|
4
4
|
#include "isomorfeus_ferret.h"
|
5
5
|
#include <ruby.h>
|
6
6
|
|
7
|
-
#undef close
|
7
|
+
// #undef close
|
8
8
|
|
9
9
|
VALUE mSearch;
|
10
10
|
|
@@ -713,7 +713,7 @@ static size_t frb_boolean_clause_t_size(const void *p) {
|
|
713
713
|
}
|
714
714
|
|
715
715
|
static void frb_bc_mark(void *p) {
|
716
|
-
if (((FrtBooleanClause *)p)->query->rquery)
|
716
|
+
if (((FrtBooleanClause *)p)->query && ((FrtBooleanClause *)p)->query->rquery)
|
717
717
|
rb_gc_mark(((FrtBooleanClause *)p)->query->rquery);
|
718
718
|
}
|
719
719
|
|
@@ -1837,11 +1837,11 @@ static size_t frb_filtered_query_size(const void *p) {
|
|
1837
1837
|
}
|
1838
1838
|
|
1839
1839
|
static void frb_fqq_mark(void *p) {
|
1840
|
-
FrtFilteredQuery *
|
1841
|
-
if (
|
1842
|
-
rb_gc_mark(
|
1843
|
-
if (
|
1844
|
-
rb_gc_mark(
|
1840
|
+
FrtFilteredQuery *fqq = (FrtFilteredQuery *)p;
|
1841
|
+
if (fqq->query && fqq->query->rquery)
|
1842
|
+
rb_gc_mark(fqq->query->rquery);
|
1843
|
+
if (fqq->filter && fqq->filter->rfilter)
|
1844
|
+
rb_gc_mark(fqq->filter->rfilter);
|
1845
1845
|
}
|
1846
1846
|
|
1847
1847
|
const rb_data_type_t frb_filtered_query_t = {
|
@@ -1860,6 +1860,8 @@ const rb_data_type_t frb_filtered_query_t = {
|
|
1860
1860
|
|
1861
1861
|
static VALUE frb_fqq_alloc(VALUE rclass) {
|
1862
1862
|
FrtQuery *fqq = frt_fq_alloc();
|
1863
|
+
((FrtFilteredQuery *)fqq)->query = NULL;
|
1864
|
+
((FrtFilteredQuery *)fqq)->filter = NULL;
|
1863
1865
|
return TypedData_Wrap_Struct(rclass, &frb_filtered_query_t, fqq);
|
1864
1866
|
}
|
1865
1867
|
|
@@ -2172,6 +2174,7 @@ const rb_data_type_t frb_span_near_query_t = {
|
|
2172
2174
|
|
2173
2175
|
static VALUE frb_spannq_alloc(VALUE rclass) {
|
2174
2176
|
FrtQuery *snq = frt_spannq_alloc();
|
2177
|
+
((FrtSpanNearQuery *)snq)->c_cnt = 0;
|
2175
2178
|
return TypedData_Wrap_Struct(rclass, &frb_span_near_query_t, snq);
|
2176
2179
|
}
|
2177
2180
|
|
@@ -2352,9 +2355,9 @@ static size_t frb_span_not_query_size(const void *p) {
|
|
2352
2355
|
|
2353
2356
|
static void frb_spanxq_mark(void *p) {
|
2354
2357
|
FrtSpanNotQuery *sxq = (FrtSpanNotQuery *)p;
|
2355
|
-
if (sxq->inc->rquery)
|
2358
|
+
if (sxq->inc && sxq->inc->rquery)
|
2356
2359
|
rb_gc_mark(sxq->inc->rquery);
|
2357
|
-
if (sxq->exc->rquery)
|
2360
|
+
if (sxq->exc && sxq->exc->rquery)
|
2358
2361
|
rb_gc_mark(sxq->exc->rquery);
|
2359
2362
|
}
|
2360
2363
|
|
@@ -3557,10 +3560,12 @@ static size_t frb_index_searcher_size(const void *p) {
|
|
3557
3560
|
|
3558
3561
|
static void frb_sea_mark(void *p) {
|
3559
3562
|
FrtIndexSearcher *isea = (FrtIndexSearcher *)p;
|
3560
|
-
if (isea->ir
|
3561
|
-
|
3562
|
-
|
3563
|
-
|
3563
|
+
if (isea->ir) {
|
3564
|
+
if (isea->ir->rir)
|
3565
|
+
rb_gc_mark(isea->ir->rir);
|
3566
|
+
if (isea->ir->store && isea->ir->store->rstore)
|
3567
|
+
rb_gc_mark(isea->ir->store->rstore);
|
3568
|
+
}
|
3564
3569
|
}
|
3565
3570
|
|
3566
3571
|
const rb_data_type_t frb_index_searcher_t = {
|
@@ -3578,8 +3583,9 @@ const rb_data_type_t frb_index_searcher_t = {
|
|
3578
3583
|
};
|
3579
3584
|
|
3580
3585
|
static VALUE frb_sea_alloc(VALUE rclass) {
|
3581
|
-
FrtSearcher *
|
3582
|
-
|
3586
|
+
FrtSearcher *sea = frt_isea_alloc();
|
3587
|
+
((FrtIndexSearcher *)sea)->ir = NULL;
|
3588
|
+
return TypedData_Wrap_Struct(rclass, &frb_index_searcher_t, sea);
|
3583
3589
|
}
|
3584
3590
|
|
3585
3591
|
/*
|
@@ -3600,7 +3606,6 @@ static VALUE frb_sea_init(VALUE self, VALUE obj) {
|
|
3600
3606
|
frb_create_dir(obj);
|
3601
3607
|
store = frt_open_fs_store(rs2s(obj));
|
3602
3608
|
ir = frt_ir_open(NULL, store);
|
3603
|
-
FRT_DEREF(store);
|
3604
3609
|
ir->rir = TypedData_Wrap_Struct(cIndexReader, &frb_index_reader_t, ir);
|
3605
3610
|
} else {
|
3606
3611
|
// Check_Type(obj, T_DATA);
|
@@ -3610,14 +3615,12 @@ static VALUE frb_sea_init(VALUE self, VALUE obj) {
|
|
3610
3615
|
ir->rir = TypedData_Wrap_Struct(cIndexReader, &frb_index_reader_t, ir);
|
3611
3616
|
} else if (rb_obj_is_kind_of(obj, cIndexReader) == Qtrue) {
|
3612
3617
|
TypedData_Get_Struct(obj, FrtIndexReader, &frb_index_reader_t, ir);
|
3613
|
-
ir->rir = obj;
|
3614
3618
|
} else {
|
3615
3619
|
rb_raise(rb_eArgError, "Unknown type for argument to IndexSearcher.new");
|
3616
3620
|
}
|
3617
3621
|
}
|
3618
3622
|
TypedData_Get_Struct(self, FrtSearcher, &frb_index_searcher_t, sea);
|
3619
3623
|
frt_isea_init(sea, ir);
|
3620
|
-
((FrtIndexSearcher *)sea)->close_ir = false;
|
3621
3624
|
sea->rsea = self;
|
3622
3625
|
return self;
|
3623
3626
|
}
|
@@ -3665,6 +3668,7 @@ const rb_data_type_t frb_multi_searcher_t = {
|
|
3665
3668
|
|
3666
3669
|
static VALUE frb_ms_alloc(VALUE rclass) {
|
3667
3670
|
FrtSearcher *s = frt_msea_alloc();
|
3671
|
+
((FrtMultiSearcher *)s)->s_cnt = 0;
|
3668
3672
|
return TypedData_Wrap_Struct(rclass, &frb_multi_searcher_t, s);
|
3669
3673
|
}
|
3670
3674
|
|
@@ -3703,7 +3707,7 @@ static VALUE frb_ms_init(int argc, VALUE *argv, VALUE self) {
|
|
3703
3707
|
}
|
3704
3708
|
}
|
3705
3709
|
TypedData_Get_Struct(self, FrtSearcher, &frb_multi_searcher_t, sea);
|
3706
|
-
frt_msea_init(sea, searchers, top
|
3710
|
+
frt_msea_init(sea, searchers, top);
|
3707
3711
|
sea->rsea = self;
|
3708
3712
|
return self;
|
3709
3713
|
}
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#include "isomorfeus_ferret.h"
|
3
3
|
#include <time.h>
|
4
4
|
|
5
|
-
#undef rename
|
5
|
+
// #undef rename
|
6
6
|
|
7
7
|
static ID id_ref_cnt;
|
8
8
|
VALUE cLock;
|
@@ -34,7 +34,7 @@ void frb_lock_free(void *p) {
|
|
34
34
|
|
35
35
|
void frb_lock_mark(void *p) {
|
36
36
|
FrtLock *lock = (FrtLock *)p;
|
37
|
-
if (lock->store->rstore)
|
37
|
+
if (lock->store && lock->store->rstore)
|
38
38
|
rb_gc_mark(lock->store->rstore);
|
39
39
|
}
|
40
40
|
|
@@ -185,10 +185,35 @@ static VALUE frb_lock_release(VALUE self) {
|
|
185
185
|
*
|
186
186
|
****************************************************************************/
|
187
187
|
|
188
|
+
/*** FrbStore ****************************************************************/
|
189
|
+
|
190
|
+
static size_t frb_store_size(const void *p) {
|
191
|
+
return sizeof(FrtStore);
|
192
|
+
(void)p;
|
193
|
+
}
|
194
|
+
|
188
195
|
void frb_dir_free(void *p) {
|
189
|
-
|
190
|
-
|
191
|
-
|
196
|
+
frb_unwrap_locks((FrtStore *)p);
|
197
|
+
frt_store_close((FrtStore *)p);
|
198
|
+
}
|
199
|
+
|
200
|
+
const rb_data_type_t frb_store_t = {
|
201
|
+
.wrap_struct_name = "FrbStore",
|
202
|
+
.function = {
|
203
|
+
.dmark = NULL,
|
204
|
+
.dfree = frb_dir_free,
|
205
|
+
.dsize = frb_store_size,
|
206
|
+
.dcompact = NULL,
|
207
|
+
.reserved = {0},
|
208
|
+
},
|
209
|
+
.parent = NULL,
|
210
|
+
.data = NULL,
|
211
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY
|
212
|
+
};
|
213
|
+
|
214
|
+
static VALUE frb_store_alloc(VALUE rclass) {
|
215
|
+
FrtStore *st = frt_store_alloc();
|
216
|
+
return TypedData_Wrap_Struct(rclass, &frb_store_t, st);
|
192
217
|
}
|
193
218
|
|
194
219
|
/*
|
@@ -208,7 +233,7 @@ static VALUE frb_dir_close(VALUE self) {
|
|
208
233
|
((struct RData *)(self))->dmark = NULL;
|
209
234
|
((struct RData *)(self))->dfree = NULL;
|
210
235
|
frb_unwrap_locks(store);
|
211
|
-
|
236
|
+
frt_store_close(store);
|
212
237
|
}
|
213
238
|
return Qnil;
|
214
239
|
}
|
@@ -308,32 +333,6 @@ static VALUE frb_dir_make_lock(VALUE self, VALUE rlock_name) {
|
|
308
333
|
return rlock;
|
309
334
|
}
|
310
335
|
|
311
|
-
/*** FrbStore ****************************************************************/
|
312
|
-
|
313
|
-
static size_t frb_store_size(const void *p) {
|
314
|
-
return sizeof(FrtStore);
|
315
|
-
(void)p;
|
316
|
-
}
|
317
|
-
|
318
|
-
const rb_data_type_t frb_store_t = {
|
319
|
-
.wrap_struct_name = "FrbStore",
|
320
|
-
.function = {
|
321
|
-
.dmark = NULL,
|
322
|
-
.dfree = frb_dir_free,
|
323
|
-
.dsize = frb_store_size,
|
324
|
-
.dcompact = NULL,
|
325
|
-
.reserved = {0},
|
326
|
-
},
|
327
|
-
.parent = NULL,
|
328
|
-
.data = NULL,
|
329
|
-
.flags = RUBY_TYPED_FREE_IMMEDIATELY
|
330
|
-
};
|
331
|
-
|
332
|
-
static VALUE frb_store_alloc(VALUE rclass) {
|
333
|
-
FrtStore *st = frt_store_alloc();
|
334
|
-
return TypedData_Wrap_Struct(rclass, &frb_store_t, st);
|
335
|
-
}
|
336
|
-
|
337
336
|
/****************************************************************************
|
338
337
|
*
|
339
338
|
* RAMDirectory Methods
|
@@ -403,19 +402,18 @@ static VALUE frb_fsdir_new(int argc, VALUE *argv, VALUE klass) {
|
|
403
402
|
frb_create_dir(rpath);
|
404
403
|
}
|
405
404
|
if (!rb_funcall(rb_cFile, id_is_directory, 1, rpath)) {
|
406
|
-
rb_raise(rb_eIOError, "No directory <%s> found. Use :create => true"
|
407
|
-
" to create one.", rs2s(rpath));
|
405
|
+
rb_raise(rb_eIOError, "No directory <%s> found. Use :create => true to create one.", rs2s(rpath));
|
408
406
|
}
|
409
407
|
store = frt_open_fs_store(rs2s(rpath));
|
410
408
|
if (create) store->clear_all(store);
|
411
|
-
|
409
|
+
self = store->rstore;
|
410
|
+
if (self == Qnil || DATA_PTR(self) == NULL) {
|
412
411
|
self = TypedData_Wrap_Struct(klass, &frb_store_t, store);
|
413
412
|
store->rstore = self;
|
414
413
|
rb_ivar_set(self, id_ref_cnt, INT2FIX(0));
|
415
414
|
} else {
|
416
415
|
int ref_cnt = FIX2INT(rb_ivar_get(self, id_ref_cnt)) + 1;
|
417
416
|
rb_ivar_set(self, id_ref_cnt, INT2FIX(ref_cnt));
|
418
|
-
FRT_DEREF(store);
|
419
417
|
}
|
420
418
|
return self;
|
421
419
|
}
|
@@ -133,7 +133,7 @@ FrtToken *frt_tk_new(void) {
|
|
133
133
|
/*****************************************************************************/
|
134
134
|
|
135
135
|
void frt_ts_deref(FrtTokenStream *ts) {
|
136
|
-
if (
|
136
|
+
if (FRT_DEREF(ts) == 0)
|
137
137
|
ts->destroy_i(ts);
|
138
138
|
}
|
139
139
|
|
@@ -1089,7 +1089,7 @@ FrtTokenStream *frt_stem_filter_new(FrtTokenStream *sub_ts, const char *algorith
|
|
1089
1089
|
/*****************************************************************************/
|
1090
1090
|
|
1091
1091
|
void frt_a_deref(FrtAnalyzer *a) {
|
1092
|
-
if (
|
1092
|
+
if (FRT_DEREF(a) == 0)
|
1093
1093
|
a->destroy_i(a);
|
1094
1094
|
}
|
1095
1095
|
|
@@ -33,15 +33,15 @@ typedef struct FrtTokenStream FrtTokenStream;
|
|
33
33
|
struct FrtTokenStream {
|
34
34
|
char *t; /* ptr used to scan text */
|
35
35
|
char *text;
|
36
|
-
int
|
36
|
+
int length;
|
37
37
|
rb_encoding *encoding;
|
38
38
|
FrtToken *(*next)(FrtTokenStream *ts);
|
39
39
|
FrtTokenStream *(*reset)(FrtTokenStream *ts, char *text, rb_encoding *encoding);
|
40
40
|
FrtTokenStream *(*clone_i)(FrtTokenStream *ts);
|
41
41
|
void (*destroy_i)(FrtTokenStream *ts);
|
42
|
-
int
|
43
|
-
VALUE
|
44
|
-
FrtToken
|
42
|
+
_Atomic unsigned int ref_cnt;
|
43
|
+
VALUE rts;
|
44
|
+
FrtToken token;
|
45
45
|
};
|
46
46
|
|
47
47
|
extern FrtTokenStream *frt_ts_new_i(size_t size);
|
@@ -184,7 +184,7 @@ typedef struct FrtAnalyzer {
|
|
184
184
|
FrtTokenStream *current_ts;
|
185
185
|
FrtTokenStream *(*get_ts)(struct FrtAnalyzer *a, ID field, char *text, rb_encoding *encoding);
|
186
186
|
void (*destroy_i)(struct FrtAnalyzer *a);
|
187
|
-
int
|
187
|
+
_Atomic unsigned int ref_cnt;
|
188
188
|
VALUE ranalyzer;
|
189
189
|
} FrtAnalyzer;
|
190
190
|
|