isomorfeus-ferret 0.13.7 → 0.13.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +10 -4
- data/ext/isomorfeus_ferret_ext/bm_store.c +3 -3
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +7 -4
- data/ext/isomorfeus_ferret_ext/frb_index.c +18 -24
- data/ext/isomorfeus_ferret_ext/frb_qparser.c +2 -1
- data/ext/isomorfeus_ferret_ext/frb_search.c +23 -19
- data/ext/isomorfeus_ferret_ext/frb_store.c +34 -36
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +4 -4
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +9 -9
- data/ext/isomorfeus_ferret_ext/frt_bitvector.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_compound_io.c +41 -66
- data/ext/isomorfeus_ferret_ext/frt_config.h +8 -0
- data/ext/isomorfeus_ferret_ext/frt_except.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_filter.c +2 -4
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +24 -19
- data/ext/isomorfeus_ferret_ext/frt_global.c +6 -4
- data/ext/isomorfeus_ferret_ext/frt_global.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_hash.c +40 -48
- data/ext/isomorfeus_ferret_ext/frt_hash.h +14 -16
- data/ext/isomorfeus_ferret_ext/frt_ind.c +3 -4
- data/ext/isomorfeus_ferret_ext/frt_index.c +152 -222
- data/ext/isomorfeus_ferret_ext/frt_index.h +31 -31
- data/ext/isomorfeus_ferret_ext/frt_lang.c +1 -4
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +8 -9
- data/ext/isomorfeus_ferret_ext/frt_multimapper.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +7 -7
- data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +8 -3
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +1742 -1742
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +2 -3
- data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +12 -11
- data/ext/isomorfeus_ferret_ext/frt_q_term.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +24 -40
- data/ext/isomorfeus_ferret_ext/frt_search.c +30 -29
- data/ext/isomorfeus_ferret_ext/frt_search.h +18 -19
- data/ext/isomorfeus_ferret_ext/frt_sort.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_store.c +47 -40
- data/ext/isomorfeus_ferret_ext/frt_store.h +45 -47
- data/ext/isomorfeus_ferret_ext/frt_threading.h +12 -5
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +4 -3
- data/ext/isomorfeus_ferret_ext/test_1710.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_compound_io.c +8 -8
- data/ext/isomorfeus_ferret_ext/test_fields.c +7 -7
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_filter.c +5 -4
- data/ext/isomorfeus_ferret_ext/test_fs_store.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +4 -2
- data/ext/isomorfeus_ferret_ext/test_index.c +63 -63
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +3 -2
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +4 -3
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +4 -2
- data/ext/isomorfeus_ferret_ext/test_q_span.c +9 -2
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +4 -4
- data/ext/isomorfeus_ferret_ext/test_search.c +10 -5
- data/ext/isomorfeus_ferret_ext/test_segments.c +4 -3
- data/ext/isomorfeus_ferret_ext/test_sort.c +18 -10
- data/ext/isomorfeus_ferret_ext/test_store.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_term.c +5 -3
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_threading.c +5 -4
- data/lib/isomorfeus/ferret/index/index.rb +8 -3
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0625b49341ee2fc35e80e673e368789532c06ce6c93e779072cacc1206847a4b
|
4
|
+
data.tar.gz: 7e81488d430471a37a872f80319efdcf8378e55dc4a583e33ff12bea942b9416
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 38cf613eac98374898aa9a5c998cc3534d18d6badba3560cd1660413acd189cb6e618eba38f3bf828cef23fcafb9e44b2de349363b55585af9ea79927ff9b2cf
|
7
|
+
data.tar.gz: 88564c30711737b48bec8879df788b46e03b06a041a98aaf3147c7375ef0d313d48f789805303774b4d0ed4531603ee8baaa46935b81abafb6df8ba7ccdb56a6
|
data/README.md
CHANGED
@@ -11,12 +11,12 @@ At the [Isomorfeus Framework Project](https://isomorfeus.com)
|
|
11
11
|
|
12
12
|
## About this project
|
13
13
|
|
14
|
-
Isomorfeus-Ferret is a revived version of the original ferret gem created by Dave Balmain.
|
14
|
+
Isomorfeus-Ferret is a revived version of the original ferret gem created by Dave Balmain, [https://github.com/dbalmain/ferret](https://github.com/dbalmain/ferret).
|
15
15
|
During revival many things have been fixed, now all tests pass, no crashes and it
|
16
16
|
successfully compiles and runs with Rubies > 3. It's no longer a goal to have
|
17
17
|
a C library available, but instead the usage is meant as a Ruby gem with a C extension only.
|
18
18
|
|
19
|
-
It
|
19
|
+
It works on *nixes, *nuxes, *BSDs and also works on Windows and RaspberryPi.
|
20
20
|
|
21
21
|
## Improvements and Changes in Version 0.13
|
22
22
|
|
@@ -63,7 +63,7 @@ fis.add_field(:compressed_field, :store => :yes, :compression => :brotli, :term_
|
|
63
63
|
|
64
64
|
### Performance
|
65
65
|
|
66
|
-
For version 0.13.7 the performance bottle
|
66
|
+
For version 0.13.7 the performance bottleneck has been identified and removed, ferret now delivers excellent indexing performance on all platforms, see numbers below.
|
67
67
|
On Windows performance is still not as good as on Linux, but that is equally true for Lucene and because of how the Windows filesystem works.
|
68
68
|
|
69
69
|
## Documentation
|
@@ -99,14 +99,20 @@ Ensure your locale is set to C.UTF-8, because the internal c tests don't know ho
|
|
99
99
|
|
100
100
|
A recent Java JDK must be installed to compile and run lucene benchmarks.
|
101
101
|
|
102
|
-
Results, Ferret 0.13.7 vs. Lucene 9.1.0, WhitespaceAnalyzer,
|
102
|
+
Results, Ferret 0.13.7 vs. Lucene 9.1.0, WhitespaceAnalyzer,
|
103
|
+
Linux Ubuntu 20.04, FreeBSD 13.0 and Windows 10 on old Intel Core i5 from 2015,
|
104
|
+
LinuxPi on RaspberryPi 400:
|
103
105
|
|
104
106
|
| OS | Task | Ferret | Lucene* |
|
105
107
|
|---------|------------|-----------------|----------------|
|
106
108
|
| Linux | Indexing | 4905 docs/s | 4785 docs/s |
|
109
|
+
| FreeBSD | Indexing | 4516 docs/s | - |
|
107
110
|
| Windows | Indexing | 2361 docs/s | 2395 docs/s |
|
111
|
+
| LinuxPi | Indexing | 1161 docs/s | 707 docs/s |
|
108
112
|
| Linux | Searching | 25664 queries/s | 4708 queries/s |
|
113
|
+
| FreeBSD | Searching | 25073 queries/s | - |
|
109
114
|
| Windows | Searching | 3646 queries/s | 935 queries/s |
|
115
|
+
| LinuxPi | Searching | 5768 queries/s | 680 queries/s |
|
110
116
|
| | Index Size | 28 MB | 35 MB |
|
111
117
|
|
112
118
|
*Lucene 9.1.0 on JVM 11.0.14.1 (Ubuntu)
|
@@ -6,7 +6,7 @@
|
|
6
6
|
#define N 10
|
7
7
|
#define write_byte(os, b) os->buf.buf[os->buf.pos++] = (frt_uchar)b
|
8
8
|
|
9
|
-
void my_os_write_voff_t(FrtOutStream *os, register
|
9
|
+
void my_os_write_voff_t(FrtOutStream *os, register frt_off_t num) {
|
10
10
|
if (!(num&0x7f)) {
|
11
11
|
if (os->buf.pos >= FRT_BUFFER_SIZE) {
|
12
12
|
frt_os_write_byte(os, (frt_uchar)num);
|
@@ -48,7 +48,7 @@ void my_os_write_voff_t(FrtOutStream *os, register off_t num) {
|
|
48
48
|
|
49
49
|
static void vint_out(void) {
|
50
50
|
int n;
|
51
|
-
|
51
|
+
frt_off_t i;
|
52
52
|
FrtOutStream *os;
|
53
53
|
|
54
54
|
for (n = 0; n < N; n++) {
|
@@ -63,7 +63,7 @@ static void vint_out(void) {
|
|
63
63
|
|
64
64
|
static void unrolled_vint_out(void) {
|
65
65
|
int n;
|
66
|
-
|
66
|
+
frt_off_t i;
|
67
67
|
FrtOutStream *os;
|
68
68
|
|
69
69
|
for (n = 0; n < N; n++) {
|
@@ -458,7 +458,7 @@ static VALUE frb_ts_next(VALUE self) {
|
|
458
458
|
|
459
459
|
static void frb_tf_mark(void *p) {
|
460
460
|
FrtTokenStream *ts = (FrtTokenStream *)p;
|
461
|
-
if (TkFilt(ts)->sub_ts->rts)
|
461
|
+
if (TkFilt(ts)->sub_ts && TkFilt(ts)->sub_ts->rts)
|
462
462
|
rb_gc_mark(TkFilt(ts)->sub_ts->rts);
|
463
463
|
}
|
464
464
|
|
@@ -1331,9 +1331,10 @@ static void frb_h_mark_values_i(void *key, void *value, void *arg) {
|
|
1331
1331
|
}
|
1332
1332
|
|
1333
1333
|
static void frb_pfa_mark(void *p) {
|
1334
|
-
if (PFA(p)->default_a->ranalyzer)
|
1334
|
+
if (PFA(p)->default_a && PFA(p)->default_a->ranalyzer)
|
1335
1335
|
rb_gc_mark(PFA(p)->default_a->ranalyzer);
|
1336
|
-
|
1336
|
+
if (PFA(p)->dict)
|
1337
|
+
frt_h_each(PFA(p)->dict, &frb_h_mark_values_i, NULL);
|
1337
1338
|
}
|
1338
1339
|
|
1339
1340
|
/*** PerFieldAnalyzer ***/
|
@@ -1370,6 +1371,8 @@ const rb_data_type_t frb_per_field_analyzer_t = {
|
|
1370
1371
|
|
1371
1372
|
static VALUE frb_per_field_analyzer_alloc(VALUE rclass) {
|
1372
1373
|
FrtAnalyzer *a = frt_per_field_analyzer_alloc();
|
1374
|
+
PFA(a)->default_a = NULL;
|
1375
|
+
PFA(a)->dict = NULL;
|
1373
1376
|
return TypedData_Wrap_Struct(rclass, &frb_per_field_analyzer_t, a);
|
1374
1377
|
}
|
1375
1378
|
|
@@ -1435,7 +1438,7 @@ static VALUE frb_pfa_analyzer_token_stream(VALUE self, VALUE rfield, VALUE rstri
|
|
1435
1438
|
/*** RegExpAnalyzer ***/
|
1436
1439
|
|
1437
1440
|
static void frb_re_analyzer_mark(void *p) {
|
1438
|
-
if (((FrtAnalyzer *)p)->current_ts->rts)
|
1441
|
+
if (((FrtAnalyzer *)p)->current_ts && ((FrtAnalyzer *)p)->current_ts->rts)
|
1439
1442
|
rb_gc_mark(((FrtAnalyzer *)p)->current_ts->rts);
|
1440
1443
|
}
|
1441
1444
|
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#include "isomorfeus_ferret.h"
|
3
3
|
#include <ruby.h>
|
4
4
|
|
5
|
-
#undef close
|
5
|
+
// #undef close
|
6
6
|
|
7
7
|
VALUE mIndex;
|
8
8
|
|
@@ -429,6 +429,7 @@ static VALUE frb_get_field_infos(FrtFieldInfos *fis) {
|
|
429
429
|
|
430
430
|
static VALUE frb_fis_alloc(VALUE rclass) {
|
431
431
|
FrtFieldInfos *fis = frt_fis_alloc();
|
432
|
+
fis->size = 0;
|
432
433
|
return TypedData_Wrap_Struct(rclass, &frb_field_infos_t, fis);
|
433
434
|
}
|
434
435
|
|
@@ -535,7 +536,7 @@ frb_fis_add_field(int argc, VALUE *argv, VALUE self)
|
|
535
536
|
{
|
536
537
|
FrtFieldInfos *fis = (FrtFieldInfos *)DATA_PTR(self);
|
537
538
|
FrtFieldInfo *fi;
|
538
|
-
FrtStoreValue
|
539
|
+
FrtStoreValue store_val = fis->store_val;
|
539
540
|
FrtCompressionType compression = fis->compression;
|
540
541
|
FrtIndexValue index = fis->index;
|
541
542
|
FrtTermVectorValue term_vector = fis->term_vector;
|
@@ -544,9 +545,9 @@ frb_fis_add_field(int argc, VALUE *argv, VALUE self)
|
|
544
545
|
|
545
546
|
rb_scan_args(argc, argv, "11", &rname, &roptions);
|
546
547
|
if (argc > 1) {
|
547
|
-
frb_fi_get_params(roptions, &
|
548
|
+
frb_fi_get_params(roptions, &store_val, &compression, &index, &term_vector, &boost);
|
548
549
|
}
|
549
|
-
fi = frt_fi_new(frb_field(rname),
|
550
|
+
fi = frt_fi_new(frb_field(rname), store_val, compression, index, term_vector);
|
550
551
|
fi->boost = boost;
|
551
552
|
frt_fis_add_field(fis, fi);
|
552
553
|
return self;
|
@@ -592,9 +593,7 @@ frb_fis_to_s(VALUE self)
|
|
592
593
|
*
|
593
594
|
* Return the number of fields in the FieldInfos object.
|
594
595
|
*/
|
595
|
-
static VALUE
|
596
|
-
frb_fis_size(VALUE self)
|
597
|
-
{
|
596
|
+
static VALUE frb_fis_size(VALUE self) {
|
598
597
|
FrtFieldInfos *fis = (FrtFieldInfos *)DATA_PTR(self);
|
599
598
|
return INT2FIX(fis->size);
|
600
599
|
}
|
@@ -609,21 +608,19 @@ frb_fis_size(VALUE self)
|
|
609
608
|
* existing index (or other files for that matter) will be deleted from the
|
610
609
|
* directory and overwritten by the new index.
|
611
610
|
*/
|
612
|
-
static VALUE
|
613
|
-
frb_fis_create_index(VALUE self, VALUE rdir)
|
614
|
-
{
|
611
|
+
static VALUE frb_fis_create_index(VALUE self, VALUE rdir) {
|
615
612
|
FrtFieldInfos *fis = (FrtFieldInfos *)DATA_PTR(self);
|
616
613
|
FrtStore *store = NULL;
|
617
614
|
if (TYPE(rdir) == T_DATA) {
|
618
615
|
store = DATA_PTR(rdir);
|
619
|
-
|
616
|
+
frt_index_create(store, fis);
|
620
617
|
} else {
|
621
618
|
StringValue(rdir);
|
622
619
|
frb_create_dir(rdir);
|
623
620
|
store = frt_open_fs_store(rs2s(rdir));
|
621
|
+
frt_index_create(store, fis);
|
622
|
+
frt_store_close(store);
|
624
623
|
}
|
625
|
-
frt_index_create(store, fis);
|
626
|
-
frt_store_deref(store);
|
627
624
|
return self;
|
628
625
|
}
|
629
626
|
|
@@ -674,8 +671,7 @@ frb_fis_get_tk_fields(VALUE self)
|
|
674
671
|
****************************************************************************/
|
675
672
|
|
676
673
|
static void frb_te_free(void *p) {
|
677
|
-
FrtTermEnum *
|
678
|
-
te->close(te);
|
674
|
+
((FrtTermEnum *)p)->close((FrtTermEnum *)p);
|
679
675
|
}
|
680
676
|
|
681
677
|
static size_t frb_te_size(const void *p) {
|
@@ -1344,6 +1340,8 @@ static VALUE frb_iw_alloc(VALUE rclass) {
|
|
1344
1340
|
return TypedData_Wrap_Struct(rclass, &frb_index_writer_t, iw);
|
1345
1341
|
}
|
1346
1342
|
|
1343
|
+
extern rb_data_type_t frb_store_t;
|
1344
|
+
|
1347
1345
|
static VALUE frb_iw_init(int argc, VALUE *argv, VALUE self) {
|
1348
1346
|
VALUE roptions, rval;
|
1349
1347
|
bool create = false;
|
@@ -1363,12 +1361,11 @@ static VALUE frb_iw_init(int argc, VALUE *argv, VALUE self) {
|
|
1363
1361
|
|
1364
1362
|
if ((rval = rb_hash_aref(roptions, sym_dir)) != Qnil) {
|
1365
1363
|
// Check_Type(rval, T_DATA);
|
1366
|
-
|
1364
|
+
TypedData_Get_Struct(rval, FrtStore, &frb_store_t, store);
|
1367
1365
|
} else if ((rval = rb_hash_aref(roptions, sym_path)) != Qnil) {
|
1368
1366
|
StringValue(rval);
|
1369
1367
|
frb_create_dir(rval);
|
1370
1368
|
store = frt_open_fs_store(rs2s(rval));
|
1371
|
-
FRT_DEREF(store);
|
1372
1369
|
}
|
1373
1370
|
/* use_compound_file defaults to true */
|
1374
1371
|
config.use_compound_file =
|
@@ -1393,7 +1390,6 @@ static VALUE frb_iw_init(int argc, VALUE *argv, VALUE self) {
|
|
1393
1390
|
}
|
1394
1391
|
if (NULL == store) {
|
1395
1392
|
store = frt_open_ram_store(NULL);
|
1396
|
-
FRT_DEREF(store);
|
1397
1393
|
}
|
1398
1394
|
if (!create && create_if_missing && !store->exists(store, "segments")) {
|
1399
1395
|
create = true;
|
@@ -1411,7 +1407,7 @@ static VALUE frb_iw_init(int argc, VALUE *argv, VALUE self) {
|
|
1411
1407
|
}
|
1412
1408
|
|
1413
1409
|
TypedData_Get_Struct(self, FrtIndexWriter, &frb_index_writer_t, iw);
|
1414
|
-
|
1410
|
+
frt_iw_open(iw, store, analyzer, &config);
|
1415
1411
|
FRT_XCATCHALL
|
1416
1412
|
ex_code = xcontext.excode;
|
1417
1413
|
msg = xcontext.msg;
|
@@ -1598,9 +1594,9 @@ void frb_ir_free(void *p) {
|
|
1598
1594
|
|
1599
1595
|
void frb_ir_mark(void *p) {
|
1600
1596
|
FrtIndexReader *ir = (FrtIndexReader *)p;
|
1601
|
-
FrtMultiReader *mr = (FrtMultiReader *)p;
|
1602
1597
|
|
1603
1598
|
if (ir->type == FRT_MULTI_READER) {
|
1599
|
+
FrtMultiReader *mr = (FrtMultiReader *)p;
|
1604
1600
|
int i;
|
1605
1601
|
for (i = 0; i < mr->r_cnt; i++) {
|
1606
1602
|
if (mr->sub_readers[i]->rir)
|
@@ -1652,6 +1648,7 @@ static VALUE frb_iw_add_readers(VALUE self, VALUE rreaders) {
|
|
1652
1648
|
while (i-- > 0) {
|
1653
1649
|
FrtIndexReader *ir;
|
1654
1650
|
TypedData_Get_Struct(RARRAY_PTR(rreaders)[i], FrtIndexReader, &frb_index_reader_t, ir);
|
1651
|
+
FRT_REF(ir);
|
1655
1652
|
irs[i] = ir;
|
1656
1653
|
}
|
1657
1654
|
frt_iw_add_readers(iw, irs, RARRAY_LEN(rreaders));
|
@@ -2157,7 +2154,6 @@ static VALUE frb_ir_init(VALUE self, VALUE rdir) {
|
|
2157
2154
|
VALUE rfield_num_map = rb_hash_new();
|
2158
2155
|
int ex_code = 0;
|
2159
2156
|
const char *msg = NULL;
|
2160
|
-
|
2161
2157
|
FRT_TRY
|
2162
2158
|
if (TYPE(rdir) == T_ARRAY) {
|
2163
2159
|
VALUE rdirs = rdir;
|
@@ -2170,7 +2166,6 @@ static VALUE frb_ir_init(VALUE self, VALUE rdir) {
|
|
2170
2166
|
case T_DATA:
|
2171
2167
|
if (CLASS_OF(rdir) == cIndexReader) {
|
2172
2168
|
TypedData_Get_Struct(rdir, FrtIndexReader, &frb_index_reader_t, sub_readers[i]);
|
2173
|
-
FRT_REF(sub_readers[i]);
|
2174
2169
|
continue;
|
2175
2170
|
} else if (RTEST(rb_obj_is_kind_of(rdir, cDirectory))) {
|
2176
2171
|
store = DATA_PTR(rdir);
|
@@ -2185,7 +2180,6 @@ static VALUE frb_ir_init(VALUE self, VALUE rdir) {
|
|
2185
2180
|
case T_STRING:
|
2186
2181
|
frb_create_dir(rdir);
|
2187
2182
|
store = frt_open_fs_store(rs2s(rdir));
|
2188
|
-
FRT_DEREF(store);
|
2189
2183
|
break;
|
2190
2184
|
default:
|
2191
2185
|
FRT_RAISE(FRT_ARG_ERROR, "%s isn't a valid directory "
|
@@ -2195,6 +2189,7 @@ static VALUE frb_ir_init(VALUE self, VALUE rdir) {
|
|
2195
2189
|
break;
|
2196
2190
|
}
|
2197
2191
|
sub_readers[i] = frt_ir_open(NULL, store);
|
2192
|
+
FRT_DEREF(sub_readers[i]);
|
2198
2193
|
}
|
2199
2194
|
TypedData_Get_Struct(self, FrtIndexReader, &frb_index_reader_t, ir);
|
2200
2195
|
ir = frt_mr_open(ir, sub_readers, reader_cnt);
|
@@ -2206,7 +2201,6 @@ static VALUE frb_ir_init(VALUE self, VALUE rdir) {
|
|
2206
2201
|
case T_STRING:
|
2207
2202
|
frb_create_dir(rdir);
|
2208
2203
|
store = frt_open_fs_store(rs2s(rdir));
|
2209
|
-
FRT_DEREF(store);
|
2210
2204
|
break;
|
2211
2205
|
default:
|
2212
2206
|
FRT_RAISE(FRT_ARG_ERROR, "%s isn't a valid directory argument. "
|
@@ -34,7 +34,7 @@ static void frb_qp_free(void *p) {
|
|
34
34
|
}
|
35
35
|
|
36
36
|
static void frb_qp_mark(void *p) {
|
37
|
-
if (((FrtQParser *)p)->analyzer->ranalyzer)
|
37
|
+
if (((FrtQParser *)p)->analyzer && ((FrtQParser *)p)->analyzer->ranalyzer)
|
38
38
|
rb_gc_mark(((FrtQParser *)p)->analyzer->ranalyzer);
|
39
39
|
}
|
40
40
|
|
@@ -59,6 +59,7 @@ const rb_data_type_t frb_qp_t = {
|
|
59
59
|
|
60
60
|
static VALUE frb_qp_alloc(VALUE rclass) {
|
61
61
|
FrtQParser *qp = frt_qp_alloc();
|
62
|
+
qp->analyzer = NULL;
|
62
63
|
return TypedData_Wrap_Struct(rclass, &frb_qp_t, qp);
|
63
64
|
}
|
64
65
|
|
@@ -4,7 +4,7 @@
|
|
4
4
|
#include "isomorfeus_ferret.h"
|
5
5
|
#include <ruby.h>
|
6
6
|
|
7
|
-
#undef close
|
7
|
+
// #undef close
|
8
8
|
|
9
9
|
VALUE mSearch;
|
10
10
|
|
@@ -713,7 +713,7 @@ static size_t frb_boolean_clause_t_size(const void *p) {
|
|
713
713
|
}
|
714
714
|
|
715
715
|
static void frb_bc_mark(void *p) {
|
716
|
-
if (((FrtBooleanClause *)p)->query->rquery)
|
716
|
+
if (((FrtBooleanClause *)p)->query && ((FrtBooleanClause *)p)->query->rquery)
|
717
717
|
rb_gc_mark(((FrtBooleanClause *)p)->query->rquery);
|
718
718
|
}
|
719
719
|
|
@@ -1837,11 +1837,11 @@ static size_t frb_filtered_query_size(const void *p) {
|
|
1837
1837
|
}
|
1838
1838
|
|
1839
1839
|
static void frb_fqq_mark(void *p) {
|
1840
|
-
FrtFilteredQuery *
|
1841
|
-
if (
|
1842
|
-
rb_gc_mark(
|
1843
|
-
if (
|
1844
|
-
rb_gc_mark(
|
1840
|
+
FrtFilteredQuery *fqq = (FrtFilteredQuery *)p;
|
1841
|
+
if (fqq->query && fqq->query->rquery)
|
1842
|
+
rb_gc_mark(fqq->query->rquery);
|
1843
|
+
if (fqq->filter && fqq->filter->rfilter)
|
1844
|
+
rb_gc_mark(fqq->filter->rfilter);
|
1845
1845
|
}
|
1846
1846
|
|
1847
1847
|
const rb_data_type_t frb_filtered_query_t = {
|
@@ -1860,6 +1860,8 @@ const rb_data_type_t frb_filtered_query_t = {
|
|
1860
1860
|
|
1861
1861
|
static VALUE frb_fqq_alloc(VALUE rclass) {
|
1862
1862
|
FrtQuery *fqq = frt_fq_alloc();
|
1863
|
+
((FrtFilteredQuery *)fqq)->query = NULL;
|
1864
|
+
((FrtFilteredQuery *)fqq)->filter = NULL;
|
1863
1865
|
return TypedData_Wrap_Struct(rclass, &frb_filtered_query_t, fqq);
|
1864
1866
|
}
|
1865
1867
|
|
@@ -2172,6 +2174,7 @@ const rb_data_type_t frb_span_near_query_t = {
|
|
2172
2174
|
|
2173
2175
|
static VALUE frb_spannq_alloc(VALUE rclass) {
|
2174
2176
|
FrtQuery *snq = frt_spannq_alloc();
|
2177
|
+
((FrtSpanNearQuery *)snq)->c_cnt = 0;
|
2175
2178
|
return TypedData_Wrap_Struct(rclass, &frb_span_near_query_t, snq);
|
2176
2179
|
}
|
2177
2180
|
|
@@ -2352,9 +2355,9 @@ static size_t frb_span_not_query_size(const void *p) {
|
|
2352
2355
|
|
2353
2356
|
static void frb_spanxq_mark(void *p) {
|
2354
2357
|
FrtSpanNotQuery *sxq = (FrtSpanNotQuery *)p;
|
2355
|
-
if (sxq->inc->rquery)
|
2358
|
+
if (sxq->inc && sxq->inc->rquery)
|
2356
2359
|
rb_gc_mark(sxq->inc->rquery);
|
2357
|
-
if (sxq->exc->rquery)
|
2360
|
+
if (sxq->exc && sxq->exc->rquery)
|
2358
2361
|
rb_gc_mark(sxq->exc->rquery);
|
2359
2362
|
}
|
2360
2363
|
|
@@ -3557,10 +3560,12 @@ static size_t frb_index_searcher_size(const void *p) {
|
|
3557
3560
|
|
3558
3561
|
static void frb_sea_mark(void *p) {
|
3559
3562
|
FrtIndexSearcher *isea = (FrtIndexSearcher *)p;
|
3560
|
-
if (isea->ir
|
3561
|
-
|
3562
|
-
|
3563
|
-
|
3563
|
+
if (isea->ir) {
|
3564
|
+
if (isea->ir->rir)
|
3565
|
+
rb_gc_mark(isea->ir->rir);
|
3566
|
+
if (isea->ir->store && isea->ir->store->rstore)
|
3567
|
+
rb_gc_mark(isea->ir->store->rstore);
|
3568
|
+
}
|
3564
3569
|
}
|
3565
3570
|
|
3566
3571
|
const rb_data_type_t frb_index_searcher_t = {
|
@@ -3578,8 +3583,9 @@ const rb_data_type_t frb_index_searcher_t = {
|
|
3578
3583
|
};
|
3579
3584
|
|
3580
3585
|
static VALUE frb_sea_alloc(VALUE rclass) {
|
3581
|
-
FrtSearcher *
|
3582
|
-
|
3586
|
+
FrtSearcher *sea = frt_isea_alloc();
|
3587
|
+
((FrtIndexSearcher *)sea)->ir = NULL;
|
3588
|
+
return TypedData_Wrap_Struct(rclass, &frb_index_searcher_t, sea);
|
3583
3589
|
}
|
3584
3590
|
|
3585
3591
|
/*
|
@@ -3600,7 +3606,6 @@ static VALUE frb_sea_init(VALUE self, VALUE obj) {
|
|
3600
3606
|
frb_create_dir(obj);
|
3601
3607
|
store = frt_open_fs_store(rs2s(obj));
|
3602
3608
|
ir = frt_ir_open(NULL, store);
|
3603
|
-
FRT_DEREF(store);
|
3604
3609
|
ir->rir = TypedData_Wrap_Struct(cIndexReader, &frb_index_reader_t, ir);
|
3605
3610
|
} else {
|
3606
3611
|
// Check_Type(obj, T_DATA);
|
@@ -3610,14 +3615,12 @@ static VALUE frb_sea_init(VALUE self, VALUE obj) {
|
|
3610
3615
|
ir->rir = TypedData_Wrap_Struct(cIndexReader, &frb_index_reader_t, ir);
|
3611
3616
|
} else if (rb_obj_is_kind_of(obj, cIndexReader) == Qtrue) {
|
3612
3617
|
TypedData_Get_Struct(obj, FrtIndexReader, &frb_index_reader_t, ir);
|
3613
|
-
ir->rir = obj;
|
3614
3618
|
} else {
|
3615
3619
|
rb_raise(rb_eArgError, "Unknown type for argument to IndexSearcher.new");
|
3616
3620
|
}
|
3617
3621
|
}
|
3618
3622
|
TypedData_Get_Struct(self, FrtSearcher, &frb_index_searcher_t, sea);
|
3619
3623
|
frt_isea_init(sea, ir);
|
3620
|
-
((FrtIndexSearcher *)sea)->close_ir = false;
|
3621
3624
|
sea->rsea = self;
|
3622
3625
|
return self;
|
3623
3626
|
}
|
@@ -3665,6 +3668,7 @@ const rb_data_type_t frb_multi_searcher_t = {
|
|
3665
3668
|
|
3666
3669
|
static VALUE frb_ms_alloc(VALUE rclass) {
|
3667
3670
|
FrtSearcher *s = frt_msea_alloc();
|
3671
|
+
((FrtMultiSearcher *)s)->s_cnt = 0;
|
3668
3672
|
return TypedData_Wrap_Struct(rclass, &frb_multi_searcher_t, s);
|
3669
3673
|
}
|
3670
3674
|
|
@@ -3703,7 +3707,7 @@ static VALUE frb_ms_init(int argc, VALUE *argv, VALUE self) {
|
|
3703
3707
|
}
|
3704
3708
|
}
|
3705
3709
|
TypedData_Get_Struct(self, FrtSearcher, &frb_multi_searcher_t, sea);
|
3706
|
-
frt_msea_init(sea, searchers, top
|
3710
|
+
frt_msea_init(sea, searchers, top);
|
3707
3711
|
sea->rsea = self;
|
3708
3712
|
return self;
|
3709
3713
|
}
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#include "isomorfeus_ferret.h"
|
3
3
|
#include <time.h>
|
4
4
|
|
5
|
-
#undef rename
|
5
|
+
// #undef rename
|
6
6
|
|
7
7
|
static ID id_ref_cnt;
|
8
8
|
VALUE cLock;
|
@@ -34,7 +34,7 @@ void frb_lock_free(void *p) {
|
|
34
34
|
|
35
35
|
void frb_lock_mark(void *p) {
|
36
36
|
FrtLock *lock = (FrtLock *)p;
|
37
|
-
if (lock->store->rstore)
|
37
|
+
if (lock->store && lock->store->rstore)
|
38
38
|
rb_gc_mark(lock->store->rstore);
|
39
39
|
}
|
40
40
|
|
@@ -185,10 +185,35 @@ static VALUE frb_lock_release(VALUE self) {
|
|
185
185
|
*
|
186
186
|
****************************************************************************/
|
187
187
|
|
188
|
+
/*** FrbStore ****************************************************************/
|
189
|
+
|
190
|
+
static size_t frb_store_size(const void *p) {
|
191
|
+
return sizeof(FrtStore);
|
192
|
+
(void)p;
|
193
|
+
}
|
194
|
+
|
188
195
|
void frb_dir_free(void *p) {
|
189
|
-
|
190
|
-
|
191
|
-
|
196
|
+
frb_unwrap_locks((FrtStore *)p);
|
197
|
+
frt_store_close((FrtStore *)p);
|
198
|
+
}
|
199
|
+
|
200
|
+
const rb_data_type_t frb_store_t = {
|
201
|
+
.wrap_struct_name = "FrbStore",
|
202
|
+
.function = {
|
203
|
+
.dmark = NULL,
|
204
|
+
.dfree = frb_dir_free,
|
205
|
+
.dsize = frb_store_size,
|
206
|
+
.dcompact = NULL,
|
207
|
+
.reserved = {0},
|
208
|
+
},
|
209
|
+
.parent = NULL,
|
210
|
+
.data = NULL,
|
211
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY
|
212
|
+
};
|
213
|
+
|
214
|
+
static VALUE frb_store_alloc(VALUE rclass) {
|
215
|
+
FrtStore *st = frt_store_alloc();
|
216
|
+
return TypedData_Wrap_Struct(rclass, &frb_store_t, st);
|
192
217
|
}
|
193
218
|
|
194
219
|
/*
|
@@ -208,7 +233,7 @@ static VALUE frb_dir_close(VALUE self) {
|
|
208
233
|
((struct RData *)(self))->dmark = NULL;
|
209
234
|
((struct RData *)(self))->dfree = NULL;
|
210
235
|
frb_unwrap_locks(store);
|
211
|
-
|
236
|
+
frt_store_close(store);
|
212
237
|
}
|
213
238
|
return Qnil;
|
214
239
|
}
|
@@ -308,32 +333,6 @@ static VALUE frb_dir_make_lock(VALUE self, VALUE rlock_name) {
|
|
308
333
|
return rlock;
|
309
334
|
}
|
310
335
|
|
311
|
-
/*** FrbStore ****************************************************************/
|
312
|
-
|
313
|
-
static size_t frb_store_size(const void *p) {
|
314
|
-
return sizeof(FrtStore);
|
315
|
-
(void)p;
|
316
|
-
}
|
317
|
-
|
318
|
-
const rb_data_type_t frb_store_t = {
|
319
|
-
.wrap_struct_name = "FrbStore",
|
320
|
-
.function = {
|
321
|
-
.dmark = NULL,
|
322
|
-
.dfree = frb_dir_free,
|
323
|
-
.dsize = frb_store_size,
|
324
|
-
.dcompact = NULL,
|
325
|
-
.reserved = {0},
|
326
|
-
},
|
327
|
-
.parent = NULL,
|
328
|
-
.data = NULL,
|
329
|
-
.flags = RUBY_TYPED_FREE_IMMEDIATELY
|
330
|
-
};
|
331
|
-
|
332
|
-
static VALUE frb_store_alloc(VALUE rclass) {
|
333
|
-
FrtStore *st = frt_store_alloc();
|
334
|
-
return TypedData_Wrap_Struct(rclass, &frb_store_t, st);
|
335
|
-
}
|
336
|
-
|
337
336
|
/****************************************************************************
|
338
337
|
*
|
339
338
|
* RAMDirectory Methods
|
@@ -403,19 +402,18 @@ static VALUE frb_fsdir_new(int argc, VALUE *argv, VALUE klass) {
|
|
403
402
|
frb_create_dir(rpath);
|
404
403
|
}
|
405
404
|
if (!rb_funcall(rb_cFile, id_is_directory, 1, rpath)) {
|
406
|
-
rb_raise(rb_eIOError, "No directory <%s> found. Use :create => true"
|
407
|
-
" to create one.", rs2s(rpath));
|
405
|
+
rb_raise(rb_eIOError, "No directory <%s> found. Use :create => true to create one.", rs2s(rpath));
|
408
406
|
}
|
409
407
|
store = frt_open_fs_store(rs2s(rpath));
|
410
408
|
if (create) store->clear_all(store);
|
411
|
-
|
409
|
+
self = store->rstore;
|
410
|
+
if (self == Qnil || DATA_PTR(self) == NULL) {
|
412
411
|
self = TypedData_Wrap_Struct(klass, &frb_store_t, store);
|
413
412
|
store->rstore = self;
|
414
413
|
rb_ivar_set(self, id_ref_cnt, INT2FIX(0));
|
415
414
|
} else {
|
416
415
|
int ref_cnt = FIX2INT(rb_ivar_get(self, id_ref_cnt)) + 1;
|
417
416
|
rb_ivar_set(self, id_ref_cnt, INT2FIX(ref_cnt));
|
418
|
-
FRT_DEREF(store);
|
419
417
|
}
|
420
418
|
return self;
|
421
419
|
}
|
@@ -64,7 +64,7 @@ static inline int get_cp(char *start, char *end, int *cp_len, rb_encoding *enc)
|
|
64
64
|
/*** FrtToken ****************************************************************/
|
65
65
|
/*****************************************************************************/
|
66
66
|
|
67
|
-
FrtToken *frt_tk_set(FrtToken *tk, char *text, int tlen,
|
67
|
+
FrtToken *frt_tk_set(FrtToken *tk, char *text, int tlen, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding) {
|
68
68
|
if (tlen >= FRT_MAX_WORD_SIZE) {
|
69
69
|
tlen = FRT_MAX_WORD_SIZE - 1; // TODO: this may invalidate mbc's
|
70
70
|
}
|
@@ -92,7 +92,7 @@ static FrtToken *frt_tk_set_ts(FrtToken *tk, char *start, char *end, char *text,
|
|
92
92
|
return frt_tk_set(tk, start, (int)(end - start), (off_t)(start - text), (off_t)(end - text), pos_inc, encoding);
|
93
93
|
}
|
94
94
|
|
95
|
-
FrtToken *frt_tk_set_no_len(FrtToken *tk, char *text,
|
95
|
+
FrtToken *frt_tk_set_no_len(FrtToken *tk, char *text, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding) {
|
96
96
|
return frt_tk_set(tk, text, (int)strlen(text), start, end, pos_inc, encoding);
|
97
97
|
}
|
98
98
|
|
@@ -133,7 +133,7 @@ FrtToken *frt_tk_new(void) {
|
|
133
133
|
/*****************************************************************************/
|
134
134
|
|
135
135
|
void frt_ts_deref(FrtTokenStream *ts) {
|
136
|
-
if (
|
136
|
+
if (FRT_DEREF(ts) == 0)
|
137
137
|
ts->destroy_i(ts);
|
138
138
|
}
|
139
139
|
|
@@ -1089,7 +1089,7 @@ FrtTokenStream *frt_stem_filter_new(FrtTokenStream *sub_ts, const char *algorith
|
|
1089
1089
|
/*****************************************************************************/
|
1090
1090
|
|
1091
1091
|
void frt_a_deref(FrtAnalyzer *a) {
|
1092
|
-
if (
|
1092
|
+
if (FRT_DEREF(a) == 0)
|
1093
1093
|
a->destroy_i(a);
|
1094
1094
|
}
|
1095
1095
|
|
@@ -13,15 +13,15 @@
|
|
13
13
|
typedef struct FrtToken {
|
14
14
|
char text[FRT_MAX_WORD_SIZE];
|
15
15
|
int len;
|
16
|
-
|
17
|
-
|
16
|
+
frt_off_t start;
|
17
|
+
frt_off_t end;
|
18
18
|
int pos_inc;
|
19
19
|
} FrtToken;
|
20
20
|
|
21
21
|
extern FrtToken *frt_tk_new();
|
22
22
|
extern void frt_tk_destroy(void *p);
|
23
|
-
extern FrtToken *frt_tk_set(FrtToken *tk, char *text, int tlen,
|
24
|
-
extern FrtToken *frt_tk_set_no_len(FrtToken *tk, char *text,
|
23
|
+
extern FrtToken *frt_tk_set(FrtToken *tk, char *text, int tlen, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding);
|
24
|
+
extern FrtToken *frt_tk_set_no_len(FrtToken *tk, char *text, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding);
|
25
25
|
extern int frt_tk_eq(FrtToken *tk1, FrtToken *tk2);
|
26
26
|
extern int frt_tk_cmp(FrtToken *tk1, FrtToken *tk2);
|
27
27
|
|
@@ -33,15 +33,15 @@ typedef struct FrtTokenStream FrtTokenStream;
|
|
33
33
|
struct FrtTokenStream {
|
34
34
|
char *t; /* ptr used to scan text */
|
35
35
|
char *text;
|
36
|
-
int
|
36
|
+
int length;
|
37
37
|
rb_encoding *encoding;
|
38
38
|
FrtToken *(*next)(FrtTokenStream *ts);
|
39
39
|
FrtTokenStream *(*reset)(FrtTokenStream *ts, char *text, rb_encoding *encoding);
|
40
40
|
FrtTokenStream *(*clone_i)(FrtTokenStream *ts);
|
41
41
|
void (*destroy_i)(FrtTokenStream *ts);
|
42
|
-
int
|
43
|
-
VALUE
|
44
|
-
FrtToken
|
42
|
+
_Atomic unsigned int ref_cnt;
|
43
|
+
VALUE rts;
|
44
|
+
FrtToken token;
|
45
45
|
};
|
46
46
|
|
47
47
|
extern FrtTokenStream *frt_ts_new_i(size_t size);
|
@@ -184,7 +184,7 @@ typedef struct FrtAnalyzer {
|
|
184
184
|
FrtTokenStream *current_ts;
|
185
185
|
FrtTokenStream *(*get_ts)(struct FrtAnalyzer *a, ID field, char *text, rb_encoding *encoding);
|
186
186
|
void (*destroy_i)(struct FrtAnalyzer *a);
|
187
|
-
int
|
187
|
+
_Atomic unsigned int ref_cnt;
|
188
188
|
VALUE ranalyzer;
|
189
189
|
} FrtAnalyzer;
|
190
190
|
|