isomorfeus-ferret 0.13.10 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +40 -19
- data/ext/isomorfeus_ferret_ext/bzlib.h +83 -82
- data/ext/isomorfeus_ferret_ext/frb_index.c +55 -194
- data/ext/isomorfeus_ferret_ext/frb_lazy_doc.c +705 -0
- data/ext/isomorfeus_ferret_ext/frb_store.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_config.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_hash.h +6 -8
- data/ext/isomorfeus_ferret_ext/frt_hashset.c +5 -5
- data/ext/isomorfeus_ferret_ext/frt_index.c +8 -3
- data/ext/isomorfeus_ferret_ext/frt_index.h +3 -1
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_store.h +1 -1
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +8 -6
- data/lib/isomorfeus/ferret/index/index.rb +11 -8
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e6893e7012cf75189d3ff378b6e869a831a5281472f84ea5ab4e354bd92bfcee
|
4
|
+
data.tar.gz: 0a4cad49faae062c29e0bed8fd7f87c5e3875c548cbb6e168907719b52777306
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4e48ec64d99af7fe0440480f11f22fd79dd9fca0c6dd09ce58bc0953f7f556ee4f61bdf18810f5cb2f39c8f80c99b406c5319c93235d06974009cefb1c73fccb
|
7
|
+
data.tar.gz: 254960eb7543fb59e1d12087f83feaeb4abd957314f76b56778baf7a2fef2e090922b4b99d653e8532e30690a14d90c661ec5f849588a7e384b2a65346f7f04d
|
data/README.md
CHANGED
@@ -11,13 +11,29 @@ At the [Isomorfeus Framework Project](https://isomorfeus.com)
|
|
11
11
|
|
12
12
|
## About this project
|
13
13
|
|
14
|
-
Isomorfeus-Ferret is a revived version of the original ferret gem created by Dave Balmain,
|
14
|
+
Isomorfeus-Ferret is a revived version of the original ferret gem created by Dave Balmain,
|
15
|
+
[https://github.com/dbalmain/ferret](https://github.com/dbalmain/ferret).
|
15
16
|
During revival many things have been fixed, now all tests pass, no crashes and it
|
16
17
|
successfully compiles and runs with Ruby versions >3. It's no longer a goal to have
|
17
18
|
a C library available; instead, it is meant to be used only as a Ruby gem with a C extension.
|
18
19
|
|
19
20
|
It works on *nixes, *nuxes, *BSDs and also works on Windows and RaspberryPi.
|
20
21
|
|
22
|
+
## Improvements and Changes in Version 0.14
|
23
|
+
|
24
|
+
### Breaking
|
25
|
+
|
26
|
+
- The API for LazyDocs has changed; they are now read-only. LazyDoc#to_h may be used to create a hash, which can be modified and reindexed as a document.
|
27
|
+
|
28
|
+
### Performance
|
29
|
+
|
30
|
+
- LazyDoc is now truly lazy: fields are retrieved automatically. LazyDoc#load is no longer required, but may be used to preload all fields.
|
31
|
+
- Index#each is now multiple times faster, depending on use case.
|
32
|
+
|
33
|
+
### Other
|
34
|
+
|
35
|
+
- The Index class now includes Enumerable
|
36
|
+
|
21
37
|
## Improvements and Changes in Version 0.13
|
22
38
|
|
23
39
|
### Breaking
|
@@ -99,37 +115,42 @@ Ensure your locale is set to C.UTF-8, because the internal c tests don't know ho
|
|
99
115
|
|
100
116
|
A recent Java JDK must be installed to compile and run lucene benchmarks.
|
101
117
|
|
102
|
-
Results, Ferret 0.
|
103
|
-
Linux Ubuntu 20.04, FreeBSD 13.
|
118
|
+
Results, Ferret 0.14.0 vs. Lucene 9.1.0, WhitespaceAnalyzer,
|
119
|
+
Linux Ubuntu 20.04, FreeBSD 13.1 and Windows 10 on an old Intel Core i5 from 2015,
|
104
120
|
LinuxPi on RaspberryPi 400:
|
105
121
|
|
106
122
|
| OS | Task | Ferret | Lucene* |
|
107
123
|
|---------|------------|-----------------|----------------|
|
108
|
-
| Linux | Indexing |
|
109
|
-
| FreeBSD | Indexing |
|
110
|
-
| Windows | Indexing |
|
111
|
-
| LinuxPi | Indexing |
|
112
|
-
| Linux | Searching |
|
113
|
-
| FreeBSD | Searching |
|
114
|
-
| Windows | Searching |
|
115
|
-
| LinuxPi | Searching |
|
124
|
+
| Linux | Indexing | 5125 docs/s | 4671 docs/s |
|
125
|
+
| FreeBSD | Indexing | 4537 docs/s | 3831 docs/s |
|
126
|
+
| Windows | Indexing | 2488 docs/s | 2588 docs/s |
|
127
|
+
| LinuxPi | Indexing | 1200 docs/s | 551 docs/s |
|
128
|
+
| Linux | Searching | 26610 queries/s | 7165 queries/s |
|
129
|
+
| FreeBSD | Searching | 24167 queries/s | 4288 queries/s |
|
130
|
+
| Windows | Searching | 3901 queries/s | 1033 queries/s |
|
131
|
+
| LinuxPi | Searching | 6194 queries/s | 769 queries/s |
|
116
132
|
| | Index Size | 28 MB | 35 MB |
|
117
133
|
|
118
|
-
*
|
134
|
+
* JVM Versions:
|
135
|
+
OpenJDK Runtime Environment (build 18-ea+36-Ubuntu-1) (Linux)
|
136
|
+
OpenJDK Runtime Environment (build 17.0.3+7-Raspbian-1deb11u1rpt1) (LinuxPi)
|
137
|
+
OpenJDK Runtime Environment Temurin-18.0.1+10 (build 18.0.1+10) (Windows)
|
138
|
+
OpenJDK Runtime Environment (build 17.0.2+8-1) (FreeBSD)
|
119
139
|
|
120
140
|
### Storing Fields with Compression, Indexing and Retrieval
|
141
|
+
|
121
142
|
- clone repo
|
122
143
|
- bundle install
|
123
144
|
- rake ferret_compression_benchmark
|
124
145
|
|
125
|
-
Results on Linux, 0.
|
146
|
+
Results on Linux, 0.14.0, on an old Intel Core i5 from 2015:
|
126
147
|
|
127
|
-
| Compression | Index & Store | Retrieve
|
128
|
-
|
129
|
-
| none |
|
130
|
-
| brotli |
|
131
|
-
| bzip2 |
|
132
|
-
| lz4 |
|
148
|
+
| Compression | Index & Store | Retrieve Title | Index size |
|
149
|
+
|-------------|---------------|----------------|------------|
|
150
|
+
| none | 4862 docs/s | 278827 docs/s | 43 MB |
|
151
|
+
| brotli | 3559 docs/s | 178170 docs/s | 36 MB |
|
152
|
+
| bzip2 | 2628 docs/s | 81877 docs/s | 38 MB |
|
153
|
+
| lz4 | 4648 docs/s | 232236 docs/s | 41 MB |
|
133
154
|
|
134
155
|
## Future
|
135
156
|
|
data/ext/isomorfeus_ferret_ext/bzlib.h
CHANGED
@@ -11,7 +11,7 @@
|
|
11
11
|
bzip2/libbzip2 version 1.0.8 of 13 July 2019
|
12
12
|
Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
|
13
13
|
|
14
|
-
Please read the WARNING, DISCLAIMER and PATENTS sections in the
|
14
|
+
Please read the WARNING, DISCLAIMER and PATENTS sections in the
|
15
15
|
README file.
|
16
16
|
|
17
17
|
This program is released under the terms of the license contained
|
@@ -25,6 +25,7 @@
|
|
25
25
|
#ifdef __cplusplus
|
26
26
|
extern "C" {
|
27
27
|
#endif
|
28
|
+
#define BZ_DEBUG 0
|
28
29
|
|
29
30
|
#define BZ_RUN 0
|
30
31
|
#define BZ_FLUSH 1
|
@@ -45,7 +46,7 @@ extern "C" {
|
|
45
46
|
#define BZ_OUTBUFF_FULL (-8)
|
46
47
|
#define BZ_CONFIG_ERROR (-9)
|
47
48
|
|
48
|
-
typedef
|
49
|
+
typedef
|
49
50
|
struct {
|
50
51
|
char *next_in;
|
51
52
|
unsigned int avail_in;
|
@@ -62,7 +63,7 @@ typedef
|
|
62
63
|
void *(*bzalloc)(void *,int,int);
|
63
64
|
void (*bzfree)(void *,void *);
|
64
65
|
void *opaque;
|
65
|
-
}
|
66
|
+
}
|
66
67
|
bz_stream;
|
67
68
|
|
68
69
|
|
@@ -97,34 +98,34 @@ typedef
|
|
97
98
|
|
98
99
|
/*-- Core (low-level) library functions --*/
|
99
100
|
|
100
|
-
BZ_EXTERN int BZ_API(BZ2_bzCompressInit) (
|
101
|
-
bz_stream* strm,
|
102
|
-
int blockSize100k,
|
103
|
-
int verbosity,
|
104
|
-
int workFactor
|
101
|
+
BZ_EXTERN int BZ_API(BZ2_bzCompressInit) (
|
102
|
+
bz_stream* strm,
|
103
|
+
int blockSize100k,
|
104
|
+
int verbosity,
|
105
|
+
int workFactor
|
105
106
|
);
|
106
107
|
|
107
|
-
BZ_EXTERN int BZ_API(BZ2_bzCompress) (
|
108
|
-
bz_stream* strm,
|
109
|
-
int action
|
108
|
+
BZ_EXTERN int BZ_API(BZ2_bzCompress) (
|
109
|
+
bz_stream* strm,
|
110
|
+
int action
|
110
111
|
);
|
111
112
|
|
112
|
-
BZ_EXTERN int BZ_API(BZ2_bzCompressEnd) (
|
113
|
-
bz_stream* strm
|
113
|
+
BZ_EXTERN int BZ_API(BZ2_bzCompressEnd) (
|
114
|
+
bz_stream* strm
|
114
115
|
);
|
115
116
|
|
116
|
-
BZ_EXTERN int BZ_API(BZ2_bzDecompressInit) (
|
117
|
-
bz_stream *strm,
|
118
|
-
int verbosity,
|
117
|
+
BZ_EXTERN int BZ_API(BZ2_bzDecompressInit) (
|
118
|
+
bz_stream *strm,
|
119
|
+
int verbosity,
|
119
120
|
int small
|
120
121
|
);
|
121
122
|
|
122
|
-
BZ_EXTERN int BZ_API(BZ2_bzDecompress) (
|
123
|
-
bz_stream* strm
|
123
|
+
BZ_EXTERN int BZ_API(BZ2_bzDecompress) (
|
124
|
+
bz_stream* strm
|
124
125
|
);
|
125
126
|
|
126
|
-
BZ_EXTERN int BZ_API(BZ2_bzDecompressEnd) (
|
127
|
-
bz_stream *strm
|
127
|
+
BZ_EXTERN int BZ_API(BZ2_bzDecompressEnd) (
|
128
|
+
bz_stream *strm
|
128
129
|
);
|
129
130
|
|
130
131
|
|
@@ -136,64 +137,64 @@ BZ_EXTERN int BZ_API(BZ2_bzDecompressEnd) (
|
|
136
137
|
|
137
138
|
typedef void BZFILE;
|
138
139
|
|
139
|
-
BZ_EXTERN BZFILE* BZ_API(BZ2_bzReadOpen) (
|
140
|
-
int* bzerror,
|
141
|
-
FILE* f,
|
142
|
-
int verbosity,
|
140
|
+
BZ_EXTERN BZFILE* BZ_API(BZ2_bzReadOpen) (
|
141
|
+
int* bzerror,
|
142
|
+
FILE* f,
|
143
|
+
int verbosity,
|
143
144
|
int small,
|
144
|
-
void* unused,
|
145
|
-
int nUnused
|
145
|
+
void* unused,
|
146
|
+
int nUnused
|
146
147
|
);
|
147
148
|
|
148
|
-
BZ_EXTERN void BZ_API(BZ2_bzReadClose) (
|
149
|
-
int* bzerror,
|
150
|
-
BZFILE* b
|
149
|
+
BZ_EXTERN void BZ_API(BZ2_bzReadClose) (
|
150
|
+
int* bzerror,
|
151
|
+
BZFILE* b
|
151
152
|
);
|
152
153
|
|
153
|
-
BZ_EXTERN void BZ_API(BZ2_bzReadGetUnused) (
|
154
|
-
int* bzerror,
|
155
|
-
BZFILE* b,
|
156
|
-
void** unused,
|
157
|
-
int* nUnused
|
154
|
+
BZ_EXTERN void BZ_API(BZ2_bzReadGetUnused) (
|
155
|
+
int* bzerror,
|
156
|
+
BZFILE* b,
|
157
|
+
void** unused,
|
158
|
+
int* nUnused
|
158
159
|
);
|
159
160
|
|
160
|
-
BZ_EXTERN int BZ_API(BZ2_bzRead) (
|
161
|
-
int* bzerror,
|
162
|
-
BZFILE* b,
|
163
|
-
void* buf,
|
164
|
-
int len
|
161
|
+
BZ_EXTERN int BZ_API(BZ2_bzRead) (
|
162
|
+
int* bzerror,
|
163
|
+
BZFILE* b,
|
164
|
+
void* buf,
|
165
|
+
int len
|
165
166
|
);
|
166
167
|
|
167
|
-
BZ_EXTERN BZFILE* BZ_API(BZ2_bzWriteOpen) (
|
168
|
-
int* bzerror,
|
169
|
-
FILE* f,
|
170
|
-
int blockSize100k,
|
171
|
-
int verbosity,
|
172
|
-
int workFactor
|
168
|
+
BZ_EXTERN BZFILE* BZ_API(BZ2_bzWriteOpen) (
|
169
|
+
int* bzerror,
|
170
|
+
FILE* f,
|
171
|
+
int blockSize100k,
|
172
|
+
int verbosity,
|
173
|
+
int workFactor
|
173
174
|
);
|
174
175
|
|
175
|
-
BZ_EXTERN void BZ_API(BZ2_bzWrite) (
|
176
|
-
int* bzerror,
|
177
|
-
BZFILE* b,
|
178
|
-
void* buf,
|
179
|
-
int len
|
176
|
+
BZ_EXTERN void BZ_API(BZ2_bzWrite) (
|
177
|
+
int* bzerror,
|
178
|
+
BZFILE* b,
|
179
|
+
void* buf,
|
180
|
+
int len
|
180
181
|
);
|
181
182
|
|
182
|
-
BZ_EXTERN void BZ_API(BZ2_bzWriteClose) (
|
183
|
-
int* bzerror,
|
184
|
-
BZFILE* b,
|
185
|
-
int abandon,
|
186
|
-
unsigned int* nbytes_in,
|
187
|
-
unsigned int* nbytes_out
|
183
|
+
BZ_EXTERN void BZ_API(BZ2_bzWriteClose) (
|
184
|
+
int* bzerror,
|
185
|
+
BZFILE* b,
|
186
|
+
int abandon,
|
187
|
+
unsigned int* nbytes_in,
|
188
|
+
unsigned int* nbytes_out
|
188
189
|
);
|
189
190
|
|
190
|
-
BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) (
|
191
|
-
int* bzerror,
|
192
|
-
BZFILE* b,
|
193
|
-
int abandon,
|
194
|
-
unsigned int* nbytes_in_lo32,
|
195
|
-
unsigned int* nbytes_in_hi32,
|
196
|
-
unsigned int* nbytes_out_lo32,
|
191
|
+
BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) (
|
192
|
+
int* bzerror,
|
193
|
+
BZFILE* b,
|
194
|
+
int abandon,
|
195
|
+
unsigned int* nbytes_in_lo32,
|
196
|
+
unsigned int* nbytes_in_hi32,
|
197
|
+
unsigned int* nbytes_out_lo32,
|
197
198
|
unsigned int* nbytes_out_hi32
|
198
199
|
);
|
199
200
|
#endif
|
@@ -201,23 +202,23 @@ BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) (
|
|
201
202
|
|
202
203
|
/*-- Utility functions --*/
|
203
204
|
|
204
|
-
BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffCompress) (
|
205
|
-
char* dest,
|
205
|
+
BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffCompress) (
|
206
|
+
char* dest,
|
206
207
|
unsigned int* destLen,
|
207
|
-
char* source,
|
208
|
+
char* source,
|
208
209
|
unsigned int sourceLen,
|
209
|
-
int blockSize100k,
|
210
|
-
int verbosity,
|
211
|
-
int workFactor
|
210
|
+
int blockSize100k,
|
211
|
+
int verbosity,
|
212
|
+
int workFactor
|
212
213
|
);
|
213
214
|
|
214
|
-
BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffDecompress) (
|
215
|
-
char* dest,
|
215
|
+
BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffDecompress) (
|
216
|
+
char* dest,
|
216
217
|
unsigned int* destLen,
|
217
|
-
char* source,
|
218
|
+
char* source,
|
218
219
|
unsigned int sourceLen,
|
219
|
-
int small,
|
220
|
-
int verbosity
|
220
|
+
int small,
|
221
|
+
int verbosity
|
221
222
|
);
|
222
223
|
|
223
224
|
|
@@ -244,17 +245,17 @@ BZ_EXTERN BZFILE * BZ_API(BZ2_bzdopen) (
|
|
244
245
|
int fd,
|
245
246
|
const char *mode
|
246
247
|
);
|
247
|
-
|
248
|
+
|
248
249
|
BZ_EXTERN int BZ_API(BZ2_bzread) (
|
249
|
-
BZFILE* b,
|
250
|
-
void* buf,
|
251
|
-
int len
|
250
|
+
BZFILE* b,
|
251
|
+
void* buf,
|
252
|
+
int len
|
252
253
|
);
|
253
254
|
|
254
255
|
BZ_EXTERN int BZ_API(BZ2_bzwrite) (
|
255
|
-
BZFILE* b,
|
256
|
-
void* buf,
|
257
|
-
int len
|
256
|
+
BZFILE* b,
|
257
|
+
void* buf,
|
258
|
+
int len
|
258
259
|
);
|
259
260
|
|
260
261
|
BZ_EXTERN int BZ_API(BZ2_bzflush) (
|
@@ -266,7 +267,7 @@ BZ_EXTERN void BZ_API(BZ2_bzclose) (
|
|
266
267
|
);
|
267
268
|
|
268
269
|
BZ_EXTERN const char * BZ_API(BZ2_bzerror) (
|
269
|
-
BZFILE *b,
|
270
|
+
BZFILE *b,
|
270
271
|
int *errnum
|
271
272
|
);
|
272
273
|
#endif
|