isomorfeus-ferret 0.13.10 → 0.14.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +40 -19
- data/ext/isomorfeus_ferret_ext/bzlib.h +83 -82
- data/ext/isomorfeus_ferret_ext/frb_index.c +55 -194
- data/ext/isomorfeus_ferret_ext/frb_lazy_doc.c +705 -0
- data/ext/isomorfeus_ferret_ext/frb_store.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_config.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_hash.h +6 -8
- data/ext/isomorfeus_ferret_ext/frt_hashset.c +5 -5
- data/ext/isomorfeus_ferret_ext/frt_index.c +8 -3
- data/ext/isomorfeus_ferret_ext/frt_index.h +3 -1
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_store.h +1 -1
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +8 -6
- data/lib/isomorfeus/ferret/index/index.rb +11 -8
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e6893e7012cf75189d3ff378b6e869a831a5281472f84ea5ab4e354bd92bfcee
|
4
|
+
data.tar.gz: 0a4cad49faae062c29e0bed8fd7f87c5e3875c548cbb6e168907719b52777306
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4e48ec64d99af7fe0440480f11f22fd79dd9fca0c6dd09ce58bc0953f7f556ee4f61bdf18810f5cb2f39c8f80c99b406c5319c93235d06974009cefb1c73fccb
|
7
|
+
data.tar.gz: 254960eb7543fb59e1d12087f83feaeb4abd957314f76b56778baf7a2fef2e090922b4b99d653e8532e30690a14d90c661ec5f849588a7e384b2a65346f7f04d
|
data/README.md
CHANGED
@@ -11,13 +11,29 @@ At the [Isomorfeus Framework Project](https://isomorfeus.com)
|
|
11
11
|
|
12
12
|
## About this project
|
13
13
|
|
14
|
-
Isomorfeus-Ferret is a revived version of the original ferret gem created by Dave Balmain,
|
14
|
+
Isomorfeus-Ferret is a revived version of the original ferret gem created by Dave Balmain,
|
15
|
+
[https://github.com/dbalmain/ferret](https://github.com/dbalmain/ferret).
|
15
16
|
During revival many things have been fixed; now all tests pass, there are no crashes, and it
|
16
17
|
successfully compiles and runs with Ruby versions > 3. It's no longer a goal to have
|
17
18
|
a C library available; instead, it is meant to be used as a Ruby gem with a C extension only.
|
18
19
|
|
19
20
|
It works on *nixes, *nuxes, *BSDs and also works on Windows and RaspberryPi.
|
20
21
|
|
22
|
+
## Improvements and Changes in Version 0.14
|
23
|
+
|
24
|
+
### Breaking
|
25
|
+
|
26
|
+
- The API for LazyDocs has changed; they are read-only now. LazyDoc#to_h may be used to create a hash that may be changed and reindexed as a doc.
|
27
|
+
|
28
|
+
### Performance
|
29
|
+
|
30
|
+
- LazyDoc is now truly lazy; fields are automatically retrieved. LazyDoc#load is no longer required, but may be used to preload all fields.
|
31
|
+
- Index#each is now multiple times faster, depending on use case.
|
32
|
+
|
33
|
+
### Other
|
34
|
+
|
35
|
+
- The Index class now includes Enumerable
|
36
|
+
|
21
37
|
## Improvements and Changes in Version 0.13
|
22
38
|
|
23
39
|
### Breaking
|
@@ -99,37 +115,42 @@ Ensure your locale is set to C.UTF-8, because the internal c tests don't know ho
|
|
99
115
|
|
100
116
|
A recent Java JDK must be installed to compile and run lucene benchmarks.
|
101
117
|
|
102
|
-
Results, Ferret 0.
|
103
|
-
Linux Ubuntu 20.04, FreeBSD 13.
|
118
|
+
Results, Ferret 0.14.0 vs. Lucene 9.1.0, WhitespaceAnalyzer,
|
119
|
+
Linux Ubuntu 20.04, FreeBSD 13.1 and Windows 10 on old Intel Core i5 from 2015,
|
104
120
|
LinuxPi on RaspberryPi 400:
|
105
121
|
|
106
122
|
| OS | Task | Ferret | Lucene* |
|
107
123
|
|---------|------------|-----------------|----------------|
|
108
|
-
| Linux | Indexing |
|
109
|
-
| FreeBSD | Indexing |
|
110
|
-
| Windows | Indexing |
|
111
|
-
| LinuxPi | Indexing |
|
112
|
-
| Linux | Searching |
|
113
|
-
| FreeBSD | Searching |
|
114
|
-
| Windows | Searching |
|
115
|
-
| LinuxPi | Searching |
|
124
|
+
| Linux | Indexing | 5125 docs/s | 4671 docs/s |
|
125
|
+
| FreeBSD | Indexing | 4537 docs/s | 3831 docs/s |
|
126
|
+
| Windows | Indexing | 2488 docs/s | 2588 docs/s |
|
127
|
+
| LinuxPi | Indexing | 1200 docs/s | 551 docs/s |
|
128
|
+
| Linux | Searching | 26610 queries/s | 7165 queries/s |
|
129
|
+
| FreeBSD | Searching | 24167 queries/s | 4288 queries/s |
|
130
|
+
| Windows | Searching | 3901 queries/s | 1033 queries/s |
|
131
|
+
| LinuxPi | Searching | 6194 queries/s | 769 queries/s |
|
116
132
|
| | Index Size | 28 MB | 35 MB |
|
117
133
|
|
118
|
-
*
|
134
|
+
* JVM Versions:
|
135
|
+
OpenJDK Runtime Environment (build 18-ea+36-Ubuntu-1) (Linux)
|
136
|
+
OpenJDK Runtime Environment (build 17.0.3+7-Raspbian-1deb11u1rpt1) (LinuxPi)
|
137
|
+
OpenJDK Runtime Environment Temurin-18.0.1+10 (build 18.0.1+10) (Windows)
|
138
|
+
OpenJDK Runtime Environment (build 17.0.2+8-1) (FreeBSD)
|
119
139
|
|
120
140
|
### Storing Fields with Compression, Indexing and Retrieval
|
141
|
+
|
121
142
|
- clone repo
|
122
143
|
- bundle install
|
123
144
|
- rake ferret_compression_benchmark
|
124
145
|
|
125
|
-
Results on Linux, 0.
|
146
|
+
Results on Linux, 0.14.0, on old Intel Core i5 from 2015:
|
126
147
|
|
127
|
-
| Compression | Index & Store | Retrieve
|
128
|
-
|
129
|
-
| none |
|
130
|
-
| brotli |
|
131
|
-
| bzip2 |
|
132
|
-
| lz4 |
|
148
|
+
| Compression | Index & Store | Retrieve Title | Index size |
|
149
|
+
|-------------|---------------|----------------|------------|
|
150
|
+
| none | 4862 docs/s | 278827 docs/s | 43 MB |
|
151
|
+
| brotli | 3559 docs/s | 178170 docs/s | 36 MB |
|
152
|
+
| bzip2 | 2628 docs/s | 81877 docs/s | 38 MB |
|
153
|
+
| lz4 | 4648 docs/s | 232236 docs/s | 41 MB |
|
133
154
|
|
134
155
|
## Future
|
135
156
|
|
@@ -11,7 +11,7 @@
|
|
11
11
|
bzip2/libbzip2 version 1.0.8 of 13 July 2019
|
12
12
|
Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
|
13
13
|
|
14
|
-
Please read the WARNING, DISCLAIMER and PATENTS sections in the
|
14
|
+
Please read the WARNING, DISCLAIMER and PATENTS sections in the
|
15
15
|
README file.
|
16
16
|
|
17
17
|
This program is released under the terms of the license contained
|
@@ -25,6 +25,7 @@
|
|
25
25
|
#ifdef __cplusplus
|
26
26
|
extern "C" {
|
27
27
|
#endif
|
28
|
+
#define BZ_DEBUG 0
|
28
29
|
|
29
30
|
#define BZ_RUN 0
|
30
31
|
#define BZ_FLUSH 1
|
@@ -45,7 +46,7 @@ extern "C" {
|
|
45
46
|
#define BZ_OUTBUFF_FULL (-8)
|
46
47
|
#define BZ_CONFIG_ERROR (-9)
|
47
48
|
|
48
|
-
typedef
|
49
|
+
typedef
|
49
50
|
struct {
|
50
51
|
char *next_in;
|
51
52
|
unsigned int avail_in;
|
@@ -62,7 +63,7 @@ typedef
|
|
62
63
|
void *(*bzalloc)(void *,int,int);
|
63
64
|
void (*bzfree)(void *,void *);
|
64
65
|
void *opaque;
|
65
|
-
}
|
66
|
+
}
|
66
67
|
bz_stream;
|
67
68
|
|
68
69
|
|
@@ -97,34 +98,34 @@ typedef
|
|
97
98
|
|
98
99
|
/*-- Core (low-level) library functions --*/
|
99
100
|
|
100
|
-
BZ_EXTERN int BZ_API(BZ2_bzCompressInit) (
|
101
|
-
bz_stream* strm,
|
102
|
-
int blockSize100k,
|
103
|
-
int verbosity,
|
104
|
-
int workFactor
|
101
|
+
BZ_EXTERN int BZ_API(BZ2_bzCompressInit) (
|
102
|
+
bz_stream* strm,
|
103
|
+
int blockSize100k,
|
104
|
+
int verbosity,
|
105
|
+
int workFactor
|
105
106
|
);
|
106
107
|
|
107
|
-
BZ_EXTERN int BZ_API(BZ2_bzCompress) (
|
108
|
-
bz_stream* strm,
|
109
|
-
int action
|
108
|
+
BZ_EXTERN int BZ_API(BZ2_bzCompress) (
|
109
|
+
bz_stream* strm,
|
110
|
+
int action
|
110
111
|
);
|
111
112
|
|
112
|
-
BZ_EXTERN int BZ_API(BZ2_bzCompressEnd) (
|
113
|
-
bz_stream* strm
|
113
|
+
BZ_EXTERN int BZ_API(BZ2_bzCompressEnd) (
|
114
|
+
bz_stream* strm
|
114
115
|
);
|
115
116
|
|
116
|
-
BZ_EXTERN int BZ_API(BZ2_bzDecompressInit) (
|
117
|
-
bz_stream *strm,
|
118
|
-
int verbosity,
|
117
|
+
BZ_EXTERN int BZ_API(BZ2_bzDecompressInit) (
|
118
|
+
bz_stream *strm,
|
119
|
+
int verbosity,
|
119
120
|
int small
|
120
121
|
);
|
121
122
|
|
122
|
-
BZ_EXTERN int BZ_API(BZ2_bzDecompress) (
|
123
|
-
bz_stream* strm
|
123
|
+
BZ_EXTERN int BZ_API(BZ2_bzDecompress) (
|
124
|
+
bz_stream* strm
|
124
125
|
);
|
125
126
|
|
126
|
-
BZ_EXTERN int BZ_API(BZ2_bzDecompressEnd) (
|
127
|
-
bz_stream *strm
|
127
|
+
BZ_EXTERN int BZ_API(BZ2_bzDecompressEnd) (
|
128
|
+
bz_stream *strm
|
128
129
|
);
|
129
130
|
|
130
131
|
|
@@ -136,64 +137,64 @@ BZ_EXTERN int BZ_API(BZ2_bzDecompressEnd) (
|
|
136
137
|
|
137
138
|
typedef void BZFILE;
|
138
139
|
|
139
|
-
BZ_EXTERN BZFILE* BZ_API(BZ2_bzReadOpen) (
|
140
|
-
int* bzerror,
|
141
|
-
FILE* f,
|
142
|
-
int verbosity,
|
140
|
+
BZ_EXTERN BZFILE* BZ_API(BZ2_bzReadOpen) (
|
141
|
+
int* bzerror,
|
142
|
+
FILE* f,
|
143
|
+
int verbosity,
|
143
144
|
int small,
|
144
|
-
void* unused,
|
145
|
-
int nUnused
|
145
|
+
void* unused,
|
146
|
+
int nUnused
|
146
147
|
);
|
147
148
|
|
148
|
-
BZ_EXTERN void BZ_API(BZ2_bzReadClose) (
|
149
|
-
int* bzerror,
|
150
|
-
BZFILE* b
|
149
|
+
BZ_EXTERN void BZ_API(BZ2_bzReadClose) (
|
150
|
+
int* bzerror,
|
151
|
+
BZFILE* b
|
151
152
|
);
|
152
153
|
|
153
|
-
BZ_EXTERN void BZ_API(BZ2_bzReadGetUnused) (
|
154
|
-
int* bzerror,
|
155
|
-
BZFILE* b,
|
156
|
-
void** unused,
|
157
|
-
int* nUnused
|
154
|
+
BZ_EXTERN void BZ_API(BZ2_bzReadGetUnused) (
|
155
|
+
int* bzerror,
|
156
|
+
BZFILE* b,
|
157
|
+
void** unused,
|
158
|
+
int* nUnused
|
158
159
|
);
|
159
160
|
|
160
|
-
BZ_EXTERN int BZ_API(BZ2_bzRead) (
|
161
|
-
int* bzerror,
|
162
|
-
BZFILE* b,
|
163
|
-
void* buf,
|
164
|
-
int len
|
161
|
+
BZ_EXTERN int BZ_API(BZ2_bzRead) (
|
162
|
+
int* bzerror,
|
163
|
+
BZFILE* b,
|
164
|
+
void* buf,
|
165
|
+
int len
|
165
166
|
);
|
166
167
|
|
167
|
-
BZ_EXTERN BZFILE* BZ_API(BZ2_bzWriteOpen) (
|
168
|
-
int* bzerror,
|
169
|
-
FILE* f,
|
170
|
-
int blockSize100k,
|
171
|
-
int verbosity,
|
172
|
-
int workFactor
|
168
|
+
BZ_EXTERN BZFILE* BZ_API(BZ2_bzWriteOpen) (
|
169
|
+
int* bzerror,
|
170
|
+
FILE* f,
|
171
|
+
int blockSize100k,
|
172
|
+
int verbosity,
|
173
|
+
int workFactor
|
173
174
|
);
|
174
175
|
|
175
|
-
BZ_EXTERN void BZ_API(BZ2_bzWrite) (
|
176
|
-
int* bzerror,
|
177
|
-
BZFILE* b,
|
178
|
-
void* buf,
|
179
|
-
int len
|
176
|
+
BZ_EXTERN void BZ_API(BZ2_bzWrite) (
|
177
|
+
int* bzerror,
|
178
|
+
BZFILE* b,
|
179
|
+
void* buf,
|
180
|
+
int len
|
180
181
|
);
|
181
182
|
|
182
|
-
BZ_EXTERN void BZ_API(BZ2_bzWriteClose) (
|
183
|
-
int* bzerror,
|
184
|
-
BZFILE* b,
|
185
|
-
int abandon,
|
186
|
-
unsigned int* nbytes_in,
|
187
|
-
unsigned int* nbytes_out
|
183
|
+
BZ_EXTERN void BZ_API(BZ2_bzWriteClose) (
|
184
|
+
int* bzerror,
|
185
|
+
BZFILE* b,
|
186
|
+
int abandon,
|
187
|
+
unsigned int* nbytes_in,
|
188
|
+
unsigned int* nbytes_out
|
188
189
|
);
|
189
190
|
|
190
|
-
BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) (
|
191
|
-
int* bzerror,
|
192
|
-
BZFILE* b,
|
193
|
-
int abandon,
|
194
|
-
unsigned int* nbytes_in_lo32,
|
195
|
-
unsigned int* nbytes_in_hi32,
|
196
|
-
unsigned int* nbytes_out_lo32,
|
191
|
+
BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) (
|
192
|
+
int* bzerror,
|
193
|
+
BZFILE* b,
|
194
|
+
int abandon,
|
195
|
+
unsigned int* nbytes_in_lo32,
|
196
|
+
unsigned int* nbytes_in_hi32,
|
197
|
+
unsigned int* nbytes_out_lo32,
|
197
198
|
unsigned int* nbytes_out_hi32
|
198
199
|
);
|
199
200
|
#endif
|
@@ -201,23 +202,23 @@ BZ_EXTERN void BZ_API(BZ2_bzWriteClose64) (
|
|
201
202
|
|
202
203
|
/*-- Utility functions --*/
|
203
204
|
|
204
|
-
BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffCompress) (
|
205
|
-
char* dest,
|
205
|
+
BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffCompress) (
|
206
|
+
char* dest,
|
206
207
|
unsigned int* destLen,
|
207
|
-
char* source,
|
208
|
+
char* source,
|
208
209
|
unsigned int sourceLen,
|
209
|
-
int blockSize100k,
|
210
|
-
int verbosity,
|
211
|
-
int workFactor
|
210
|
+
int blockSize100k,
|
211
|
+
int verbosity,
|
212
|
+
int workFactor
|
212
213
|
);
|
213
214
|
|
214
|
-
BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffDecompress) (
|
215
|
-
char* dest,
|
215
|
+
BZ_EXTERN int BZ_API(BZ2_bzBuffToBuffDecompress) (
|
216
|
+
char* dest,
|
216
217
|
unsigned int* destLen,
|
217
|
-
char* source,
|
218
|
+
char* source,
|
218
219
|
unsigned int sourceLen,
|
219
|
-
int small,
|
220
|
-
int verbosity
|
220
|
+
int small,
|
221
|
+
int verbosity
|
221
222
|
);
|
222
223
|
|
223
224
|
|
@@ -244,17 +245,17 @@ BZ_EXTERN BZFILE * BZ_API(BZ2_bzdopen) (
|
|
244
245
|
int fd,
|
245
246
|
const char *mode
|
246
247
|
);
|
247
|
-
|
248
|
+
|
248
249
|
BZ_EXTERN int BZ_API(BZ2_bzread) (
|
249
|
-
BZFILE* b,
|
250
|
-
void* buf,
|
251
|
-
int len
|
250
|
+
BZFILE* b,
|
251
|
+
void* buf,
|
252
|
+
int len
|
252
253
|
);
|
253
254
|
|
254
255
|
BZ_EXTERN int BZ_API(BZ2_bzwrite) (
|
255
|
-
BZFILE* b,
|
256
|
-
void* buf,
|
257
|
-
int len
|
256
|
+
BZFILE* b,
|
257
|
+
void* buf,
|
258
|
+
int len
|
258
259
|
);
|
259
260
|
|
260
261
|
BZ_EXTERN int BZ_API(BZ2_bzflush) (
|
@@ -266,7 +267,7 @@ BZ_EXTERN void BZ_API(BZ2_bzclose) (
|
|
266
267
|
);
|
267
268
|
|
268
269
|
BZ_EXTERN const char * BZ_API(BZ2_bzerror) (
|
269
|
-
BZFILE *b,
|
270
|
+
BZFILE *b,
|
270
271
|
int *errnum
|
271
272
|
);
|
272
273
|
#endif
|