isomorfeus-ferret 0.13.0 → 0.13.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +31 -12
- data/ext/isomorfeus_ferret_ext/{bzip_blocksort.c → bzlib_blocksort.c} +0 -0
- data/ext/isomorfeus_ferret_ext/{bzip_huffman.c → bzlib_huffman.c} +0 -0
- data/ext/isomorfeus_ferret_ext/frt_global.c +0 -151
- data/ext/isomorfeus_ferret_ext/frt_global.h +0 -15
- data/ext/isomorfeus_ferret_ext/frt_lang.c +0 -2
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +1 -0
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +1 -3
- data/ext/isomorfeus_ferret_ext/test.c +0 -16
- data/ext/isomorfeus_ferret_ext/test_global.c +0 -46
- data/ext/isomorfeus_ferret_ext/test_search.c +0 -2
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 98439b4a9e6ca849246c6e2ddd2ce1bbf117182025b3651d4f6a95593aff0eb6
|
4
|
+
data.tar.gz: 0e1e90c4bfce1014c9983f4bb5be0f3123ef502788f464e988c4671bdd1758ed
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: da674772ba34175364d0d4d93023ef380eab06334b4f9c3e1956934289a7a2016387607740fb83cc11753a3bb72de62208390933ff9f4cc341524b7fe6c0c6af
|
7
|
+
data.tar.gz: 8261029020f33cb9fb52453e007defdfd2c1dd029f9da22a02b64ae0047bcd333e62e6d6a8d15263ee71089e203f393f5e865138a31784edf48f66a483a0ffce
|
data/README.md
CHANGED
@@ -50,6 +50,7 @@ Compression semantics have changed, now Brotli, BZip2 and LZ4 compression codecs
|
|
50
50
|
- BZip2: slow compression, slow decompression, high compression ratio
|
51
51
|
- Brotli: slow compression, fast decompression, high compression ratio, recommended for general purpose.
|
52
52
|
- LZ4: fast compression, fast decompression, low compression ratio
|
53
|
+
|
53
54
|
To see performance and compression ratios, run `rake ferret_compression_bench` from the cloned repo.
|
54
55
|
It uses data and code within the misc/ferret_vs_lucene directory.
|
55
56
|
|
@@ -96,6 +97,7 @@ Ensure your locale is set to C.UTF-8, because the internal c tests don't know ho
|
|
96
97
|
|
97
98
|
## Benchmarks
|
98
99
|
|
100
|
+
### Indexing and Searching
|
99
101
|
- clone repo
|
100
102
|
- bundle install
|
101
103
|
- rake ferret_vs_lucene
|
@@ -104,20 +106,37 @@ A recent Java JDK must be installed to compile and run lucene benchmarks.
|
|
104
106
|
|
105
107
|
Results on Linux:
|
106
108
|
```
|
107
|
-
Ferret:
|
108
|
-
Indexing
|
109
|
-
Searching took: 0.
|
110
|
-
thats
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
109
|
+
Ferret 0.13.0:
|
110
|
+
Indexing: 9.35 secs, Docs: 19043, 2035 docs/s
|
111
|
+
Searching took: 0.3133133s for 8000 queries
|
112
|
+
thats 25533 q/s
|
113
|
+
Total found: 42000
|
114
|
+
Index size: 28Mb
|
115
|
+
|
116
|
+
Lucene 9.1.0:
|
117
|
+
Indexing: 4.20 secs, Docs: 19043, 4538 docs/s
|
118
|
+
Searching took: 1.64s for 8000 queries
|
119
|
+
thats 4875 q/s
|
120
|
+
Total found: 41000
|
121
|
+
index size: 35Mb
|
122
|
+
|
123
|
+
JVM 11.0.14.1 (Ubuntu)
|
119
124
|
```
|
120
125
|
|
126
|
+
### Storing Fields with Compression, Indexing and Retrieval
|
127
|
+
- clone repo
|
128
|
+
- bundle install
|
129
|
+
- rake ferret_compression_benchmark
|
130
|
+
|
131
|
+
Results on Linux, 0.13.0:
|
132
|
+
|
133
|
+
| Compression | Index & Store | Retrieve | Index size |
|
134
|
+
|-------------|---------------|---------------|------------|
|
135
|
+
| none | 2008 docs/s | 153853 docs/s | 43 MB |
|
136
|
+
| brotli | 1726 docs/s | 58315 docs/s | 36 MB |
|
137
|
+
| bzip2 | 1438 docs/s | 15382 docs/s | 38 MB |
|
138
|
+
| lz4 | 1932 docs/s | 127100 docs/s | 41 MB |
|
139
|
+
|
121
140
|
## Future
|
122
141
|
|
123
142
|
Lots of things to do:
|
File without changes
|
File without changes
|
@@ -232,71 +232,6 @@ void frt_dummy_free(void *p) {
|
|
232
232
|
(void)p; /* suppress unused argument warning */
|
233
233
|
}
|
234
234
|
|
235
|
-
#ifdef HAVE_GDB
|
236
|
-
#define CMD_BUF_SIZE (128 + FILENAME_MAX)
|
237
|
-
/* need to declare this as it is masked by default in linux */
|
238
|
-
|
239
|
-
static char *build_shell_command(void) {
|
240
|
-
int pid = getpid();
|
241
|
-
char *buf = FRT_ALLOC_N(char, CMD_BUF_SIZE);
|
242
|
-
char *command =
|
243
|
-
"gdb -quiet -ex='bt' -ex='quit' %s %d 2>/dev/null | grep '^[ #]'";
|
244
|
-
|
245
|
-
snprintf(buf, CMD_BUF_SIZE, command, frt_progname(), pid);
|
246
|
-
return buf;
|
247
|
-
}
|
248
|
-
|
249
|
-
#endif
|
250
|
-
|
251
|
-
/**
|
252
|
-
* Call out to gdb to get our stacktrace.
|
253
|
-
*/
|
254
|
-
char *frt_get_stacktrace(void) {
|
255
|
-
#ifdef HAVE_GDB
|
256
|
-
FILE *stream;
|
257
|
-
char *gdb_filename = NULL, *buf = NULL, *stack = NULL;
|
258
|
-
int offset = -FRT_BUFFER_SIZE;
|
259
|
-
|
260
|
-
if ( !(buf = build_shell_command()) ) {
|
261
|
-
fprintf(EXCEPTION_STREAM,
|
262
|
-
"Unable to build stacktrace shell command\n");
|
263
|
-
goto cleanup;
|
264
|
-
}
|
265
|
-
|
266
|
-
if ( !(stream = popen(buf, "r")) ) {
|
267
|
-
fprintf(EXCEPTION_STREAM,
|
268
|
-
"Unable to exec stacktrace shell command: '%s'\n", buf);
|
269
|
-
goto cleanup;
|
270
|
-
}
|
271
|
-
|
272
|
-
do {
|
273
|
-
offset += FRT_BUFFER_SIZE;
|
274
|
-
FRT_REALLOC_N(stack, char, offset + FRT_BUFFER_SIZE);
|
275
|
-
FRT_ZEROSET_N(stack + offset, char, FRT_BUFFER_SIZE);
|
276
|
-
} while(fread(stack + offset, 1, FRT_BUFFER_SIZE, stream) == FRT_BUFFER_SIZE);
|
277
|
-
|
278
|
-
pclose(stream);
|
279
|
-
|
280
|
-
cleanup:
|
281
|
-
if (gdb_filename) free(gdb_filename);
|
282
|
-
if (buf) free(buf);
|
283
|
-
return stack;
|
284
|
-
#else
|
285
|
-
return NULL;
|
286
|
-
#endif
|
287
|
-
}
|
288
|
-
|
289
|
-
void frt_print_stacktrace(void) {
|
290
|
-
char *stack = frt_get_stacktrace();
|
291
|
-
|
292
|
-
if (stack) {
|
293
|
-
fprintf(EXCEPTION_STREAM, "Stack trace:\n%s", stack);
|
294
|
-
free(stack);
|
295
|
-
} else {
|
296
|
-
fprintf(EXCEPTION_STREAM, "Stack trace not available\n");
|
297
|
-
}
|
298
|
-
}
|
299
|
-
|
300
235
|
typedef struct FreeMe {
|
301
236
|
void *p;
|
302
237
|
frt_free_ft free_func;
|
@@ -321,55 +256,7 @@ void frt_register_for_cleanup(void *p, frt_free_ft free_func) {
|
|
321
256
|
free_me->free_func = free_func;
|
322
257
|
}
|
323
258
|
|
324
|
-
#define MAX_PROG_NAME 200
|
325
|
-
static char name[MAX_PROG_NAME]; /* program name for error msgs */
|
326
|
-
|
327
|
-
/* frt_setprogname: set stored name of program */
|
328
|
-
void frt_setprogname(const char *str) {
|
329
|
-
strncpy(name, str, sizeof(name) - 1);
|
330
|
-
}
|
331
|
-
|
332
|
-
const char *frt_progname(void) {
|
333
|
-
return name;
|
334
|
-
}
|
335
|
-
|
336
|
-
static const char *signal_to_string(int signum) {
|
337
|
-
switch (signum)
|
338
|
-
{
|
339
|
-
case SIGILL: return "SIGILL";
|
340
|
-
case SIGABRT: return "SIGABRT";
|
341
|
-
case SIGFPE: return "SIGFPE";
|
342
|
-
#if !defined POSH_OS_WIN32 && !defined POSH_OS_WIN64
|
343
|
-
case SIGBUS: return "SIGBUS";
|
344
|
-
#endif
|
345
|
-
case SIGSEGV: return "SIGSEGV";
|
346
|
-
}
|
347
|
-
|
348
|
-
return "Unknown Signal";
|
349
|
-
}
|
350
|
-
|
351
|
-
static void sighandler_crash(int signum) {
|
352
|
-
frt_print_stacktrace();
|
353
|
-
FRT_XEXIT("Signal", "Exiting on signal %s (%d)", signal_to_string(signum), signum);
|
354
|
-
}
|
355
|
-
|
356
|
-
#define SETSIG_IF_UNSET(sig, handler) do { \
|
357
|
-
signal(sig, handler); \
|
358
|
-
} while(0)
|
359
|
-
|
360
259
|
void frt_init(int argc, const char *const argv[]) {
|
361
|
-
if (argc > 0) {
|
362
|
-
frt_setprogname(argv[0]);
|
363
|
-
}
|
364
|
-
|
365
|
-
SETSIG_IF_UNSET(SIGILL , sighandler_crash);
|
366
|
-
SETSIG_IF_UNSET(SIGABRT, sighandler_crash);
|
367
|
-
SETSIG_IF_UNSET(SIGFPE , sighandler_crash);
|
368
|
-
#if !defined POSH_OS_WIN32 && !defined POSH_OS_WIN64
|
369
|
-
SETSIG_IF_UNSET(SIGBUS , sighandler_crash);
|
370
|
-
#endif
|
371
|
-
SETSIG_IF_UNSET(SIGSEGV, sighandler_crash);
|
372
|
-
|
373
260
|
atexit(&frt_hash_finalize);
|
374
261
|
|
375
262
|
utf8_encoding = rb_enc_find("UTF-8");
|
@@ -429,41 +316,3 @@ void frt_init(int argc, const char *const argv[]) {
|
|
429
316
|
FRT_SORT_FIELD_DOC_REV->compare = frt_sort_field_doc_compare; /* compare */
|
430
317
|
FRT_SORT_FIELD_DOC_REV->get_val = frt_sort_field_doc_get_val; /* get_val */
|
431
318
|
}
|
432
|
-
|
433
|
-
/**
|
434
|
-
* For general use when testing
|
435
|
-
*
|
436
|
-
* TODO wrap in #ifdef
|
437
|
-
*/
|
438
|
-
|
439
|
-
static bool p_switch = false;
|
440
|
-
static bool p_switch_tmp = false;
|
441
|
-
|
442
|
-
void p(const char *format, ...) {
|
443
|
-
va_list args;
|
444
|
-
|
445
|
-
if (!p_switch) return;
|
446
|
-
|
447
|
-
va_start(args, format);
|
448
|
-
vfprintf(stderr, format, args);
|
449
|
-
va_end(args);
|
450
|
-
}
|
451
|
-
|
452
|
-
void p_on(void) {
|
453
|
-
fprintf(stderr, "> > > > > STARTING PRINT\n");
|
454
|
-
p_switch = true;
|
455
|
-
}
|
456
|
-
|
457
|
-
void p_off(void) {
|
458
|
-
fprintf(stderr, "< < < < < STOPPING PRINT\n");
|
459
|
-
p_switch = false;
|
460
|
-
}
|
461
|
-
|
462
|
-
void frt_p_pause(void) {
|
463
|
-
p_switch_tmp = p_switch;
|
464
|
-
p_switch = false;
|
465
|
-
}
|
466
|
-
|
467
|
-
void frt_p_resume(void) {
|
468
|
-
p_switch = p_switch_tmp;
|
469
|
-
}
|
@@ -105,9 +105,6 @@ extern char *frt_dbl_to_s(char *buf, double num);
|
|
105
105
|
extern char *frt_strfmt(const char *fmt, ...);
|
106
106
|
extern char *frt_vstrfmt(const char *fmt, va_list args);
|
107
107
|
|
108
|
-
extern char *frt_get_stacktrace();
|
109
|
-
extern void frt_print_stacktrace();
|
110
|
-
|
111
108
|
extern void frt_register_for_cleanup(void *p, frt_free_ft free_func);
|
112
109
|
|
113
110
|
/**
|
@@ -277,18 +274,6 @@ extern bool frt_x_do_logging;
|
|
277
274
|
#endif
|
278
275
|
|
279
276
|
extern void frt_init(int arc, const char *const argv[]);
|
280
|
-
extern void frt_setprogname(const char *str);
|
281
|
-
extern const char *frt_progname();
|
282
277
|
extern void frt_micro_sleep(const int micro_seconds);
|
283
278
|
|
284
|
-
/**
|
285
|
-
* For general use during testing. Switch this on and off for print statements
|
286
|
-
* to only print when p_on is called and not after p_off is called
|
287
|
-
*/
|
288
|
-
extern void p(const char *format, ...);
|
289
|
-
extern void p_on();
|
290
|
-
extern void p_off();
|
291
|
-
extern void frt_p_pause();
|
292
|
-
extern void frt_p_resume();
|
293
|
-
|
294
279
|
#endif
|
@@ -61,7 +61,6 @@ void FRT_VEXIT(const char *err_type, const char *fmt, va_list args)
|
|
61
61
|
# endif
|
62
62
|
{
|
63
63
|
fflush(stdout);
|
64
|
-
fprintf(EXCEPTION_STREAM, "\n%s: ", frt_progname());
|
65
64
|
|
66
65
|
# ifdef FRT_HAS_VARARGS
|
67
66
|
fprintf(EXCEPTION_STREAM, "%s occurred at <%s>:%d in %s\n",
|
@@ -76,7 +75,6 @@ void FRT_VEXIT(const char *err_type, const char *fmt, va_list args)
|
|
76
75
|
}
|
77
76
|
|
78
77
|
fprintf(EXCEPTION_STREAM, "\n");
|
79
|
-
frt_print_stacktrace();
|
80
78
|
if (frt_x_abort_on_exception) {
|
81
79
|
exit(2); /* conventional value for failed execution */
|
82
80
|
}
|
@@ -1286,6 +1286,7 @@ FrtBooleanClause *frt_bc_alloc(void) {
|
|
1286
1286
|
FrtBooleanClause *frt_bc_init(FrtBooleanClause *self, FrtQuery *query, FrtBCType occur) {
|
1287
1287
|
self->ref_cnt = 1;
|
1288
1288
|
self->query = query;
|
1289
|
+
self->rbc = Qnil;
|
1289
1290
|
frt_bc_set_occur(self, occur);
|
1290
1291
|
return self;
|
1291
1292
|
}
|
@@ -279,22 +279,10 @@ static void append_to_msg_buf(const char *fmt, ...)
|
|
279
279
|
va_end(args);
|
280
280
|
}
|
281
281
|
|
282
|
-
|
283
|
-
static void Tstack(void) {
|
284
|
-
if (show_stack) {
|
285
|
-
char *stack = frt_get_stacktrace();
|
286
|
-
if (stack) {
|
287
|
-
append_to_msg_buf("\n\nStack trace:\n%s\n", stack);
|
288
|
-
free(stack);
|
289
|
-
}
|
290
|
-
}
|
291
|
-
}
|
292
|
-
|
293
282
|
static void vTmsg_nf(const char *fmt, va_list args)
|
294
283
|
{
|
295
284
|
if (verbose) {
|
296
285
|
vappend_to_msg_buf(fmt, args);
|
297
|
-
Tstack();
|
298
286
|
}
|
299
287
|
}
|
300
288
|
|
@@ -305,8 +293,6 @@ void vTmsg(const char *fmt, va_list args)
|
|
305
293
|
vappend_to_msg_buf(fmt, args);
|
306
294
|
va_end(args);
|
307
295
|
append_to_msg_buf("\n");
|
308
|
-
|
309
|
-
Tstack();
|
310
296
|
}
|
311
297
|
}
|
312
298
|
|
@@ -348,8 +334,6 @@ void tst_msg(const char *func, const char *fname, int line_num, const char *fmt,
|
|
348
334
|
va_start(args, fmt);
|
349
335
|
vappend_to_msg_buf(fmt, args);
|
350
336
|
va_end(args);
|
351
|
-
|
352
|
-
Tstack();
|
353
337
|
}
|
354
338
|
}
|
355
339
|
|
@@ -145,50 +145,6 @@ static void test_dbl_to_s(TestCase *tc, void *data)
|
|
145
145
|
Asequal("NaN", frt_dbl_to_s(buf, NAN));
|
146
146
|
}
|
147
147
|
|
148
|
-
|
149
|
-
/**
|
150
|
-
* Generate a stacktrace, make sure it does something
|
151
|
-
*/
|
152
|
-
static void test_stacktrace(TestCase *tc, void *data)
|
153
|
-
{
|
154
|
-
FILE *old_stream = frt_x_exception_stream;
|
155
|
-
(void)data; /* suppress warning */
|
156
|
-
int tfd = fio_tmpfile();
|
157
|
-
frt_x_exception_stream = fdopen(tfd, "w+");
|
158
|
-
Atrue(frt_x_exception_stream != NULL);
|
159
|
-
if (frt_x_exception_stream) {
|
160
|
-
frt_print_stacktrace();
|
161
|
-
long int f = ftell(frt_x_exception_stream);
|
162
|
-
Assert(f, "Stream position should not be 0");
|
163
|
-
fclose(frt_x_exception_stream);
|
164
|
-
}
|
165
|
-
frt_x_exception_stream = old_stream;
|
166
|
-
}
|
167
|
-
|
168
|
-
/**
|
169
|
-
* Generate a normally fatal signal, which gets caught
|
170
|
-
*/
|
171
|
-
/*
|
172
|
-
static void test_sighandler(TestCase *tc, void *data)
|
173
|
-
{
|
174
|
-
bool old_abort = frt_x_abort_on_exception;
|
175
|
-
FILE *old_stream = frt_x_exception_stream;
|
176
|
-
(void)data;
|
177
|
-
(void)tc;
|
178
|
-
|
179
|
-
frt_x_exception_stream = false;
|
180
|
-
frt_x_exception_stream = tmpfile();
|
181
|
-
|
182
|
-
raise(SIGSEGV);
|
183
|
-
|
184
|
-
Assert(ftell(frt_x_exception_stream), "Stream position should not be 0");
|
185
|
-
fclose(frt_x_exception_stream);
|
186
|
-
|
187
|
-
frt_x_exception_stream = old_stream;
|
188
|
-
frt_x_abort_on_exception = old_abort;
|
189
|
-
}
|
190
|
-
*/
|
191
|
-
|
192
148
|
static void test_count_leading_zeros(TestCase *tc, void *data)
|
193
149
|
{
|
194
150
|
(void)data;
|
@@ -284,8 +240,6 @@ TestSuite *ts_global(TestSuite *suite)
|
|
284
240
|
tst_run_test(suite, test_alloc, NULL);
|
285
241
|
tst_run_test(suite, test_strfmt, NULL);
|
286
242
|
tst_run_test(suite, test_dbl_to_s, NULL);
|
287
|
-
tst_run_test(suite, test_stacktrace, NULL);
|
288
|
-
// tst_run_test(suite, test_sighandler, NULL);
|
289
243
|
tst_run_test(suite, test_count_leading_zeros, NULL);
|
290
244
|
tst_run_test(suite, test_count_leading_ones, NULL);
|
291
245
|
tst_run_test(suite, test_count_trailing_zeros, NULL);
|
@@ -266,7 +266,6 @@ void tst_check_hits(TestCase *tc, FrtSearcher *searcher, FrtQuery *query, const
|
|
266
266
|
int i, count;
|
267
267
|
int total_hits = s2l(expected_hits, num_array);
|
268
268
|
FrtTopDocs *top_docs = frt_searcher_search(searcher, query, 0, total_hits + 1, NULL, NULL, NULL);
|
269
|
-
frt_p_pause();
|
270
269
|
if (!tc->failed && !Aiequal(total_hits, top_docs->total_hits)) {
|
271
270
|
int i;
|
272
271
|
Tmsg_nf("\texpected docs:\n\t ");
|
@@ -314,7 +313,6 @@ void tst_check_hits(TestCase *tc, FrtSearcher *searcher, FrtQuery *query, const
|
|
314
313
|
count = frt_searcher_search_unscored(searcher, query, num_array2, ARRAY_SIZE, num_array2[3]);
|
315
314
|
Aaiequal(num_array + 3, num_array2, count);
|
316
315
|
}
|
317
|
-
frt_p_resume();
|
318
316
|
}
|
319
317
|
|
320
318
|
void check_match_vector(TestCase *tc, FrtSearcher *searcher, FrtQuery *query,
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: isomorfeus-ferret
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.
|
4
|
+
version: 0.13.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Biedermann
|
@@ -154,13 +154,13 @@ files:
|
|
154
154
|
- ext/isomorfeus_ferret_ext/brotli_encode.h
|
155
155
|
- ext/isomorfeus_ferret_ext/brotli_port.h
|
156
156
|
- ext/isomorfeus_ferret_ext/brotli_types.h
|
157
|
-
- ext/isomorfeus_ferret_ext/bzip_blocksort.c
|
158
|
-
- ext/isomorfeus_ferret_ext/bzip_huffman.c
|
159
157
|
- ext/isomorfeus_ferret_ext/bzlib.c
|
160
158
|
- ext/isomorfeus_ferret_ext/bzlib.h
|
159
|
+
- ext/isomorfeus_ferret_ext/bzlib_blocksort.c
|
161
160
|
- ext/isomorfeus_ferret_ext/bzlib_compress.c
|
162
161
|
- ext/isomorfeus_ferret_ext/bzlib_crctable.c
|
163
162
|
- ext/isomorfeus_ferret_ext/bzlib_decompress.c
|
163
|
+
- ext/isomorfeus_ferret_ext/bzlib_huffman.c
|
164
164
|
- ext/isomorfeus_ferret_ext/bzlib_private.h
|
165
165
|
- ext/isomorfeus_ferret_ext/bzlib_randtable.c
|
166
166
|
- ext/isomorfeus_ferret_ext/extconf.rb
|