isomorfeus-ferret 0.13.0 → 0.13.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +31 -12
- data/ext/isomorfeus_ferret_ext/{bzip_blocksort.c → bzlib_blocksort.c} +0 -0
- data/ext/isomorfeus_ferret_ext/{bzip_huffman.c → bzlib_huffman.c} +0 -0
- data/ext/isomorfeus_ferret_ext/frt_global.c +0 -151
- data/ext/isomorfeus_ferret_ext/frt_global.h +0 -15
- data/ext/isomorfeus_ferret_ext/frt_lang.c +0 -2
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +1 -0
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +1 -3
- data/ext/isomorfeus_ferret_ext/test.c +0 -16
- data/ext/isomorfeus_ferret_ext/test_global.c +0 -46
- data/ext/isomorfeus_ferret_ext/test_search.c +0 -2
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 98439b4a9e6ca849246c6e2ddd2ce1bbf117182025b3651d4f6a95593aff0eb6
|
4
|
+
data.tar.gz: 0e1e90c4bfce1014c9983f4bb5be0f3123ef502788f464e988c4671bdd1758ed
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: da674772ba34175364d0d4d93023ef380eab06334b4f9c3e1956934289a7a2016387607740fb83cc11753a3bb72de62208390933ff9f4cc341524b7fe6c0c6af
|
7
|
+
data.tar.gz: 8261029020f33cb9fb52453e007defdfd2c1dd029f9da22a02b64ae0047bcd333e62e6d6a8d15263ee71089e203f393f5e865138a31784edf48f66a483a0ffce
|
data/README.md
CHANGED
@@ -50,6 +50,7 @@ Compression semantics have changed, now Brotli, BZip2 and LZ4 compression codecs
|
|
50
50
|
- BZip2: slow compression, slow decompression, high compression ratio
|
51
51
|
- Brotli: slow compression, fast decompression, high compression ratio, recommended for general-purpose use.
|
52
52
|
- LZ4: fast compression, fast decompression, low compression ratio
|
53
|
+
|
53
54
|
To see performance and compression ratios, run `rake ferret_compression_bench` from the cloned repo.
|
54
55
|
It uses data and code within the misc/ferret_vs_lucene directory.
|
55
56
|
|
@@ -96,6 +97,7 @@ Ensure your locale is set to C.UTF-8, because the internal c tests don't know ho
|
|
96
97
|
|
97
98
|
## Benchmarks
|
98
99
|
|
100
|
+
### Indexing and Searching
|
99
101
|
- clone repo
|
100
102
|
- bundle install
|
101
103
|
- rake ferret_vs_lucene
|
@@ -104,20 +106,37 @@ A recent Java JDK must be installed to compile and run lucene benchmarks.
|
|
104
106
|
|
105
107
|
Results on Linux:
|
106
108
|
```
|
107
|
-
Ferret:
|
108
|
-
Indexing
|
109
|
-
Searching took: 0.
|
110
|
-
thats
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
109
|
+
Ferret 0.13.0:
|
110
|
+
Indexing: 9.35 secs, Docs: 19043, 2035 docs/s
|
111
|
+
Searching took: 0.3133133s for 8000 queries
|
112
|
+
thats 25533 q/s
|
113
|
+
Total found: 42000
|
114
|
+
Index size: 28Mb
|
115
|
+
|
116
|
+
Lucene 9.1.0:
|
117
|
+
Indexing: 4.20 secs, Docs: 19043, 4538 docs/s
|
118
|
+
Searching took: 1.64s for 8000 queries
|
119
|
+
thats 4875 q/s
|
120
|
+
Total found: 41000
|
121
|
+
index size: 35Mb
|
122
|
+
|
123
|
+
JVM 11.0.14.1 (Ubuntu)
|
119
124
|
```
|
120
125
|
|
126
|
+
### Storing Fields with Compression, Indexing and Retrieval
|
127
|
+
- clone repo
|
128
|
+
- bundle install
|
129
|
+
- rake ferret_compression_benchmark
|
130
|
+
|
131
|
+
Results on Linux, 0.13.0:
|
132
|
+
|
133
|
+
| Compression | Index & Store | Retrieve | Index size |
|
134
|
+
|-------------|---------------|---------------|------------|
|
135
|
+
| none | 2008 docs/s | 153853 docs/s | 43 MB |
|
136
|
+
| brotli | 1726 docs/s | 58315 docs/s | 36 MB |
|
137
|
+
| bzip2 | 1438 docs/s | 15382 docs/s | 38 MB |
|
138
|
+
| lz4 | 1932 docs/s | 127100 docs/s | 41 MB |
|
139
|
+
|
121
140
|
## Future
|
122
141
|
|
123
142
|
Lots of things to do:
|
File without changes
|
File without changes
|
@@ -232,71 +232,6 @@ void frt_dummy_free(void *p) {
|
|
232
232
|
(void)p; /* suppress unused argument warning */
|
233
233
|
}
|
234
234
|
|
235
|
-
#ifdef HAVE_GDB
|
236
|
-
#define CMD_BUF_SIZE (128 + FILENAME_MAX)
|
237
|
-
/* need to declare this as it is masked by default in linux */
|
238
|
-
|
239
|
-
static char *build_shell_command(void) {
|
240
|
-
int pid = getpid();
|
241
|
-
char *buf = FRT_ALLOC_N(char, CMD_BUF_SIZE);
|
242
|
-
char *command =
|
243
|
-
"gdb -quiet -ex='bt' -ex='quit' %s %d 2>/dev/null | grep '^[ #]'";
|
244
|
-
|
245
|
-
snprintf(buf, CMD_BUF_SIZE, command, frt_progname(), pid);
|
246
|
-
return buf;
|
247
|
-
}
|
248
|
-
|
249
|
-
#endif
|
250
|
-
|
251
|
-
/**
|
252
|
-
* Call out to gdb to get our stacktrace.
|
253
|
-
*/
|
254
|
-
char *frt_get_stacktrace(void) {
|
255
|
-
#ifdef HAVE_GDB
|
256
|
-
FILE *stream;
|
257
|
-
char *gdb_filename = NULL, *buf = NULL, *stack = NULL;
|
258
|
-
int offset = -FRT_BUFFER_SIZE;
|
259
|
-
|
260
|
-
if ( !(buf = build_shell_command()) ) {
|
261
|
-
fprintf(EXCEPTION_STREAM,
|
262
|
-
"Unable to build stacktrace shell command\n");
|
263
|
-
goto cleanup;
|
264
|
-
}
|
265
|
-
|
266
|
-
if ( !(stream = popen(buf, "r")) ) {
|
267
|
-
fprintf(EXCEPTION_STREAM,
|
268
|
-
"Unable to exec stacktrace shell command: '%s'\n", buf);
|
269
|
-
goto cleanup;
|
270
|
-
}
|
271
|
-
|
272
|
-
do {
|
273
|
-
offset += FRT_BUFFER_SIZE;
|
274
|
-
FRT_REALLOC_N(stack, char, offset + FRT_BUFFER_SIZE);
|
275
|
-
FRT_ZEROSET_N(stack + offset, char, FRT_BUFFER_SIZE);
|
276
|
-
} while(fread(stack + offset, 1, FRT_BUFFER_SIZE, stream) == FRT_BUFFER_SIZE);
|
277
|
-
|
278
|
-
pclose(stream);
|
279
|
-
|
280
|
-
cleanup:
|
281
|
-
if (gdb_filename) free(gdb_filename);
|
282
|
-
if (buf) free(buf);
|
283
|
-
return stack;
|
284
|
-
#else
|
285
|
-
return NULL;
|
286
|
-
#endif
|
287
|
-
}
|
288
|
-
|
289
|
-
void frt_print_stacktrace(void) {
|
290
|
-
char *stack = frt_get_stacktrace();
|
291
|
-
|
292
|
-
if (stack) {
|
293
|
-
fprintf(EXCEPTION_STREAM, "Stack trace:\n%s", stack);
|
294
|
-
free(stack);
|
295
|
-
} else {
|
296
|
-
fprintf(EXCEPTION_STREAM, "Stack trace not available\n");
|
297
|
-
}
|
298
|
-
}
|
299
|
-
|
300
235
|
typedef struct FreeMe {
|
301
236
|
void *p;
|
302
237
|
frt_free_ft free_func;
|
@@ -321,55 +256,7 @@ void frt_register_for_cleanup(void *p, frt_free_ft free_func) {
|
|
321
256
|
free_me->free_func = free_func;
|
322
257
|
}
|
323
258
|
|
324
|
-
#define MAX_PROG_NAME 200
|
325
|
-
static char name[MAX_PROG_NAME]; /* program name for error msgs */
|
326
|
-
|
327
|
-
/* frt_setprogname: set stored name of program */
|
328
|
-
void frt_setprogname(const char *str) {
|
329
|
-
strncpy(name, str, sizeof(name) - 1);
|
330
|
-
}
|
331
|
-
|
332
|
-
const char *frt_progname(void) {
|
333
|
-
return name;
|
334
|
-
}
|
335
|
-
|
336
|
-
static const char *signal_to_string(int signum) {
|
337
|
-
switch (signum)
|
338
|
-
{
|
339
|
-
case SIGILL: return "SIGILL";
|
340
|
-
case SIGABRT: return "SIGABRT";
|
341
|
-
case SIGFPE: return "SIGFPE";
|
342
|
-
#if !defined POSH_OS_WIN32 && !defined POSH_OS_WIN64
|
343
|
-
case SIGBUS: return "SIGBUS";
|
344
|
-
#endif
|
345
|
-
case SIGSEGV: return "SIGSEGV";
|
346
|
-
}
|
347
|
-
|
348
|
-
return "Unknown Signal";
|
349
|
-
}
|
350
|
-
|
351
|
-
static void sighandler_crash(int signum) {
|
352
|
-
frt_print_stacktrace();
|
353
|
-
FRT_XEXIT("Signal", "Exiting on signal %s (%d)", signal_to_string(signum), signum);
|
354
|
-
}
|
355
|
-
|
356
|
-
#define SETSIG_IF_UNSET(sig, handler) do { \
|
357
|
-
signal(sig, handler); \
|
358
|
-
} while(0)
|
359
|
-
|
360
259
|
void frt_init(int argc, const char *const argv[]) {
|
361
|
-
if (argc > 0) {
|
362
|
-
frt_setprogname(argv[0]);
|
363
|
-
}
|
364
|
-
|
365
|
-
SETSIG_IF_UNSET(SIGILL , sighandler_crash);
|
366
|
-
SETSIG_IF_UNSET(SIGABRT, sighandler_crash);
|
367
|
-
SETSIG_IF_UNSET(SIGFPE , sighandler_crash);
|
368
|
-
#if !defined POSH_OS_WIN32 && !defined POSH_OS_WIN64
|
369
|
-
SETSIG_IF_UNSET(SIGBUS , sighandler_crash);
|
370
|
-
#endif
|
371
|
-
SETSIG_IF_UNSET(SIGSEGV, sighandler_crash);
|
372
|
-
|
373
260
|
atexit(&frt_hash_finalize);
|
374
261
|
|
375
262
|
utf8_encoding = rb_enc_find("UTF-8");
|
@@ -429,41 +316,3 @@ void frt_init(int argc, const char *const argv[]) {
|
|
429
316
|
FRT_SORT_FIELD_DOC_REV->compare = frt_sort_field_doc_compare; /* compare */
|
430
317
|
FRT_SORT_FIELD_DOC_REV->get_val = frt_sort_field_doc_get_val; /* get_val */
|
431
318
|
}
|
432
|
-
|
433
|
-
/**
|
434
|
-
* For general use when testing
|
435
|
-
*
|
436
|
-
* TODO wrap in #ifdef
|
437
|
-
*/
|
438
|
-
|
439
|
-
static bool p_switch = false;
|
440
|
-
static bool p_switch_tmp = false;
|
441
|
-
|
442
|
-
void p(const char *format, ...) {
|
443
|
-
va_list args;
|
444
|
-
|
445
|
-
if (!p_switch) return;
|
446
|
-
|
447
|
-
va_start(args, format);
|
448
|
-
vfprintf(stderr, format, args);
|
449
|
-
va_end(args);
|
450
|
-
}
|
451
|
-
|
452
|
-
void p_on(void) {
|
453
|
-
fprintf(stderr, "> > > > > STARTING PRINT\n");
|
454
|
-
p_switch = true;
|
455
|
-
}
|
456
|
-
|
457
|
-
void p_off(void) {
|
458
|
-
fprintf(stderr, "< < < < < STOPPING PRINT\n");
|
459
|
-
p_switch = false;
|
460
|
-
}
|
461
|
-
|
462
|
-
void frt_p_pause(void) {
|
463
|
-
p_switch_tmp = p_switch;
|
464
|
-
p_switch = false;
|
465
|
-
}
|
466
|
-
|
467
|
-
void frt_p_resume(void) {
|
468
|
-
p_switch = p_switch_tmp;
|
469
|
-
}
|
@@ -105,9 +105,6 @@ extern char *frt_dbl_to_s(char *buf, double num);
|
|
105
105
|
extern char *frt_strfmt(const char *fmt, ...);
|
106
106
|
extern char *frt_vstrfmt(const char *fmt, va_list args);
|
107
107
|
|
108
|
-
extern char *frt_get_stacktrace();
|
109
|
-
extern void frt_print_stacktrace();
|
110
|
-
|
111
108
|
extern void frt_register_for_cleanup(void *p, frt_free_ft free_func);
|
112
109
|
|
113
110
|
/**
|
@@ -277,18 +274,6 @@ extern bool frt_x_do_logging;
|
|
277
274
|
#endif
|
278
275
|
|
279
276
|
extern void frt_init(int arc, const char *const argv[]);
|
280
|
-
extern void frt_setprogname(const char *str);
|
281
|
-
extern const char *frt_progname();
|
282
277
|
extern void frt_micro_sleep(const int micro_seconds);
|
283
278
|
|
284
|
-
/**
|
285
|
-
* For general use during testing. Switch this on and off for print statements
|
286
|
-
* to only print when p_on is called and not after p_off is called
|
287
|
-
*/
|
288
|
-
extern void p(const char *format, ...);
|
289
|
-
extern void p_on();
|
290
|
-
extern void p_off();
|
291
|
-
extern void frt_p_pause();
|
292
|
-
extern void frt_p_resume();
|
293
|
-
|
294
279
|
#endif
|
@@ -61,7 +61,6 @@ void FRT_VEXIT(const char *err_type, const char *fmt, va_list args)
|
|
61
61
|
# endif
|
62
62
|
{
|
63
63
|
fflush(stdout);
|
64
|
-
fprintf(EXCEPTION_STREAM, "\n%s: ", frt_progname());
|
65
64
|
|
66
65
|
# ifdef FRT_HAS_VARARGS
|
67
66
|
fprintf(EXCEPTION_STREAM, "%s occurred at <%s>:%d in %s\n",
|
@@ -76,7 +75,6 @@ void FRT_VEXIT(const char *err_type, const char *fmt, va_list args)
|
|
76
75
|
}
|
77
76
|
|
78
77
|
fprintf(EXCEPTION_STREAM, "\n");
|
79
|
-
frt_print_stacktrace();
|
80
78
|
if (frt_x_abort_on_exception) {
|
81
79
|
exit(2); /* conventional value for failed execution */
|
82
80
|
}
|
@@ -1286,6 +1286,7 @@ FrtBooleanClause *frt_bc_alloc(void) {
|
|
1286
1286
|
FrtBooleanClause *frt_bc_init(FrtBooleanClause *self, FrtQuery *query, FrtBCType occur) {
|
1287
1287
|
self->ref_cnt = 1;
|
1288
1288
|
self->query = query;
|
1289
|
+
self->rbc = Qnil;
|
1289
1290
|
frt_bc_set_occur(self, occur);
|
1290
1291
|
return self;
|
1291
1292
|
}
|
@@ -279,22 +279,10 @@ static void append_to_msg_buf(const char *fmt, ...)
|
|
279
279
|
va_end(args);
|
280
280
|
}
|
281
281
|
|
282
|
-
|
283
|
-
static void Tstack(void) {
|
284
|
-
if (show_stack) {
|
285
|
-
char *stack = frt_get_stacktrace();
|
286
|
-
if (stack) {
|
287
|
-
append_to_msg_buf("\n\nStack trace:\n%s\n", stack);
|
288
|
-
free(stack);
|
289
|
-
}
|
290
|
-
}
|
291
|
-
}
|
292
|
-
|
293
282
|
static void vTmsg_nf(const char *fmt, va_list args)
|
294
283
|
{
|
295
284
|
if (verbose) {
|
296
285
|
vappend_to_msg_buf(fmt, args);
|
297
|
-
Tstack();
|
298
286
|
}
|
299
287
|
}
|
300
288
|
|
@@ -305,8 +293,6 @@ void vTmsg(const char *fmt, va_list args)
|
|
305
293
|
vappend_to_msg_buf(fmt, args);
|
306
294
|
va_end(args);
|
307
295
|
append_to_msg_buf("\n");
|
308
|
-
|
309
|
-
Tstack();
|
310
296
|
}
|
311
297
|
}
|
312
298
|
|
@@ -348,8 +334,6 @@ void tst_msg(const char *func, const char *fname, int line_num, const char *fmt,
|
|
348
334
|
va_start(args, fmt);
|
349
335
|
vappend_to_msg_buf(fmt, args);
|
350
336
|
va_end(args);
|
351
|
-
|
352
|
-
Tstack();
|
353
337
|
}
|
354
338
|
}
|
355
339
|
|
@@ -145,50 +145,6 @@ static void test_dbl_to_s(TestCase *tc, void *data)
|
|
145
145
|
Asequal("NaN", frt_dbl_to_s(buf, NAN));
|
146
146
|
}
|
147
147
|
|
148
|
-
|
149
|
-
/**
|
150
|
-
* Generate a stacktrace, make sure it does something
|
151
|
-
*/
|
152
|
-
static void test_stacktrace(TestCase *tc, void *data)
|
153
|
-
{
|
154
|
-
FILE *old_stream = frt_x_exception_stream;
|
155
|
-
(void)data; /* suppress warning */
|
156
|
-
int tfd = fio_tmpfile();
|
157
|
-
frt_x_exception_stream = fdopen(tfd, "w+");
|
158
|
-
Atrue(frt_x_exception_stream != NULL);
|
159
|
-
if (frt_x_exception_stream) {
|
160
|
-
frt_print_stacktrace();
|
161
|
-
long int f = ftell(frt_x_exception_stream);
|
162
|
-
Assert(f, "Stream position should not be 0");
|
163
|
-
fclose(frt_x_exception_stream);
|
164
|
-
}
|
165
|
-
frt_x_exception_stream = old_stream;
|
166
|
-
}
|
167
|
-
|
168
|
-
/**
|
169
|
-
* Generate a normally fatal signal, which gets caught
|
170
|
-
*/
|
171
|
-
/*
|
172
|
-
static void test_sighandler(TestCase *tc, void *data)
|
173
|
-
{
|
174
|
-
bool old_abort = frt_x_abort_on_exception;
|
175
|
-
FILE *old_stream = frt_x_exception_stream;
|
176
|
-
(void)data;
|
177
|
-
(void)tc;
|
178
|
-
|
179
|
-
frt_x_exception_stream = false;
|
180
|
-
frt_x_exception_stream = tmpfile();
|
181
|
-
|
182
|
-
raise(SIGSEGV);
|
183
|
-
|
184
|
-
Assert(ftell(frt_x_exception_stream), "Stream position should not be 0");
|
185
|
-
fclose(frt_x_exception_stream);
|
186
|
-
|
187
|
-
frt_x_exception_stream = old_stream;
|
188
|
-
frt_x_abort_on_exception = old_abort;
|
189
|
-
}
|
190
|
-
*/
|
191
|
-
|
192
148
|
static void test_count_leading_zeros(TestCase *tc, void *data)
|
193
149
|
{
|
194
150
|
(void)data;
|
@@ -284,8 +240,6 @@ TestSuite *ts_global(TestSuite *suite)
|
|
284
240
|
tst_run_test(suite, test_alloc, NULL);
|
285
241
|
tst_run_test(suite, test_strfmt, NULL);
|
286
242
|
tst_run_test(suite, test_dbl_to_s, NULL);
|
287
|
-
tst_run_test(suite, test_stacktrace, NULL);
|
288
|
-
// tst_run_test(suite, test_sighandler, NULL);
|
289
243
|
tst_run_test(suite, test_count_leading_zeros, NULL);
|
290
244
|
tst_run_test(suite, test_count_leading_ones, NULL);
|
291
245
|
tst_run_test(suite, test_count_trailing_zeros, NULL);
|
@@ -266,7 +266,6 @@ void tst_check_hits(TestCase *tc, FrtSearcher *searcher, FrtQuery *query, const
|
|
266
266
|
int i, count;
|
267
267
|
int total_hits = s2l(expected_hits, num_array);
|
268
268
|
FrtTopDocs *top_docs = frt_searcher_search(searcher, query, 0, total_hits + 1, NULL, NULL, NULL);
|
269
|
-
frt_p_pause();
|
270
269
|
if (!tc->failed && !Aiequal(total_hits, top_docs->total_hits)) {
|
271
270
|
int i;
|
272
271
|
Tmsg_nf("\texpected docs:\n\t ");
|
@@ -314,7 +313,6 @@ void tst_check_hits(TestCase *tc, FrtSearcher *searcher, FrtQuery *query, const
|
|
314
313
|
count = frt_searcher_search_unscored(searcher, query, num_array2, ARRAY_SIZE, num_array2[3]);
|
315
314
|
Aaiequal(num_array + 3, num_array2, count);
|
316
315
|
}
|
317
|
-
frt_p_resume();
|
318
316
|
}
|
319
317
|
|
320
318
|
void check_match_vector(TestCase *tc, FrtSearcher *searcher, FrtQuery *query,
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: isomorfeus-ferret
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.0
|
4
|
+
version: 0.13.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Biedermann
|
@@ -154,13 +154,13 @@ files:
|
|
154
154
|
- ext/isomorfeus_ferret_ext/brotli_encode.h
|
155
155
|
- ext/isomorfeus_ferret_ext/brotli_port.h
|
156
156
|
- ext/isomorfeus_ferret_ext/brotli_types.h
|
157
|
-
- ext/isomorfeus_ferret_ext/bzip_blocksort.c
|
158
|
-
- ext/isomorfeus_ferret_ext/bzip_huffman.c
|
159
157
|
- ext/isomorfeus_ferret_ext/bzlib.c
|
160
158
|
- ext/isomorfeus_ferret_ext/bzlib.h
|
159
|
+
- ext/isomorfeus_ferret_ext/bzlib_blocksort.c
|
161
160
|
- ext/isomorfeus_ferret_ext/bzlib_compress.c
|
162
161
|
- ext/isomorfeus_ferret_ext/bzlib_crctable.c
|
163
162
|
- ext/isomorfeus_ferret_ext/bzlib_decompress.c
|
163
|
+
- ext/isomorfeus_ferret_ext/bzlib_huffman.c
|
164
164
|
- ext/isomorfeus_ferret_ext/bzlib_private.h
|
165
165
|
- ext/isomorfeus_ferret_ext/bzlib_randtable.c
|
166
166
|
- ext/isomorfeus_ferret_ext/extconf.rb
|