isomorfeus-ferret 0.13.0 → 0.13.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: be4c84d556459a8ed5d2585068378c156bdb2d68507ca2786844b2c69a4e7f35
4
- data.tar.gz: '0096ef29b274ea39567e876d95d8441ff80589a8d3403c5aed3801c62377cffd'
3
+ metadata.gz: 98439b4a9e6ca849246c6e2ddd2ce1bbf117182025b3651d4f6a95593aff0eb6
4
+ data.tar.gz: 0e1e90c4bfce1014c9983f4bb5be0f3123ef502788f464e988c4671bdd1758ed
5
5
  SHA512:
6
- metadata.gz: 55aa9f39fd4971e1a80bbcdeba14906928140fd023a7ee3e581ef2bafc63a8a664f5b0fbaf86dec1119ac62fd441703d33a4b6ee3731f811736a1b2eedcef9a3
7
- data.tar.gz: 3843bd29450fb925069733913aee9f66958dfe41aba052d7d14a18cb7f2d4df376e0756d7c97bcd01527120a6a77776d9bee2a1c0a488648fc61f500dfe6e98e
6
+ metadata.gz: da674772ba34175364d0d4d93023ef380eab06334b4f9c3e1956934289a7a2016387607740fb83cc11753a3bb72de62208390933ff9f4cc341524b7fe6c0c6af
7
+ data.tar.gz: 8261029020f33cb9fb52453e007defdfd2c1dd029f9da22a02b64ae0047bcd333e62e6d6a8d15263ee71089e203f393f5e865138a31784edf48f66a483a0ffce
data/README.md CHANGED
@@ -50,6 +50,7 @@ Compression semantics have changed, now Brotli, BZip2 and LZ4 compression codecs
50
50
  - BZip2: slow compression, slow decompression, high compression ratio
51
51
  - Brotli: slow compression, fast decompression, high compression ratio, recommended for general purpose.
52
52
  - LZ4: fast compression, fast decompression, low compression ratio
53
+
53
54
  To see performance and compression ratios `rake ferret_compression_bench` can be run from the cloned repo.
54
55
  It uses data and code within the misc/ferret_vs_lucene directory.
55
56
 
@@ -96,6 +97,7 @@ Ensure your locale is set to C.UTF-8, because the internal c tests don't know ho
96
97
 
97
98
  ## Benchmarks
98
99
 
100
+ ### Indexing and Searching
99
101
  - clone repo
100
102
  - bundle install
101
103
  - rake ferret_vs_lucene
@@ -104,20 +106,37 @@ A recent Java JDK must be installed to compile and run lucene benchmarks.
104
106
 
105
107
  Results on Linux:
106
108
  ```
107
- Ferret:
108
- Indexing Secs: 7.36 Docs: 19043, 2587 docs/s
109
- Searching took: 0.3366296s for 8000 queries
110
- thats 23765 q/s
111
-
112
- Lucene:
113
- Indexing Secs: 4.22 Docs: 19043, 4516 docs/s
114
- Searching took: 1.48s for 8000 queries
115
- thats 5420 q/s
116
- ---------------------------------------------------
117
- Lucene 9.0.0 0b18b3b965cedaf5eb129aa41243a44c83ca826d - jpountz - 2021-12-01 14:23:49
118
- JVM 17.0.1 (Private Build)
109
+ Ferret 0.13.0:
110
+ Indexing: 9.35 secs, Docs: 19043, 2035 docs/s
111
+ Searching took: 0.3133133s for 8000 queries
112
+ thats 25533 q/s
113
+ Total found: 42000
114
+ Index size: 28Mb
115
+
116
+ Lucene 9.1.0:
117
+ Indexing: 4.20 secs, Docs: 19043, 4538 docs/s
118
+ Searching took: 1.64s for 8000 queries
119
+ thats 4875 q/s
120
+ Total found: 41000
121
+ Index size: 35Mb
122
+
123
+ JVM 11.0.14.1 (Ubuntu)
119
124
  ```
120
125
 
126
+ ### Storing Fields with Compression, Indexing and Retrieval
127
+ - clone repo
128
+ - bundle install
129
+ - rake ferret_compression_benchmark
130
+
131
+ Results on Linux, 0.13.0:
132
+
133
+ | Compression | Index & Store | Retrieve | Index size |
134
+ |-------------|---------------|---------------|------------|
135
+ | none | 2008 docs/s | 153853 docs/s | 43 MB |
136
+ | brotli | 1726 docs/s | 58315 docs/s | 36 MB |
137
+ | bzip2 | 1438 docs/s | 15382 docs/s | 38 MB |
138
+ | lz4 | 1932 docs/s | 127100 docs/s | 41 MB |
139
+
121
140
  ## Future
122
141
 
123
142
  Lots of things to do:
@@ -232,71 +232,6 @@ void frt_dummy_free(void *p) {
232
232
  (void)p; /* suppress unused argument warning */
233
233
  }
234
234
 
235
- #ifdef HAVE_GDB
236
- #define CMD_BUF_SIZE (128 + FILENAME_MAX)
237
- /* need to declare this as it is masked by default in linux */
238
-
239
- static char *build_shell_command(void) {
240
- int pid = getpid();
241
- char *buf = FRT_ALLOC_N(char, CMD_BUF_SIZE);
242
- char *command =
243
- "gdb -quiet -ex='bt' -ex='quit' %s %d 2>/dev/null | grep '^[ #]'";
244
-
245
- snprintf(buf, CMD_BUF_SIZE, command, frt_progname(), pid);
246
- return buf;
247
- }
248
-
249
- #endif
250
-
251
- /**
252
- * Call out to gdb to get our stacktrace.
253
- */
254
- char *frt_get_stacktrace(void) {
255
- #ifdef HAVE_GDB
256
- FILE *stream;
257
- char *gdb_filename = NULL, *buf = NULL, *stack = NULL;
258
- int offset = -FRT_BUFFER_SIZE;
259
-
260
- if ( !(buf = build_shell_command()) ) {
261
- fprintf(EXCEPTION_STREAM,
262
- "Unable to build stacktrace shell command\n");
263
- goto cleanup;
264
- }
265
-
266
- if ( !(stream = popen(buf, "r")) ) {
267
- fprintf(EXCEPTION_STREAM,
268
- "Unable to exec stacktrace shell command: '%s'\n", buf);
269
- goto cleanup;
270
- }
271
-
272
- do {
273
- offset += FRT_BUFFER_SIZE;
274
- FRT_REALLOC_N(stack, char, offset + FRT_BUFFER_SIZE);
275
- FRT_ZEROSET_N(stack + offset, char, FRT_BUFFER_SIZE);
276
- } while(fread(stack + offset, 1, FRT_BUFFER_SIZE, stream) == FRT_BUFFER_SIZE);
277
-
278
- pclose(stream);
279
-
280
- cleanup:
281
- if (gdb_filename) free(gdb_filename);
282
- if (buf) free(buf);
283
- return stack;
284
- #else
285
- return NULL;
286
- #endif
287
- }
288
-
289
- void frt_print_stacktrace(void) {
290
- char *stack = frt_get_stacktrace();
291
-
292
- if (stack) {
293
- fprintf(EXCEPTION_STREAM, "Stack trace:\n%s", stack);
294
- free(stack);
295
- } else {
296
- fprintf(EXCEPTION_STREAM, "Stack trace not available\n");
297
- }
298
- }
299
-
300
235
  typedef struct FreeMe {
301
236
  void *p;
302
237
  frt_free_ft free_func;
@@ -321,55 +256,7 @@ void frt_register_for_cleanup(void *p, frt_free_ft free_func) {
321
256
  free_me->free_func = free_func;
322
257
  }
323
258
 
324
- #define MAX_PROG_NAME 200
325
- static char name[MAX_PROG_NAME]; /* program name for error msgs */
326
-
327
- /* frt_setprogname: set stored name of program */
328
- void frt_setprogname(const char *str) {
329
- strncpy(name, str, sizeof(name) - 1);
330
- }
331
-
332
- const char *frt_progname(void) {
333
- return name;
334
- }
335
-
336
- static const char *signal_to_string(int signum) {
337
- switch (signum)
338
- {
339
- case SIGILL: return "SIGILL";
340
- case SIGABRT: return "SIGABRT";
341
- case SIGFPE: return "SIGFPE";
342
- #if !defined POSH_OS_WIN32 && !defined POSH_OS_WIN64
343
- case SIGBUS: return "SIGBUS";
344
- #endif
345
- case SIGSEGV: return "SIGSEGV";
346
- }
347
-
348
- return "Unknown Signal";
349
- }
350
-
351
- static void sighandler_crash(int signum) {
352
- frt_print_stacktrace();
353
- FRT_XEXIT("Signal", "Exiting on signal %s (%d)", signal_to_string(signum), signum);
354
- }
355
-
356
- #define SETSIG_IF_UNSET(sig, handler) do { \
357
- signal(sig, handler); \
358
- } while(0)
359
-
360
259
  void frt_init(int argc, const char *const argv[]) {
361
- if (argc > 0) {
362
- frt_setprogname(argv[0]);
363
- }
364
-
365
- SETSIG_IF_UNSET(SIGILL , sighandler_crash);
366
- SETSIG_IF_UNSET(SIGABRT, sighandler_crash);
367
- SETSIG_IF_UNSET(SIGFPE , sighandler_crash);
368
- #if !defined POSH_OS_WIN32 && !defined POSH_OS_WIN64
369
- SETSIG_IF_UNSET(SIGBUS , sighandler_crash);
370
- #endif
371
- SETSIG_IF_UNSET(SIGSEGV, sighandler_crash);
372
-
373
260
  atexit(&frt_hash_finalize);
374
261
 
375
262
  utf8_encoding = rb_enc_find("UTF-8");
@@ -429,41 +316,3 @@ void frt_init(int argc, const char *const argv[]) {
429
316
  FRT_SORT_FIELD_DOC_REV->compare = frt_sort_field_doc_compare; /* compare */
430
317
  FRT_SORT_FIELD_DOC_REV->get_val = frt_sort_field_doc_get_val; /* get_val */
431
318
  }
432
-
433
- /**
434
- * For general use when testing
435
- *
436
- * TODO wrap in #ifdef
437
- */
438
-
439
- static bool p_switch = false;
440
- static bool p_switch_tmp = false;
441
-
442
- void p(const char *format, ...) {
443
- va_list args;
444
-
445
- if (!p_switch) return;
446
-
447
- va_start(args, format);
448
- vfprintf(stderr, format, args);
449
- va_end(args);
450
- }
451
-
452
- void p_on(void) {
453
- fprintf(stderr, "> > > > > STARTING PRINT\n");
454
- p_switch = true;
455
- }
456
-
457
- void p_off(void) {
458
- fprintf(stderr, "< < < < < STOPPING PRINT\n");
459
- p_switch = false;
460
- }
461
-
462
- void frt_p_pause(void) {
463
- p_switch_tmp = p_switch;
464
- p_switch = false;
465
- }
466
-
467
- void frt_p_resume(void) {
468
- p_switch = p_switch_tmp;
469
- }
@@ -105,9 +105,6 @@ extern char *frt_dbl_to_s(char *buf, double num);
105
105
  extern char *frt_strfmt(const char *fmt, ...);
106
106
  extern char *frt_vstrfmt(const char *fmt, va_list args);
107
107
 
108
- extern char *frt_get_stacktrace();
109
- extern void frt_print_stacktrace();
110
-
111
108
  extern void frt_register_for_cleanup(void *p, frt_free_ft free_func);
112
109
 
113
110
  /**
@@ -277,18 +274,6 @@ extern bool frt_x_do_logging;
277
274
  #endif
278
275
 
279
276
  extern void frt_init(int arc, const char *const argv[]);
280
- extern void frt_setprogname(const char *str);
281
- extern const char *frt_progname();
282
277
  extern void frt_micro_sleep(const int micro_seconds);
283
278
 
284
- /**
285
- * For general use during testing. Switch this on and off for print statements
286
- * to only print when p_on is called and not after p_off is called
287
- */
288
- extern void p(const char *format, ...);
289
- extern void p_on();
290
- extern void p_off();
291
- extern void frt_p_pause();
292
- extern void frt_p_resume();
293
-
294
279
  #endif
@@ -61,7 +61,6 @@ void FRT_VEXIT(const char *err_type, const char *fmt, va_list args)
61
61
  # endif
62
62
  {
63
63
  fflush(stdout);
64
- fprintf(EXCEPTION_STREAM, "\n%s: ", frt_progname());
65
64
 
66
65
  # ifdef FRT_HAS_VARARGS
67
66
  fprintf(EXCEPTION_STREAM, "%s occurred at <%s>:%d in %s\n",
@@ -76,7 +75,6 @@ void FRT_VEXIT(const char *err_type, const char *fmt, va_list args)
76
75
  }
77
76
 
78
77
  fprintf(EXCEPTION_STREAM, "\n");
79
- frt_print_stacktrace();
80
78
  if (frt_x_abort_on_exception) {
81
79
  exit(2); /* conventional value for failed execution */
82
80
  }
@@ -1286,6 +1286,7 @@ FrtBooleanClause *frt_bc_alloc(void) {
1286
1286
  FrtBooleanClause *frt_bc_init(FrtBooleanClause *self, FrtQuery *query, FrtBCType occur) {
1287
1287
  self->ref_cnt = 1;
1288
1288
  self->query = query;
1289
+ self->rbc = Qnil;
1289
1290
  frt_bc_set_occur(self, occur);
1290
1291
  return self;
1291
1292
  }
@@ -250,9 +250,7 @@ void Init_Ferret(void) {
250
250
  }
251
251
 
252
252
  void Init_isomorfeus_ferret_ext(void) {
253
- const char *const progname[] = {"ruby"};
254
-
255
- frt_init(1, progname);
253
+ frt_init(0, NULL);
256
254
 
257
255
  /* IDs */
258
256
  id_new = rb_intern("new");
@@ -279,22 +279,10 @@ static void append_to_msg_buf(const char *fmt, ...)
279
279
  va_end(args);
280
280
  }
281
281
 
282
-
283
- static void Tstack(void) {
284
- if (show_stack) {
285
- char *stack = frt_get_stacktrace();
286
- if (stack) {
287
- append_to_msg_buf("\n\nStack trace:\n%s\n", stack);
288
- free(stack);
289
- }
290
- }
291
- }
292
-
293
282
  static void vTmsg_nf(const char *fmt, va_list args)
294
283
  {
295
284
  if (verbose) {
296
285
  vappend_to_msg_buf(fmt, args);
297
- Tstack();
298
286
  }
299
287
  }
300
288
 
@@ -305,8 +293,6 @@ void vTmsg(const char *fmt, va_list args)
305
293
  vappend_to_msg_buf(fmt, args);
306
294
  va_end(args);
307
295
  append_to_msg_buf("\n");
308
-
309
- Tstack();
310
296
  }
311
297
  }
312
298
 
@@ -348,8 +334,6 @@ void tst_msg(const char *func, const char *fname, int line_num, const char *fmt,
348
334
  va_start(args, fmt);
349
335
  vappend_to_msg_buf(fmt, args);
350
336
  va_end(args);
351
-
352
- Tstack();
353
337
  }
354
338
  }
355
339
 
@@ -145,50 +145,6 @@ static void test_dbl_to_s(TestCase *tc, void *data)
145
145
  Asequal("NaN", frt_dbl_to_s(buf, NAN));
146
146
  }
147
147
 
148
-
149
- /**
150
- * Generate a stacktrace, make sure it does something
151
- */
152
- static void test_stacktrace(TestCase *tc, void *data)
153
- {
154
- FILE *old_stream = frt_x_exception_stream;
155
- (void)data; /* suppress warning */
156
- int tfd = fio_tmpfile();
157
- frt_x_exception_stream = fdopen(tfd, "w+");
158
- Atrue(frt_x_exception_stream != NULL);
159
- if (frt_x_exception_stream) {
160
- frt_print_stacktrace();
161
- long int f = ftell(frt_x_exception_stream);
162
- Assert(f, "Stream position should not be 0");
163
- fclose(frt_x_exception_stream);
164
- }
165
- frt_x_exception_stream = old_stream;
166
- }
167
-
168
- /**
169
- * Generate a normally fatal signal, which gets caught
170
- */
171
- /*
172
- static void test_sighandler(TestCase *tc, void *data)
173
- {
174
- bool old_abort = frt_x_abort_on_exception;
175
- FILE *old_stream = frt_x_exception_stream;
176
- (void)data;
177
- (void)tc;
178
-
179
- frt_x_exception_stream = false;
180
- frt_x_exception_stream = tmpfile();
181
-
182
- raise(SIGSEGV);
183
-
184
- Assert(ftell(frt_x_exception_stream), "Stream position should not be 0");
185
- fclose(frt_x_exception_stream);
186
-
187
- frt_x_exception_stream = old_stream;
188
- frt_x_abort_on_exception = old_abort;
189
- }
190
- */
191
-
192
148
  static void test_count_leading_zeros(TestCase *tc, void *data)
193
149
  {
194
150
  (void)data;
@@ -284,8 +240,6 @@ TestSuite *ts_global(TestSuite *suite)
284
240
  tst_run_test(suite, test_alloc, NULL);
285
241
  tst_run_test(suite, test_strfmt, NULL);
286
242
  tst_run_test(suite, test_dbl_to_s, NULL);
287
- tst_run_test(suite, test_stacktrace, NULL);
288
- // tst_run_test(suite, test_sighandler, NULL);
289
243
  tst_run_test(suite, test_count_leading_zeros, NULL);
290
244
  tst_run_test(suite, test_count_leading_ones, NULL);
291
245
  tst_run_test(suite, test_count_trailing_zeros, NULL);
@@ -266,7 +266,6 @@ void tst_check_hits(TestCase *tc, FrtSearcher *searcher, FrtQuery *query, const
266
266
  int i, count;
267
267
  int total_hits = s2l(expected_hits, num_array);
268
268
  FrtTopDocs *top_docs = frt_searcher_search(searcher, query, 0, total_hits + 1, NULL, NULL, NULL);
269
- frt_p_pause();
270
269
  if (!tc->failed && !Aiequal(total_hits, top_docs->total_hits)) {
271
270
  int i;
272
271
  Tmsg_nf("\texpected docs:\n\t ");
@@ -314,7 +313,6 @@ void tst_check_hits(TestCase *tc, FrtSearcher *searcher, FrtQuery *query, const
314
313
  count = frt_searcher_search_unscored(searcher, query, num_array2, ARRAY_SIZE, num_array2[3]);
315
314
  Aaiequal(num_array + 3, num_array2, count);
316
315
  }
317
- frt_p_resume();
318
316
  }
319
317
 
320
318
  void check_match_vector(TestCase *tc, FrtSearcher *searcher, FrtQuery *query,
@@ -1,5 +1,5 @@
1
1
  module Isomorfeus
2
2
  module Ferret
3
- VERSION = '0.13.0'
3
+ VERSION = '0.13.1'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: isomorfeus-ferret
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.0
4
+ version: 0.13.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Biedermann
@@ -154,13 +154,13 @@ files:
154
154
  - ext/isomorfeus_ferret_ext/brotli_encode.h
155
155
  - ext/isomorfeus_ferret_ext/brotli_port.h
156
156
  - ext/isomorfeus_ferret_ext/brotli_types.h
157
- - ext/isomorfeus_ferret_ext/bzip_blocksort.c
158
- - ext/isomorfeus_ferret_ext/bzip_huffman.c
159
157
  - ext/isomorfeus_ferret_ext/bzlib.c
160
158
  - ext/isomorfeus_ferret_ext/bzlib.h
159
+ - ext/isomorfeus_ferret_ext/bzlib_blocksort.c
161
160
  - ext/isomorfeus_ferret_ext/bzlib_compress.c
162
161
  - ext/isomorfeus_ferret_ext/bzlib_crctable.c
163
162
  - ext/isomorfeus_ferret_ext/bzlib_decompress.c
163
+ - ext/isomorfeus_ferret_ext/bzlib_huffman.c
164
164
  - ext/isomorfeus_ferret_ext/bzlib_private.h
165
165
  - ext/isomorfeus_ferret_ext/bzlib_randtable.c
166
166
  - ext/isomorfeus_ferret_ext/extconf.rb