isomorfeus-ferret 0.13.0 → 0.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: be4c84d556459a8ed5d2585068378c156bdb2d68507ca2786844b2c69a4e7f35
4
- data.tar.gz: '0096ef29b274ea39567e876d95d8441ff80589a8d3403c5aed3801c62377cffd'
3
+ metadata.gz: 98439b4a9e6ca849246c6e2ddd2ce1bbf117182025b3651d4f6a95593aff0eb6
4
+ data.tar.gz: 0e1e90c4bfce1014c9983f4bb5be0f3123ef502788f464e988c4671bdd1758ed
5
5
  SHA512:
6
- metadata.gz: 55aa9f39fd4971e1a80bbcdeba14906928140fd023a7ee3e581ef2bafc63a8a664f5b0fbaf86dec1119ac62fd441703d33a4b6ee3731f811736a1b2eedcef9a3
7
- data.tar.gz: 3843bd29450fb925069733913aee9f66958dfe41aba052d7d14a18cb7f2d4df376e0756d7c97bcd01527120a6a77776d9bee2a1c0a488648fc61f500dfe6e98e
6
+ metadata.gz: da674772ba34175364d0d4d93023ef380eab06334b4f9c3e1956934289a7a2016387607740fb83cc11753a3bb72de62208390933ff9f4cc341524b7fe6c0c6af
7
+ data.tar.gz: 8261029020f33cb9fb52453e007defdfd2c1dd029f9da22a02b64ae0047bcd333e62e6d6a8d15263ee71089e203f393f5e865138a31784edf48f66a483a0ffce
data/README.md CHANGED
@@ -50,6 +50,7 @@ Compression semantics have changed, now Brotli, BZip2 and LZ4 compression codecs
50
50
  - BZip2: slow compression, slow decompression, high compression ratio
51
51
  - Brotli: slow compression, fast decompression, high compression ratio, recommended for general purpose.
52
52
  - LZ4: fast compression, fast decompression, low compression ratio
53
+
53
54
  To see performance and compression ratios `rake ferret_compression_bench` can be run from the cloned repo.
54
55
  It uses data and code within the misc/ferret_vs_lucene directory.
55
56
 
@@ -96,6 +97,7 @@ Ensure your locale is set to C.UTF-8, because the internal c tests don't know ho
96
97
 
97
98
  ## Benchmarks
98
99
 
100
+ ### Indexing and Searching
99
101
  - clone repo
100
102
  - bundle install
101
103
  - rake ferret_vs_lucene
@@ -104,20 +106,37 @@ A recent Java JDK must be installed to compile and run lucene benchmarks.
104
106
 
105
107
  Results on Linux:
106
108
  ```
107
- Ferret:
108
- Indexing Secs: 7.36 Docs: 19043, 2587 docs/s
109
- Searching took: 0.3366296s for 8000 queries
110
- thats 23765 q/s
111
-
112
- Lucene:
113
- Indexing Secs: 4.22 Docs: 19043, 4516 docs/s
114
- Searching took: 1.48s for 8000 queries
115
- thats 5420 q/s
116
- ---------------------------------------------------
117
- Lucene 9.0.0 0b18b3b965cedaf5eb129aa41243a44c83ca826d - jpountz - 2021-12-01 14:23:49
118
- JVM 17.0.1 (Private Build)
109
+ Ferret 0.13.0:
110
+ Indexing: 9.35 secs, Docs: 19043, 2035 docs/s
111
+ Searching took: 0.3133133s for 8000 queries
112
+ thats 25533 q/s
113
+ Total found: 42000
114
+ Index size: 28Mb
115
+
116
+ Lucene 9.1.0:
117
+ Indexing: 4.20 secs, Docs: 19043, 4538 docs/s
118
+ Searching took: 1.64s for 8000 queries
119
+ thats 4875 q/s
120
+ Total found: 41000
121
+ index size: 35Mb
122
+
123
+ JVM 11.0.14.1 (Ubuntu)
119
124
  ```
120
125
 
126
+ ### Storing Fields with Compression, Indexing and Retrieval
127
+ - clone repo
128
+ - bundle install
129
+ - rake ferret_compression_benchmark
130
+
131
+ Results on Linux, 0.13.0:
132
+
133
+ | Compression | Index & Store | Retrieve | Index size |
134
+ |-------------|---------------|---------------|------------|
135
+ | none | 2008 docs/s | 153853 docs/s | 43 MB |
136
+ | brotli | 1726 docs/s | 58315 docs/s | 36 MB |
137
+ | bzip2 | 1438 docs/s | 15382 docs/s | 38 MB |
138
+ | lz4 | 1932 docs/s | 127100 docs/s | 41 MB |
139
+
121
140
  ## Future
122
141
 
123
142
  Lots of things to do:
@@ -232,71 +232,6 @@ void frt_dummy_free(void *p) {
232
232
  (void)p; /* suppress unused argument warning */
233
233
  }
234
234
 
235
- #ifdef HAVE_GDB
236
- #define CMD_BUF_SIZE (128 + FILENAME_MAX)
237
- /* need to declare this as it is masked by default in linux */
238
-
239
- static char *build_shell_command(void) {
240
- int pid = getpid();
241
- char *buf = FRT_ALLOC_N(char, CMD_BUF_SIZE);
242
- char *command =
243
- "gdb -quiet -ex='bt' -ex='quit' %s %d 2>/dev/null | grep '^[ #]'";
244
-
245
- snprintf(buf, CMD_BUF_SIZE, command, frt_progname(), pid);
246
- return buf;
247
- }
248
-
249
- #endif
250
-
251
- /**
252
- * Call out to gdb to get our stacktrace.
253
- */
254
- char *frt_get_stacktrace(void) {
255
- #ifdef HAVE_GDB
256
- FILE *stream;
257
- char *gdb_filename = NULL, *buf = NULL, *stack = NULL;
258
- int offset = -FRT_BUFFER_SIZE;
259
-
260
- if ( !(buf = build_shell_command()) ) {
261
- fprintf(EXCEPTION_STREAM,
262
- "Unable to build stacktrace shell command\n");
263
- goto cleanup;
264
- }
265
-
266
- if ( !(stream = popen(buf, "r")) ) {
267
- fprintf(EXCEPTION_STREAM,
268
- "Unable to exec stacktrace shell command: '%s'\n", buf);
269
- goto cleanup;
270
- }
271
-
272
- do {
273
- offset += FRT_BUFFER_SIZE;
274
- FRT_REALLOC_N(stack, char, offset + FRT_BUFFER_SIZE);
275
- FRT_ZEROSET_N(stack + offset, char, FRT_BUFFER_SIZE);
276
- } while(fread(stack + offset, 1, FRT_BUFFER_SIZE, stream) == FRT_BUFFER_SIZE);
277
-
278
- pclose(stream);
279
-
280
- cleanup:
281
- if (gdb_filename) free(gdb_filename);
282
- if (buf) free(buf);
283
- return stack;
284
- #else
285
- return NULL;
286
- #endif
287
- }
288
-
289
- void frt_print_stacktrace(void) {
290
- char *stack = frt_get_stacktrace();
291
-
292
- if (stack) {
293
- fprintf(EXCEPTION_STREAM, "Stack trace:\n%s", stack);
294
- free(stack);
295
- } else {
296
- fprintf(EXCEPTION_STREAM, "Stack trace not available\n");
297
- }
298
- }
299
-
300
235
  typedef struct FreeMe {
301
236
  void *p;
302
237
  frt_free_ft free_func;
@@ -321,55 +256,7 @@ void frt_register_for_cleanup(void *p, frt_free_ft free_func) {
321
256
  free_me->free_func = free_func;
322
257
  }
323
258
 
324
- #define MAX_PROG_NAME 200
325
- static char name[MAX_PROG_NAME]; /* program name for error msgs */
326
-
327
- /* frt_setprogname: set stored name of program */
328
- void frt_setprogname(const char *str) {
329
- strncpy(name, str, sizeof(name) - 1);
330
- }
331
-
332
- const char *frt_progname(void) {
333
- return name;
334
- }
335
-
336
- static const char *signal_to_string(int signum) {
337
- switch (signum)
338
- {
339
- case SIGILL: return "SIGILL";
340
- case SIGABRT: return "SIGABRT";
341
- case SIGFPE: return "SIGFPE";
342
- #if !defined POSH_OS_WIN32 && !defined POSH_OS_WIN64
343
- case SIGBUS: return "SIGBUS";
344
- #endif
345
- case SIGSEGV: return "SIGSEGV";
346
- }
347
-
348
- return "Unknown Signal";
349
- }
350
-
351
- static void sighandler_crash(int signum) {
352
- frt_print_stacktrace();
353
- FRT_XEXIT("Signal", "Exiting on signal %s (%d)", signal_to_string(signum), signum);
354
- }
355
-
356
- #define SETSIG_IF_UNSET(sig, handler) do { \
357
- signal(sig, handler); \
358
- } while(0)
359
-
360
259
  void frt_init(int argc, const char *const argv[]) {
361
- if (argc > 0) {
362
- frt_setprogname(argv[0]);
363
- }
364
-
365
- SETSIG_IF_UNSET(SIGILL , sighandler_crash);
366
- SETSIG_IF_UNSET(SIGABRT, sighandler_crash);
367
- SETSIG_IF_UNSET(SIGFPE , sighandler_crash);
368
- #if !defined POSH_OS_WIN32 && !defined POSH_OS_WIN64
369
- SETSIG_IF_UNSET(SIGBUS , sighandler_crash);
370
- #endif
371
- SETSIG_IF_UNSET(SIGSEGV, sighandler_crash);
372
-
373
260
  atexit(&frt_hash_finalize);
374
261
 
375
262
  utf8_encoding = rb_enc_find("UTF-8");
@@ -429,41 +316,3 @@ void frt_init(int argc, const char *const argv[]) {
429
316
  FRT_SORT_FIELD_DOC_REV->compare = frt_sort_field_doc_compare; /* compare */
430
317
  FRT_SORT_FIELD_DOC_REV->get_val = frt_sort_field_doc_get_val; /* get_val */
431
318
  }
432
-
433
- /**
434
- * For general use when testing
435
- *
436
- * TODO wrap in #ifdef
437
- */
438
-
439
- static bool p_switch = false;
440
- static bool p_switch_tmp = false;
441
-
442
- void p(const char *format, ...) {
443
- va_list args;
444
-
445
- if (!p_switch) return;
446
-
447
- va_start(args, format);
448
- vfprintf(stderr, format, args);
449
- va_end(args);
450
- }
451
-
452
- void p_on(void) {
453
- fprintf(stderr, "> > > > > STARTING PRINT\n");
454
- p_switch = true;
455
- }
456
-
457
- void p_off(void) {
458
- fprintf(stderr, "< < < < < STOPPING PRINT\n");
459
- p_switch = false;
460
- }
461
-
462
- void frt_p_pause(void) {
463
- p_switch_tmp = p_switch;
464
- p_switch = false;
465
- }
466
-
467
- void frt_p_resume(void) {
468
- p_switch = p_switch_tmp;
469
- }
@@ -105,9 +105,6 @@ extern char *frt_dbl_to_s(char *buf, double num);
105
105
  extern char *frt_strfmt(const char *fmt, ...);
106
106
  extern char *frt_vstrfmt(const char *fmt, va_list args);
107
107
 
108
- extern char *frt_get_stacktrace();
109
- extern void frt_print_stacktrace();
110
-
111
108
  extern void frt_register_for_cleanup(void *p, frt_free_ft free_func);
112
109
 
113
110
  /**
@@ -277,18 +274,6 @@ extern bool frt_x_do_logging;
277
274
  #endif
278
275
 
279
276
  extern void frt_init(int arc, const char *const argv[]);
280
- extern void frt_setprogname(const char *str);
281
- extern const char *frt_progname();
282
277
  extern void frt_micro_sleep(const int micro_seconds);
283
278
 
284
- /**
285
- * For general use during testing. Switch this on and off for print statements
286
- * to only print when p_on is called and not after p_off is called
287
- */
288
- extern void p(const char *format, ...);
289
- extern void p_on();
290
- extern void p_off();
291
- extern void frt_p_pause();
292
- extern void frt_p_resume();
293
-
294
279
  #endif
@@ -61,7 +61,6 @@ void FRT_VEXIT(const char *err_type, const char *fmt, va_list args)
61
61
  # endif
62
62
  {
63
63
  fflush(stdout);
64
- fprintf(EXCEPTION_STREAM, "\n%s: ", frt_progname());
65
64
 
66
65
  # ifdef FRT_HAS_VARARGS
67
66
  fprintf(EXCEPTION_STREAM, "%s occurred at <%s>:%d in %s\n",
@@ -76,7 +75,6 @@ void FRT_VEXIT(const char *err_type, const char *fmt, va_list args)
76
75
  }
77
76
 
78
77
  fprintf(EXCEPTION_STREAM, "\n");
79
- frt_print_stacktrace();
80
78
  if (frt_x_abort_on_exception) {
81
79
  exit(2); /* conventional value for failed execution */
82
80
  }
@@ -1286,6 +1286,7 @@ FrtBooleanClause *frt_bc_alloc(void) {
1286
1286
  FrtBooleanClause *frt_bc_init(FrtBooleanClause *self, FrtQuery *query, FrtBCType occur) {
1287
1287
  self->ref_cnt = 1;
1288
1288
  self->query = query;
1289
+ self->rbc = Qnil;
1289
1290
  frt_bc_set_occur(self, occur);
1290
1291
  return self;
1291
1292
  }
@@ -250,9 +250,7 @@ void Init_Ferret(void) {
250
250
  }
251
251
 
252
252
  void Init_isomorfeus_ferret_ext(void) {
253
- const char *const progname[] = {"ruby"};
254
-
255
- frt_init(1, progname);
253
+ frt_init(0, NULL);
256
254
 
257
255
  /* IDs */
258
256
  id_new = rb_intern("new");
@@ -279,22 +279,10 @@ static void append_to_msg_buf(const char *fmt, ...)
279
279
  va_end(args);
280
280
  }
281
281
 
282
-
283
- static void Tstack(void) {
284
- if (show_stack) {
285
- char *stack = frt_get_stacktrace();
286
- if (stack) {
287
- append_to_msg_buf("\n\nStack trace:\n%s\n", stack);
288
- free(stack);
289
- }
290
- }
291
- }
292
-
293
282
  static void vTmsg_nf(const char *fmt, va_list args)
294
283
  {
295
284
  if (verbose) {
296
285
  vappend_to_msg_buf(fmt, args);
297
- Tstack();
298
286
  }
299
287
  }
300
288
 
@@ -305,8 +293,6 @@ void vTmsg(const char *fmt, va_list args)
305
293
  vappend_to_msg_buf(fmt, args);
306
294
  va_end(args);
307
295
  append_to_msg_buf("\n");
308
-
309
- Tstack();
310
296
  }
311
297
  }
312
298
 
@@ -348,8 +334,6 @@ void tst_msg(const char *func, const char *fname, int line_num, const char *fmt,
348
334
  va_start(args, fmt);
349
335
  vappend_to_msg_buf(fmt, args);
350
336
  va_end(args);
351
-
352
- Tstack();
353
337
  }
354
338
  }
355
339
 
@@ -145,50 +145,6 @@ static void test_dbl_to_s(TestCase *tc, void *data)
145
145
  Asequal("NaN", frt_dbl_to_s(buf, NAN));
146
146
  }
147
147
 
148
-
149
- /**
150
- * Generate a stacktrace, make sure it does something
151
- */
152
- static void test_stacktrace(TestCase *tc, void *data)
153
- {
154
- FILE *old_stream = frt_x_exception_stream;
155
- (void)data; /* suppress warning */
156
- int tfd = fio_tmpfile();
157
- frt_x_exception_stream = fdopen(tfd, "w+");
158
- Atrue(frt_x_exception_stream != NULL);
159
- if (frt_x_exception_stream) {
160
- frt_print_stacktrace();
161
- long int f = ftell(frt_x_exception_stream);
162
- Assert(f, "Stream position should not be 0");
163
- fclose(frt_x_exception_stream);
164
- }
165
- frt_x_exception_stream = old_stream;
166
- }
167
-
168
- /**
169
- * Generate a normally fatal signal, which gets caught
170
- */
171
- /*
172
- static void test_sighandler(TestCase *tc, void *data)
173
- {
174
- bool old_abort = frt_x_abort_on_exception;
175
- FILE *old_stream = frt_x_exception_stream;
176
- (void)data;
177
- (void)tc;
178
-
179
- frt_x_exception_stream = false;
180
- frt_x_exception_stream = tmpfile();
181
-
182
- raise(SIGSEGV);
183
-
184
- Assert(ftell(frt_x_exception_stream), "Stream position should not be 0");
185
- fclose(frt_x_exception_stream);
186
-
187
- frt_x_exception_stream = old_stream;
188
- frt_x_abort_on_exception = old_abort;
189
- }
190
- */
191
-
192
148
  static void test_count_leading_zeros(TestCase *tc, void *data)
193
149
  {
194
150
  (void)data;
@@ -284,8 +240,6 @@ TestSuite *ts_global(TestSuite *suite)
284
240
  tst_run_test(suite, test_alloc, NULL);
285
241
  tst_run_test(suite, test_strfmt, NULL);
286
242
  tst_run_test(suite, test_dbl_to_s, NULL);
287
- tst_run_test(suite, test_stacktrace, NULL);
288
- // tst_run_test(suite, test_sighandler, NULL);
289
243
  tst_run_test(suite, test_count_leading_zeros, NULL);
290
244
  tst_run_test(suite, test_count_leading_ones, NULL);
291
245
  tst_run_test(suite, test_count_trailing_zeros, NULL);
@@ -266,7 +266,6 @@ void tst_check_hits(TestCase *tc, FrtSearcher *searcher, FrtQuery *query, const
266
266
  int i, count;
267
267
  int total_hits = s2l(expected_hits, num_array);
268
268
  FrtTopDocs *top_docs = frt_searcher_search(searcher, query, 0, total_hits + 1, NULL, NULL, NULL);
269
- frt_p_pause();
270
269
  if (!tc->failed && !Aiequal(total_hits, top_docs->total_hits)) {
271
270
  int i;
272
271
  Tmsg_nf("\texpected docs:\n\t ");
@@ -314,7 +313,6 @@ void tst_check_hits(TestCase *tc, FrtSearcher *searcher, FrtQuery *query, const
314
313
  count = frt_searcher_search_unscored(searcher, query, num_array2, ARRAY_SIZE, num_array2[3]);
315
314
  Aaiequal(num_array + 3, num_array2, count);
316
315
  }
317
- frt_p_resume();
318
316
  }
319
317
 
320
318
  void check_match_vector(TestCase *tc, FrtSearcher *searcher, FrtQuery *query,
@@ -1,5 +1,5 @@
1
1
  module Isomorfeus
2
2
  module Ferret
3
- VERSION = '0.13.0'
3
+ VERSION = '0.13.1'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: isomorfeus-ferret
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.0
4
+ version: 0.13.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Biedermann
@@ -154,13 +154,13 @@ files:
154
154
  - ext/isomorfeus_ferret_ext/brotli_encode.h
155
155
  - ext/isomorfeus_ferret_ext/brotli_port.h
156
156
  - ext/isomorfeus_ferret_ext/brotli_types.h
157
- - ext/isomorfeus_ferret_ext/bzip_blocksort.c
158
- - ext/isomorfeus_ferret_ext/bzip_huffman.c
159
157
  - ext/isomorfeus_ferret_ext/bzlib.c
160
158
  - ext/isomorfeus_ferret_ext/bzlib.h
159
+ - ext/isomorfeus_ferret_ext/bzlib_blocksort.c
161
160
  - ext/isomorfeus_ferret_ext/bzlib_compress.c
162
161
  - ext/isomorfeus_ferret_ext/bzlib_crctable.c
163
162
  - ext/isomorfeus_ferret_ext/bzlib_decompress.c
163
+ - ext/isomorfeus_ferret_ext/bzlib_huffman.c
164
164
  - ext/isomorfeus_ferret_ext/bzlib_private.h
165
165
  - ext/isomorfeus_ferret_ext/bzlib_randtable.c
166
166
  - ext/isomorfeus_ferret_ext/extconf.rb