isomorfeus-ferret 0.13.0 → 0.13.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: be4c84d556459a8ed5d2585068378c156bdb2d68507ca2786844b2c69a4e7f35
4
- data.tar.gz: '0096ef29b274ea39567e876d95d8441ff80589a8d3403c5aed3801c62377cffd'
3
+ metadata.gz: 958c051fa7f771e25ee1fb29c47aad9ab1cd7a570750f73450ae582474ef6fb5
4
+ data.tar.gz: beba6b22ba7493f324be38997fee4f4a56efc896281fcbb2f3521ebef6795029
5
5
  SHA512:
6
- metadata.gz: 55aa9f39fd4971e1a80bbcdeba14906928140fd023a7ee3e581ef2bafc63a8a664f5b0fbaf86dec1119ac62fd441703d33a4b6ee3731f811736a1b2eedcef9a3
7
- data.tar.gz: 3843bd29450fb925069733913aee9f66958dfe41aba052d7d14a18cb7f2d4df376e0756d7c97bcd01527120a6a77776d9bee2a1c0a488648fc61f500dfe6e98e
6
+ metadata.gz: 98bef5c79e3d4e8854d6a4cb68e2cb796c6f27bbaf1c7b7669ab895e8656d12d033651f8d8b29ac8d7504b46dba7567aa52f2ac297285ff2c6c62e52c48d0019
7
+ data.tar.gz: 165497f356ab32cdc6250b3eada43d9ec36d61ee992114bffa9c38372c7894455caeb95aa789cf025c2538c7f9e1eddafe87e66b58536424a4c8e116a123af88
data/README.md CHANGED
@@ -26,7 +26,7 @@ It should work on *nixes, *nuxes, *BSDs and also works on Windows.
26
26
  - The :store option no longer accepts :compress, compression must now be specified by the separate :compress options (see below).
27
27
  - The ASCII-specific Tokenizers and Analyzers have been removed
28
28
 
29
- ### Sring Encoding support
29
+ ### String Encoding support
30
30
 
31
31
  #### Input strings and stored fields
32
32
 
@@ -37,12 +37,13 @@ All Ruby string encodings are supported.
37
37
  When fields are stored, they are now stored with the encoding, so that when they are retrieved again, they
38
38
  retain the original encoding with positions matching the string in its original encoding.
39
39
 
40
- #### Tokens and Filters
40
+ #### Tokens, Terms, Filters and Queries
41
41
 
42
42
  Tokens are internally converted to UTF-8, which may change their length compared to their original encoding,
43
- yet they retain position information according to the source in its original encoding.
44
- The benefit is, that Filters, Stemmers or anything else working with Tokens only needs to support UTF-8 encoding,
45
- greatly simplifying things and ensuring consistent query results.
43
+ yet they retain position information according to the source in its original encoding. Terms are likewise stored in UTF-8 encoding.
44
+ Queries are converted to UTF-8 encoding too.
45
+ The benefit is that Filters, Stemmers or anything else working with Tokens and Terms only needs to support UTF-8 encoding,
46
+ greatly simplifying things and ensuring consistent query results, independent of source encoding.
46
47
 
47
48
  ### Compression
48
49
 
@@ -50,6 +51,7 @@ Compression semantics have changed, now Brotli, BZip2 and LZ4 compression codecs
50
51
  - BZip2: slow compression, slow decompression, high compression ratio
51
52
  - Brotli: slow compression, fast decompression, high compression ratio, recommended for general purpose.
52
53
  - LZ4: fast compression, fast decompression, low compression ratio
54
+
53
55
  To see performance and compression ratios `rake ferret_compression_bench` can be run from the cloned repo.
54
56
  It uses data and code within the misc/ferret_vs_lucene directory.
55
57
 
@@ -96,6 +98,7 @@ Ensure your locale is set to C.UTF-8, because the internal c tests don't know ho
96
98
 
97
99
  ## Benchmarks
98
100
 
101
+ ### Indexing and Searching
99
102
  - clone repo
100
103
  - bundle install
101
104
  - rake ferret_vs_lucene
@@ -104,20 +107,37 @@ A recent Java JDK must be installed to compile and run lucene benchmarks.
104
107
 
105
108
  Results on Linux:
106
109
  ```
107
- Ferret:
108
- Indexing Secs: 7.36 Docs: 19043, 2587 docs/s
109
- Searching took: 0.3366296s for 8000 queries
110
- thats 23765 q/s
111
-
112
- Lucene:
113
- Indexing Secs: 4.22 Docs: 19043, 4516 docs/s
114
- Searching took: 1.48s for 8000 queries
115
- thats 5420 q/s
116
- ---------------------------------------------------
117
- Lucene 9.0.0 0b18b3b965cedaf5eb129aa41243a44c83ca826d - jpountz - 2021-12-01 14:23:49
118
- JVM 17.0.1 (Private Build)
110
+ Ferret 0.13.0:
111
+ Indexing: 9.35 secs, Docs: 19043, 2035 docs/s
112
+ Searching took: 0.3133133s for 8000 queries
113
+ thats 25533 q/s
114
+ Total found: 42000
115
+ Index size: 28Mb
116
+
117
+ Lucene 9.1.0:
118
+ Indexing: 4.20 secs, Docs: 19043, 4538 docs/s
119
+ Searching took: 1.64s for 8000 queries
120
+ thats 4875 q/s
121
+ Total found: 41000
122
+ index size: 35Mb
123
+
124
+ JVM 11.0.14.1 (Ubuntu)
119
125
  ```
120
126
 
127
+ ### Storing Fields with Compression, Indexing and Retrieval
128
+ - clone repo
129
+ - bundle install
130
+ - rake ferret_compression_benchmark
131
+
132
+ Results on Linux, 0.13.0:
133
+
134
+ | Compression | Index & Store | Retrieve | Index size |
135
+ |-------------|---------------|---------------|------------|
136
+ | none | 2008 docs/s | 153853 docs/s | 43 MB |
137
+ | brotli | 1726 docs/s | 58315 docs/s | 36 MB |
138
+ | bzip2 | 1438 docs/s | 15382 docs/s | 38 MB |
139
+ | lz4 | 1932 docs/s | 127100 docs/s | 41 MB |
140
+
121
141
  ## Future
122
142
 
123
143
  Lots of things to do:
@@ -64,6 +64,7 @@ static ID id_fld_num_map;
64
64
  static ID id_field_num;
65
65
  static ID id_boost;
66
66
 
67
+ extern rb_encoding *utf8_encoding;
67
68
  extern void frb_set_term(VALUE rterm, FrtTerm *t);
68
69
  extern FrtAnalyzer *frb_get_cwrapped_analyzer(VALUE ranalyzer);
69
70
  extern VALUE frb_get_analyzer(FrtAnalyzer *a);
@@ -181,8 +182,9 @@ static VALUE frb_get_field_info(FrtFieldInfo *fi) {
181
182
  fi->rfi = TypedData_Wrap_Struct(cFieldInfo, &frb_field_info_t, fi);
182
183
  FRT_REF(fi);
183
184
  }
185
+ return fi->rfi;
184
186
  }
185
- return fi->rfi;
187
+ return Qnil;
186
188
  }
187
189
 
188
190
  /*
@@ -411,8 +413,9 @@ static VALUE frb_get_field_infos(FrtFieldInfos *fis) {
411
413
  fis->rfis = TypedData_Wrap_Struct(cFieldInfos, &frb_field_infos_t, fis);
412
414
  FRT_REF(fis);
413
415
  }
416
+ return fis->rfis;
414
417
  }
415
- return fis->rfis;
418
+ return Qnil;
416
419
  }
417
420
 
418
421
  /*
@@ -496,11 +499,6 @@ static VALUE frb_fis_get(VALUE self, VALUE ridx) {
496
499
  case T_STRING:
497
500
  rfi = frb_get_field_info(frt_fis_get_field(fis, frb_field(ridx)));
498
501
  break;
499
- /*
500
- case T_STRING:
501
- rfi = frb_get_field_info(frt_fis_get_field(fis, StringValuePtr(ridx)));
502
- break;
503
- */
504
502
  default:
505
503
  rb_raise(rb_eArgError, "Can't index FieldInfos with %s",
506
504
  rs2s(rb_obj_as_string(ridx)));
@@ -1219,6 +1217,7 @@ static VALUE frb_get_tv_term(FrtTVTerm *tv_term) {
1219
1217
  VALUE rtext;
1220
1218
  VALUE rpositions = Qnil;
1221
1219
  rtext = rb_str_new2(tv_term->text);
1220
+ rb_enc_associate(rtext, utf8_encoding);
1222
1221
  if (tv_term->positions) {
1223
1222
  int *positions = tv_term->positions;
1224
1223
  rpositions = rb_ary_new2(freq);
@@ -1697,10 +1696,9 @@ frb_iw_delete(VALUE self, VALUE rfield, VALUE rterm)
1697
1696
  * Get the FieldInfos object for this FrtIndexWriter. This is useful if you need
1698
1697
  * to dynamically add new fields to the index with specific properties.
1699
1698
  */
1700
- static VALUE
1701
- frb_iw_field_infos(VALUE self)
1702
- {
1703
- FrtIndexWriter *iw = (FrtIndexWriter *)DATA_PTR(self);
1699
+ static VALUE frb_iw_field_infos(VALUE self) {
1700
+ FrtIndexWriter *iw;
1701
+ TypedData_Get_Struct(self, FrtIndexWriter, &frb_index_writer_t, iw);
1704
1702
  return frb_get_field_infos(iw->fis);
1705
1703
  }
1706
1704
 
@@ -2715,10 +2713,9 @@ frb_ir_fields(VALUE self)
2715
2713
  *
2716
2714
  * Get the FieldInfos object for this IndexReader.
2717
2715
  */
2718
- static VALUE
2719
- frb_ir_field_infos(VALUE self)
2720
- {
2721
- FrtIndexReader *ir = (FrtIndexReader *)DATA_PTR(self);
2716
+ static VALUE frb_ir_field_infos(VALUE self) {
2717
+ FrtIndexReader *ir;
2718
+ TypedData_Get_Struct(self, FrtIndexReader, &frb_index_reader_t, ir);
2722
2719
  return frb_get_field_infos(ir->fis);
2723
2720
  }
2724
2721
 
@@ -3085,10 +3082,6 @@ static void Init_TermDocEnum(void) {
3085
3082
  rb_define_method(cTermDocEnum, "to_json", frb_tde_to_json, -1);
3086
3083
  }
3087
3084
 
3088
- /* rdochack
3089
- cTermVector = rb_define_class_under(mIndex, "TermVector", rb_cObject);
3090
- */
3091
-
3092
3085
  /*
3093
3086
  * Document-class: Ferret::Index::TermVector::TVOffsets
3094
3087
  *
@@ -3107,9 +3100,6 @@ cTermVector = rb_define_class_under(mIndex, "TermVector", rb_cObject);
3107
3100
  */
3108
3101
  static void Init_TVOffsets(void) {
3109
3102
  const char *tv_offsets_class = "TVOffsets";
3110
- /* rdochack
3111
- cTVOffsets = rb_define_class_under(cTermVector, "TVOffsets", rb_cObject);
3112
- */
3113
3103
  cTVOffsets = rb_struct_define(tv_offsets_class, "start", "end", NULL);
3114
3104
  rb_set_class_path(cTVOffsets, cTermVector, tv_offsets_class);
3115
3105
  rb_const_set(mIndex, rb_intern(tv_offsets_class), cTVOffsets);
@@ -3130,13 +3120,8 @@ static void Init_TVOffsets(void) {
3130
3120
  * tv_term = tv.find {|tvt| tvt.term = "fox"}
3131
3121
  * offsets = tv_term.positions.collect {|pos| tv.offsets[pos]}
3132
3122
  */
3133
- static void
3134
- Init_TVTerm(void)
3135
- {
3123
+ static void Init_TVTerm(void) {
3136
3124
  const char *tv_term_class = "TVTerm";
3137
- /* rdochack
3138
- cTVTerm = rb_define_class_under(cTermVector, "TVTerm", rb_cObject);
3139
- */
3140
3125
  cTVTerm = rb_struct_define(tv_term_class, "text", "freq", "positions", NULL);
3141
3126
  rb_set_class_path(cTVTerm, cTermVector, tv_term_class);
3142
3127
  rb_const_set(mIndex, rb_intern(tv_term_class), cTVTerm);
@@ -3172,15 +3157,9 @@ Init_TVTerm(void)
3172
3157
  * particular that you need to store both positions and offsets if you want
3173
3158
  * to associate offsets with particular terms.
3174
3159
  */
3175
- static void
3176
- Init_TermVector(void)
3177
- {
3160
+ static void Init_TermVector(void) {
3178
3161
  const char *tv_class = "TermVector";
3179
- /* rdochack
3180
- cTermVector = rb_define_class_under(mIndex, "TermVector", rb_cObject);
3181
- */
3182
- cTermVector = rb_struct_define(tv_class,
3183
- "field", "terms", "offsets", NULL);
3162
+ cTermVector = rb_struct_define(tv_class, "field", "terms", "offsets", NULL);
3184
3163
  rb_set_class_path(cTermVector, mIndex, tv_class);
3185
3164
  rb_const_set(mIndex, rb_intern(tv_class), cTermVector);
3186
3165
 
@@ -232,71 +232,6 @@ void frt_dummy_free(void *p) {
232
232
  (void)p; /* suppress unused argument warning */
233
233
  }
234
234
 
235
- #ifdef HAVE_GDB
236
- #define CMD_BUF_SIZE (128 + FILENAME_MAX)
237
- /* need to declare this as it is masked by default in linux */
238
-
239
- static char *build_shell_command(void) {
240
- int pid = getpid();
241
- char *buf = FRT_ALLOC_N(char, CMD_BUF_SIZE);
242
- char *command =
243
- "gdb -quiet -ex='bt' -ex='quit' %s %d 2>/dev/null | grep '^[ #]'";
244
-
245
- snprintf(buf, CMD_BUF_SIZE, command, frt_progname(), pid);
246
- return buf;
247
- }
248
-
249
- #endif
250
-
251
- /**
252
- * Call out to gdb to get our stacktrace.
253
- */
254
- char *frt_get_stacktrace(void) {
255
- #ifdef HAVE_GDB
256
- FILE *stream;
257
- char *gdb_filename = NULL, *buf = NULL, *stack = NULL;
258
- int offset = -FRT_BUFFER_SIZE;
259
-
260
- if ( !(buf = build_shell_command()) ) {
261
- fprintf(EXCEPTION_STREAM,
262
- "Unable to build stacktrace shell command\n");
263
- goto cleanup;
264
- }
265
-
266
- if ( !(stream = popen(buf, "r")) ) {
267
- fprintf(EXCEPTION_STREAM,
268
- "Unable to exec stacktrace shell command: '%s'\n", buf);
269
- goto cleanup;
270
- }
271
-
272
- do {
273
- offset += FRT_BUFFER_SIZE;
274
- FRT_REALLOC_N(stack, char, offset + FRT_BUFFER_SIZE);
275
- FRT_ZEROSET_N(stack + offset, char, FRT_BUFFER_SIZE);
276
- } while(fread(stack + offset, 1, FRT_BUFFER_SIZE, stream) == FRT_BUFFER_SIZE);
277
-
278
- pclose(stream);
279
-
280
- cleanup:
281
- if (gdb_filename) free(gdb_filename);
282
- if (buf) free(buf);
283
- return stack;
284
- #else
285
- return NULL;
286
- #endif
287
- }
288
-
289
- void frt_print_stacktrace(void) {
290
- char *stack = frt_get_stacktrace();
291
-
292
- if (stack) {
293
- fprintf(EXCEPTION_STREAM, "Stack trace:\n%s", stack);
294
- free(stack);
295
- } else {
296
- fprintf(EXCEPTION_STREAM, "Stack trace not available\n");
297
- }
298
- }
299
-
300
235
  typedef struct FreeMe {
301
236
  void *p;
302
237
  frt_free_ft free_func;
@@ -321,55 +256,7 @@ void frt_register_for_cleanup(void *p, frt_free_ft free_func) {
321
256
  free_me->free_func = free_func;
322
257
  }
323
258
 
324
- #define MAX_PROG_NAME 200
325
- static char name[MAX_PROG_NAME]; /* program name for error msgs */
326
-
327
- /* frt_setprogname: set stored name of program */
328
- void frt_setprogname(const char *str) {
329
- strncpy(name, str, sizeof(name) - 1);
330
- }
331
-
332
- const char *frt_progname(void) {
333
- return name;
334
- }
335
-
336
- static const char *signal_to_string(int signum) {
337
- switch (signum)
338
- {
339
- case SIGILL: return "SIGILL";
340
- case SIGABRT: return "SIGABRT";
341
- case SIGFPE: return "SIGFPE";
342
- #if !defined POSH_OS_WIN32 && !defined POSH_OS_WIN64
343
- case SIGBUS: return "SIGBUS";
344
- #endif
345
- case SIGSEGV: return "SIGSEGV";
346
- }
347
-
348
- return "Unknown Signal";
349
- }
350
-
351
- static void sighandler_crash(int signum) {
352
- frt_print_stacktrace();
353
- FRT_XEXIT("Signal", "Exiting on signal %s (%d)", signal_to_string(signum), signum);
354
- }
355
-
356
- #define SETSIG_IF_UNSET(sig, handler) do { \
357
- signal(sig, handler); \
358
- } while(0)
359
-
360
259
  void frt_init(int argc, const char *const argv[]) {
361
- if (argc > 0) {
362
- frt_setprogname(argv[0]);
363
- }
364
-
365
- SETSIG_IF_UNSET(SIGILL , sighandler_crash);
366
- SETSIG_IF_UNSET(SIGABRT, sighandler_crash);
367
- SETSIG_IF_UNSET(SIGFPE , sighandler_crash);
368
- #if !defined POSH_OS_WIN32 && !defined POSH_OS_WIN64
369
- SETSIG_IF_UNSET(SIGBUS , sighandler_crash);
370
- #endif
371
- SETSIG_IF_UNSET(SIGSEGV, sighandler_crash);
372
-
373
260
  atexit(&frt_hash_finalize);
374
261
 
375
262
  utf8_encoding = rb_enc_find("UTF-8");
@@ -429,41 +316,3 @@ void frt_init(int argc, const char *const argv[]) {
429
316
  FRT_SORT_FIELD_DOC_REV->compare = frt_sort_field_doc_compare; /* compare */
430
317
  FRT_SORT_FIELD_DOC_REV->get_val = frt_sort_field_doc_get_val; /* get_val */
431
318
  }
432
-
433
- /**
434
- * For general use when testing
435
- *
436
- * TODO wrap in #ifdef
437
- */
438
-
439
- static bool p_switch = false;
440
- static bool p_switch_tmp = false;
441
-
442
- void p(const char *format, ...) {
443
- va_list args;
444
-
445
- if (!p_switch) return;
446
-
447
- va_start(args, format);
448
- vfprintf(stderr, format, args);
449
- va_end(args);
450
- }
451
-
452
- void p_on(void) {
453
- fprintf(stderr, "> > > > > STARTING PRINT\n");
454
- p_switch = true;
455
- }
456
-
457
- void p_off(void) {
458
- fprintf(stderr, "< < < < < STOPPING PRINT\n");
459
- p_switch = false;
460
- }
461
-
462
- void frt_p_pause(void) {
463
- p_switch_tmp = p_switch;
464
- p_switch = false;
465
- }
466
-
467
- void frt_p_resume(void) {
468
- p_switch = p_switch_tmp;
469
- }
@@ -105,9 +105,6 @@ extern char *frt_dbl_to_s(char *buf, double num);
105
105
  extern char *frt_strfmt(const char *fmt, ...);
106
106
  extern char *frt_vstrfmt(const char *fmt, va_list args);
107
107
 
108
- extern char *frt_get_stacktrace();
109
- extern void frt_print_stacktrace();
110
-
111
108
  extern void frt_register_for_cleanup(void *p, frt_free_ft free_func);
112
109
 
113
110
  /**
@@ -277,18 +274,6 @@ extern bool frt_x_do_logging;
277
274
  #endif
278
275
 
279
276
  extern void frt_init(int arc, const char *const argv[]);
280
- extern void frt_setprogname(const char *str);
281
- extern const char *frt_progname();
282
277
  extern void frt_micro_sleep(const int micro_seconds);
283
278
 
284
- /**
285
- * For general use during testing. Switch this on and off for print statements
286
- * to only print when p_on is called and not after p_off is called
287
- */
288
- extern void p(const char *format, ...);
289
- extern void p_on();
290
- extern void p_off();
291
- extern void frt_p_pause();
292
- extern void frt_p_resume();
293
-
294
279
  #endif
@@ -14,6 +14,7 @@
14
14
  #undef close
15
15
  #undef read
16
16
 
17
+ extern rb_encoding *utf8_encoding;
17
18
  extern void frt_micro_sleep(const int micro_seconds);
18
19
 
19
20
  #define GET_LOCK(lock, name, store, err_msg) do {\
@@ -1710,8 +1711,7 @@ static FrtTermVector *frt_fr_read_term_vector(FrtFieldsReader *fr, int field_num
1710
1711
  total_len = delta_start + delta_len;
1711
1712
  frt_is_read_bytes(fdt_in, buffer + delta_start, delta_len);
1712
1713
  buffer[total_len++] = '\0';
1713
- term->text = (char *)memcpy(FRT_ALLOC_N(char, total_len),
1714
- buffer, total_len);
1714
+ term->text = (char *)memcpy(FRT_ALLOC_N(char, total_len), buffer, total_len);
1715
1715
 
1716
1716
  /* read freq */
1717
1717
  freq = term->freq = frt_is_read_vint(fdt_in);
@@ -1822,8 +1822,7 @@ FrtTermVector *frt_fr_get_field_tv(FrtFieldsReader *fr, int doc_num, int field_n
1822
1822
  *
1823
1823
  ****************************************************************************/
1824
1824
 
1825
- FrtFieldsWriter *frt_fw_open(FrtStore *store, const char *segment, FrtFieldInfos *fis)
1826
- {
1825
+ FrtFieldsWriter *frt_fw_open(FrtStore *store, const char *segment, FrtFieldInfos *fis) {
1827
1826
  FrtFieldsWriter *fw = FRT_ALLOC(FrtFieldsWriter);
1828
1827
  char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
1829
1828
  size_t segment_len = strlen(segment);
@@ -1844,8 +1843,7 @@ FrtFieldsWriter *frt_fw_open(FrtStore *store, const char *segment, FrtFieldInfos
1844
1843
  return fw;
1845
1844
  }
1846
1845
 
1847
- void frt_fw_close(FrtFieldsWriter *fw)
1848
- {
1846
+ void frt_fw_close(FrtFieldsWriter *fw) {
1849
1847
  frt_os_close(fw->fdt_out);
1850
1848
  frt_os_close(fw->fdx_out);
1851
1849
  frt_ram_destroy_buffer(fw->buffer);
@@ -2046,8 +2044,7 @@ void frt_fw_add_doc(FrtFieldsWriter *fw, FrtDocument *doc) {
2046
2044
  frt_ramo_write_to(fw->buffer, fdt_out);
2047
2045
  }
2048
2046
 
2049
- void frt_fw_write_tv_index(FrtFieldsWriter *fw)
2050
- {
2047
+ void frt_fw_write_tv_index(FrtFieldsWriter *fw) {
2051
2048
  int i;
2052
2049
  const int tv_cnt = frt_ary_size(fw->tv_fields);
2053
2050
  FrtOutStream *fdt_out = fw->fdt_out;
@@ -5548,9 +5545,24 @@ FrtHash *frt_dw_invert_field(FrtDocWriter *dw, FrtFieldInverter *fld_inv, FrtDoc
5548
5545
  for (i = 0; i < df_size; i++) {
5549
5546
  int len = df->lengths[i];
5550
5547
  char *data_ptr = df->data[i];
5551
- if (len > FRT_MAX_WORD_SIZE) {
5552
- len = FRT_MAX_WORD_SIZE - 1;
5553
- data_ptr = (char *)memcpy(buf, df->data[i], len);
5548
+ if (df->encodings[i] == utf8_encoding) {
5549
+ if (len >= FRT_MAX_WORD_SIZE) {
5550
+ len = FRT_MAX_WORD_SIZE - 1; // TODO: this may invalidate mbc's
5551
+ data_ptr = (char *)memcpy(buf, df->data[i], len);
5552
+ buf[len] = '\0';
5553
+ }
5554
+ } else if (df->encodings[i] != utf8_encoding) {
5555
+ if (len >= FRT_MAX_WORD_SIZE)
5556
+ len = FRT_MAX_WORD_SIZE - 1;
5557
+ const unsigned char *sp = (unsigned char *)df->data[i];
5558
+ unsigned char *dp = (unsigned char *)&buf;
5559
+ rb_econv_t *ec = rb_econv_open(rb_enc_name(df->encodings[i]), "UTF-8", RUBY_ECONV_INVALID_REPLACE);
5560
+ assert(ec != NULL);
5561
+ rb_econv_convert(ec, &sp, (unsigned char *)df->data[i] + len, &dp, (unsigned char *)&buf + FRT_MAX_WORD_SIZE - 1, 0);
5562
+ rb_econv_close(ec);
5563
+ len = dp - (unsigned char *)&buf;
5564
+ buf[len] = '\0';
5565
+ data_ptr = buf;
5554
5566
  }
5555
5567
  dw_add_posting(mp, curr_plists, fld_plists, doc_num, data_ptr, len, i);
5556
5568
  if (store_offsets) {
@@ -61,7 +61,6 @@ void FRT_VEXIT(const char *err_type, const char *fmt, va_list args)
61
61
  # endif
62
62
  {
63
63
  fflush(stdout);
64
- fprintf(EXCEPTION_STREAM, "\n%s: ", frt_progname());
65
64
 
66
65
  # ifdef FRT_HAS_VARARGS
67
66
  fprintf(EXCEPTION_STREAM, "%s occurred at <%s>:%d in %s\n",
@@ -76,7 +75,6 @@ void FRT_VEXIT(const char *err_type, const char *fmt, va_list args)
76
75
  }
77
76
 
78
77
  fprintf(EXCEPTION_STREAM, "\n");
79
- frt_print_stacktrace();
80
78
  if (frt_x_abort_on_exception) {
81
79
  exit(2); /* conventional value for failed execution */
82
80
  }
@@ -1286,6 +1286,7 @@ FrtBooleanClause *frt_bc_alloc(void) {
1286
1286
  FrtBooleanClause *frt_bc_init(FrtBooleanClause *self, FrtQuery *query, FrtBCType occur) {
1287
1287
  self->ref_cnt = 1;
1288
1288
  self->query = query;
1289
+ self->rbc = Qnil;
1289
1290
  frt_bc_set_occur(self, occur);
1290
1291
  return self;
1291
1292
  }
@@ -250,9 +250,7 @@ void Init_Ferret(void) {
250
250
  }
251
251
 
252
252
  void Init_isomorfeus_ferret_ext(void) {
253
- const char *const progname[] = {"ruby"};
254
-
255
- frt_init(1, progname);
253
+ frt_init(0, NULL);
256
254
 
257
255
  /* IDs */
258
256
  id_new = rb_intern("new");
@@ -279,22 +279,10 @@ static void append_to_msg_buf(const char *fmt, ...)
279
279
  va_end(args);
280
280
  }
281
281
 
282
-
283
- static void Tstack(void) {
284
- if (show_stack) {
285
- char *stack = frt_get_stacktrace();
286
- if (stack) {
287
- append_to_msg_buf("\n\nStack trace:\n%s\n", stack);
288
- free(stack);
289
- }
290
- }
291
- }
292
-
293
282
  static void vTmsg_nf(const char *fmt, va_list args)
294
283
  {
295
284
  if (verbose) {
296
285
  vappend_to_msg_buf(fmt, args);
297
- Tstack();
298
286
  }
299
287
  }
300
288
 
@@ -305,8 +293,6 @@ void vTmsg(const char *fmt, va_list args)
305
293
  vappend_to_msg_buf(fmt, args);
306
294
  va_end(args);
307
295
  append_to_msg_buf("\n");
308
-
309
- Tstack();
310
296
  }
311
297
  }
312
298
 
@@ -348,8 +334,6 @@ void tst_msg(const char *func, const char *fname, int line_num, const char *fmt,
348
334
  va_start(args, fmt);
349
335
  vappend_to_msg_buf(fmt, args);
350
336
  va_end(args);
351
-
352
- Tstack();
353
337
  }
354
338
  }
355
339
 
@@ -145,50 +145,6 @@ static void test_dbl_to_s(TestCase *tc, void *data)
145
145
  Asequal("NaN", frt_dbl_to_s(buf, NAN));
146
146
  }
147
147
 
148
-
149
- /**
150
- * Generate a stacktrace, make sure it does something
151
- */
152
- static void test_stacktrace(TestCase *tc, void *data)
153
- {
154
- FILE *old_stream = frt_x_exception_stream;
155
- (void)data; /* suppress warning */
156
- int tfd = fio_tmpfile();
157
- frt_x_exception_stream = fdopen(tfd, "w+");
158
- Atrue(frt_x_exception_stream != NULL);
159
- if (frt_x_exception_stream) {
160
- frt_print_stacktrace();
161
- long int f = ftell(frt_x_exception_stream);
162
- Assert(f, "Stream position should not be 0");
163
- fclose(frt_x_exception_stream);
164
- }
165
- frt_x_exception_stream = old_stream;
166
- }
167
-
168
- /**
169
- * Generate a normally fatal signal, which gets caught
170
- */
171
- /*
172
- static void test_sighandler(TestCase *tc, void *data)
173
- {
174
- bool old_abort = frt_x_abort_on_exception;
175
- FILE *old_stream = frt_x_exception_stream;
176
- (void)data;
177
- (void)tc;
178
-
179
- frt_x_exception_stream = false;
180
- frt_x_exception_stream = tmpfile();
181
-
182
- raise(SIGSEGV);
183
-
184
- Assert(ftell(frt_x_exception_stream), "Stream position should not be 0");
185
- fclose(frt_x_exception_stream);
186
-
187
- frt_x_exception_stream = old_stream;
188
- frt_x_abort_on_exception = old_abort;
189
- }
190
- */
191
-
192
148
  static void test_count_leading_zeros(TestCase *tc, void *data)
193
149
  {
194
150
  (void)data;
@@ -284,8 +240,6 @@ TestSuite *ts_global(TestSuite *suite)
284
240
  tst_run_test(suite, test_alloc, NULL);
285
241
  tst_run_test(suite, test_strfmt, NULL);
286
242
  tst_run_test(suite, test_dbl_to_s, NULL);
287
- tst_run_test(suite, test_stacktrace, NULL);
288
- // tst_run_test(suite, test_sighandler, NULL);
289
243
  tst_run_test(suite, test_count_leading_zeros, NULL);
290
244
  tst_run_test(suite, test_count_leading_ones, NULL);
291
245
  tst_run_test(suite, test_count_trailing_zeros, NULL);
@@ -266,7 +266,6 @@ void tst_check_hits(TestCase *tc, FrtSearcher *searcher, FrtQuery *query, const
266
266
  int i, count;
267
267
  int total_hits = s2l(expected_hits, num_array);
268
268
  FrtTopDocs *top_docs = frt_searcher_search(searcher, query, 0, total_hits + 1, NULL, NULL, NULL);
269
- frt_p_pause();
270
269
  if (!tc->failed && !Aiequal(total_hits, top_docs->total_hits)) {
271
270
  int i;
272
271
  Tmsg_nf("\texpected docs:\n\t ");
@@ -314,7 +313,6 @@ void tst_check_hits(TestCase *tc, FrtSearcher *searcher, FrtQuery *query, const
314
313
  count = frt_searcher_search_unscored(searcher, query, num_array2, ARRAY_SIZE, num_array2[3]);
315
314
  Aaiequal(num_array + 3, num_array2, count);
316
315
  }
317
- frt_p_resume();
318
316
  }
319
317
 
320
318
  void check_match_vector(TestCase *tc, FrtSearcher *searcher, FrtQuery *query,
@@ -1,5 +1,5 @@
1
1
  module Isomorfeus
2
2
  module Ferret
3
- VERSION = '0.13.0'
3
+ VERSION = '0.13.3'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: isomorfeus-ferret
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.0
4
+ version: 0.13.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Biedermann
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-04-16 00:00:00.000000000 Z
11
+ date: 2022-04-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -154,13 +154,13 @@ files:
154
154
  - ext/isomorfeus_ferret_ext/brotli_encode.h
155
155
  - ext/isomorfeus_ferret_ext/brotli_port.h
156
156
  - ext/isomorfeus_ferret_ext/brotli_types.h
157
- - ext/isomorfeus_ferret_ext/bzip_blocksort.c
158
- - ext/isomorfeus_ferret_ext/bzip_huffman.c
159
157
  - ext/isomorfeus_ferret_ext/bzlib.c
160
158
  - ext/isomorfeus_ferret_ext/bzlib.h
159
+ - ext/isomorfeus_ferret_ext/bzlib_blocksort.c
161
160
  - ext/isomorfeus_ferret_ext/bzlib_compress.c
162
161
  - ext/isomorfeus_ferret_ext/bzlib_crctable.c
163
162
  - ext/isomorfeus_ferret_ext/bzlib_decompress.c
163
+ - ext/isomorfeus_ferret_ext/bzlib_huffman.c
164
164
  - ext/isomorfeus_ferret_ext/bzlib_private.h
165
165
  - ext/isomorfeus_ferret_ext/bzlib_randtable.c
166
166
  - ext/isomorfeus_ferret_ext/extconf.rb