isomorfeus-ferret 0.13.4 → 0.13.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 19cfca9e5508c89a931678c23a5b0b32d8759d0d1839ac0ecf0b043d93cd1ddf
4
- data.tar.gz: 5999edb4c8ce46718264abdfac47cdd2bcc0f8e0ddf46a0af69bfba1ec024a1c
3
+ metadata.gz: 479f3df28144a3ca25194afc36590058e027af38ba43cc42d24ffd83a35a6da7
4
+ data.tar.gz: 95c80530e4012e0f7219e3b60cc82748f4e6d1824f0958cfda6cd4735c92a82a
5
5
  SHA512:
6
- metadata.gz: e8ab5a9574a3b610b4d554a94ecbdf3252a77d7947f81fdd21bdd0c87ed6bd0940f4842091f0bec793b4e061fe218706d0e16ef64ab19d85ca48d825a1867beb
7
- data.tar.gz: 474d0d57a321f3eaf47e372f76c7102592b85c7d0bf2e8506e7e3baf6ad3cbba2b5628296b5a3cf50b9bdba3031a2712225234143343cb9dbf80bc3781d00faa
6
+ metadata.gz: 0a72ddf5c974896ac0e5d11cdab105c02894f12ffac43db12b7651393f0bfd5ebd5f99f4e03e7714a8d230c6e77f71a1f5ee1c243b01608b23555a170eabe5c9
7
+ data.tar.gz: 58a803a8de3afae0b2bdf6e0eeda41f6a2e29dc649d8e219d2d7d5c667bd27bea3fa8dd80f16af934456e9414cd0f6a2ec9b998d303593db94f43de208a3eaff
data/README.md CHANGED
@@ -63,14 +63,8 @@ fis.add_field(:compressed_field, :store => :yes, :compression => :brotli, :term_
63
63
 
64
64
  ### Performance
65
65
 
66
- The encoding support demands its toll, indexing performance dropped a bit in comparision to 0.12, but still thousands of docs per second, depending on machine/docs.
67
- On Windows the indexing performance is still terrible, but that may be resolved in a future project.
68
-
69
- Search performance is still excellent and multiple times faster than Lucene.
70
-
71
- Lucene achieves roughly double the indexing performance. This seems to be because of the different way strings and
72
- encodings are handled in Java. For example, the Java WhitespaceTokenizer code requires only one method call per character (check for whitespace), but for Ruby, to support all the different encodings, several method calls are required per character (retrieve character according to encoding, check character for whitespace).
73
- Ferret is internally using the standard Ruby string encoding methods.
66
+ For version 0.13.7 the performance bottleneck has been identified and removed; ferret now delivers excellent indexing performance on all platforms, see numbers below.
67
+ On Windows, performance is still not as good as on Linux, but that is equally true for Lucene and is due to how the Windows filesystem works.
74
68
 
75
69
  ## Documentation
76
70
 
@@ -105,43 +99,35 @@ Ensure your locale is set to C.UTF-8, because the internal c tests don't know ho
105
99
 
106
100
  A recent Java JDK must be installed to compile and run lucene benchmarks.
107
101
 
108
- Results on Linux:
109
- ```
110
- Ferret 0.13.0:
111
- Indexing: 9.35 secs, Docs: 19043, 2035 docs/s
112
- Searching took: 0.3133133s for 8000 queries
113
- thats 25533 q/s
114
- Total found: 42000
115
- Index size: 28Mb
116
-
117
- Lucene 9.1.0:
118
- Indexing: 4.20 secs, Docs: 19043, 4538 docs/s
119
- Searching took: 1.64s for 8000 queries
120
- thats 4875 q/s
121
- Total found: 41000
122
- index size: 35Mb
123
-
124
- JVM 11.0.14.1 (Ubuntu)
125
- ```
102
+ Results, Ferret 0.13.7 vs. Lucene 9.1.0, WhitespaceAnalyzer, on an old Intel Core i5 from 2015:
103
+
104
+ | OS | Task | Ferret | Lucene* |
105
+ |---------|------------|-----------------|----------------|
106
+ | Linux | Indexing | 4905 docs/s | 4785 docs/s |
107
+ | Windows | Indexing | 2361 docs/s | 2395 docs/s |
108
+ | Linux | Searching | 25664 queries/s | 4708 queries/s |
109
+ | Windows | Searching | 3646 queries/s | 935 queries/s |
110
+ | | Index Size | 28 MB | 35 MB |
111
+
112
+ *Lucene 9.1.0 on JVM 11.0.14.1 (Ubuntu)
126
113
 
127
114
  ### Storing Fields with Compression, Indexing and Retrieval
128
115
  - clone repo
129
116
  - bundle install
130
117
  - rake ferret_compression_benchmark
131
118
 
132
- Results on Linux, 0.13.0:
119
+ Results on Linux, 0.13.7, on an old Intel Core i5 from 2015:
133
120
 
134
121
  | Compression | Index & Store | Retrieve | Index size |
135
122
  |-------------|---------------|---------------|------------|
136
- | none | 2008 docs/s | 153853 docs/s | 43 MB |
137
- | brotli | 1726 docs/s | 58315 docs/s | 36 MB |
138
- | bzip2 | 1438 docs/s | 15382 docs/s | 38 MB |
139
- | lz4 | 1932 docs/s | 127100 docs/s | 41 MB |
123
+ | none | 4866 docs/s | 153853 docs/s | 43 MB |
124
+ | brotli | 3539 docs/s | 58315 docs/s | 36 MB |
125
+ | bzip2 | 2624 docs/s | 15382 docs/s | 38 MB |
126
+ | lz4 | 4639 docs/s | 127100 docs/s | 41 MB |
140
127
 
141
128
  ## Future
142
129
 
143
130
  Lots of things to do:
144
- - Improve indexing performance on Windows (WriteFile is terribly slow, maybe use mapping, see libuv)
145
131
  - Bring documentation in order in a docs directory
146
132
  - Review code (especially for memory/stack issues, typical c issues)
147
133
  - Take care of ruby GVL and threading
@@ -117,13 +117,13 @@ static void bm_single_run(BenchMarkUnit *unit, BenchMarkTimes *bm_times)
117
117
  struct rusage ru_before, ru_after;
118
118
 
119
119
  if (gettimeofday(&tv_before, NULL) == -1)
120
- FRT_RAISE(FRT_UNSUPPORTED_ERROR, "gettimeofday failed\n");
120
+ rb_raise(rb_eRuntimeError, "gettimeofday failed\n");
121
121
  getrusage(RUSAGE_SELF, &ru_before);
122
122
 
123
123
  unit->run();
124
124
 
125
125
  if (gettimeofday(&tv_after, NULL) == -1)
126
- FRT_RAISE(FRT_UNSUPPORTED_ERROR, "gettimeofday failed\n");
126
+ rb_raise(rb_eRuntimeError, "gettimeofday failed\n");
127
127
  getrusage(RUSAGE_SELF, &ru_after);
128
128
 
129
129
  bm_times->rtime = TVAL_TO_SEC(tv_before, tv_after);
@@ -1271,11 +1271,11 @@ void frb_iw_free(void *p) {
1271
1271
 
1272
1272
  void frb_iw_mark(void *p) {
1273
1273
  FrtIndexWriter *iw = (FrtIndexWriter *)p;
1274
- if (iw->analyzer->ranalyzer)
1274
+ if (iw->analyzer && iw->analyzer->ranalyzer)
1275
1275
  rb_gc_mark(iw->analyzer->ranalyzer);
1276
- if (iw->store->rstore)
1276
+ if (iw->store && iw->store->rstore)
1277
1277
  rb_gc_mark(iw->store->rstore);
1278
- if (iw->fis->rfis)
1278
+ if (iw->fis && iw->fis->rfis)
1279
1279
  rb_gc_mark(iw->fis->rfis);
1280
1280
  }
1281
1281
 
@@ -329,7 +329,7 @@ static void cw_copy_file(FrtCompoundWriter *cw, FrtCWFileEntry *src, FrtOutStrea
329
329
  frt_is_close(is);
330
330
  }
331
331
 
332
- void frt_cw_close(FrtCompoundWriter *cw)
332
+ void frt_cw_close(FrtCompoundWriter *cw, FrtDeleter *dlr)
333
333
  {
334
334
  FrtOutStream *os = NULL;
335
335
  int i;
@@ -356,6 +356,9 @@ void frt_cw_close(FrtCompoundWriter *cw)
356
356
  for (i = 0; i < frt_ary_size(cw->file_entries); i++) {
357
357
  cw->file_entries[i].data_offset = frt_os_pos(os);
358
358
  cw_copy_file(cw, &cw->file_entries[i], os);
359
+ if (dlr) {
360
+ frt_deleter_queue_file(dlr, cw->file_entries[i].name);
361
+ }
359
362
  }
360
363
 
361
364
  /* Write the data offsets into the directory of the compound stream */
@@ -19,7 +19,6 @@ const char *const ERROR_TYPES[] = {
19
19
  "Lock Error"
20
20
  };
21
21
 
22
- bool frt_x_do_logging = false;
23
22
  bool frt_x_abort_on_exception = true;
24
23
  bool frt_x_has_aborted = false;
25
24
  FILE *frt_x_exception_stream = NULL;
@@ -272,15 +272,13 @@ static const struct FrtOutStreamMethods FS_OUT_STREAM_METHODS = {
272
272
  static FrtOutStream *fs_new_output(FrtStore *store, const char *filename)
273
273
  {
274
274
  char path[FRT_MAX_FILE_PATH];
275
- int fd = open(join_path(path, store->dir.path, filename),
276
- O_WRONLY | O_CREAT | O_BINARY, store->file_mode);
277
- FrtOutStream *os;
275
+ int fd = open(join_path(path, store->dir.path, filename), O_WRONLY | O_CREAT | O_BINARY, store->file_mode);
278
276
  if (fd < 0) {
279
277
  FRT_RAISE(FRT_IO_ERROR, "couldn't create OutStream %s: <%s>",
280
278
  path, strerror(errno));
281
279
  }
282
280
 
283
- os = frt_os_new();
281
+ FrtOutStream *os = frt_os_new();
284
282
  os->file.fd = fd;
285
283
  os->m = &FS_OUT_STREAM_METHODS;
286
284
  return os;
@@ -418,9 +416,7 @@ static void fs_close_lock_i(FrtLock *lock)
418
416
 
419
417
  static FrtHash *stores = NULL;
420
418
 
421
- #ifndef UNTHREADED
422
419
  static frt_mutex_t stores_mutex = FRT_MUTEX_INITIALIZER;
423
- #endif
424
420
 
425
421
  static void fs_close_i(FrtStore *store)
426
422
  {
@@ -13,7 +13,7 @@
13
13
 
14
14
  #define FRT_MAX_WORD_SIZE 255
15
15
  #define FRT_MAX_FILE_PATH 1024
16
- #define FRT_BUFFER_SIZE 1024
16
+ #define FRT_BUFFER_SIZE 16384
17
17
 
18
18
  typedef enum {
19
19
  FRT_COMPRESSION_NONE = 0,
@@ -266,13 +266,6 @@ extern FILE *frt_x_exception_stream;
266
266
  #define EXCEPTION 2
267
267
  #define EXCEPTION_STREAM (frt_x_exception_stream ? frt_x_exception_stream : stderr)
268
268
 
269
- #ifdef DEBUG
270
- extern bool frt_x_do_logging;
271
- #define xlog if (frt_x_do_logging) printf
272
- #else
273
- #define xlog()
274
- #endif
275
-
276
269
  extern void frt_init(int arc, const char *const argv[]);
277
270
  extern void frt_micro_sleep(const int micro_seconds);
278
271
 
@@ -681,15 +681,13 @@ static char *si_norm_file_name(FrtSegmentInfo *si, char *buf, int field_num)
681
681
  if (field_num >= si->norm_gens_size
682
682
  || 0 > (norm_gen = si->norm_gens[field_num])) {
683
683
  return NULL;
684
- }
685
- else {
684
+ } else {
686
685
  const char *ext = (si->use_compound_file && norm_gen > 0) ? "s" : "f";
687
686
  return fn_for_gen_field(buf, si->name, ext, norm_gen, field_num);
688
687
  }
689
688
  }
690
689
 
691
- static void deleter_queue_file(FrtDeleter *dlr, const char *file_name);
692
- #define DEL(file_name) deleter_queue_file(dlr, file_name)
690
+ void frt_deleter_queue_file(FrtDeleter *dlr, const char *file_name);
693
691
 
694
692
  static void si_delete_files(FrtSegmentInfo *si, FrtFieldInfos *fis, FrtDeleter *dlr)
695
693
  {
@@ -700,7 +698,7 @@ static void si_delete_files(FrtSegmentInfo *si, FrtFieldInfos *fis, FrtDeleter *
700
698
 
701
699
  for (i = si->norm_gens_size - 1; i >= 0; i--) {
702
700
  if (0 <= si->norm_gens[i]) {
703
- DEL(si_norm_file_name(si, file_name, fis->fields[i]->number));
701
+ frt_deleter_queue_file(dlr, si_norm_file_name(si, file_name, fis->fields[i]->number));
704
702
  }
705
703
  }
706
704
 
@@ -710,15 +708,14 @@ static void si_delete_files(FrtSegmentInfo *si, FrtFieldInfos *fis, FrtDeleter *
710
708
 
711
709
  if (si->use_compound_file) {
712
710
  memcpy(ext, "cfs", 4);
713
- DEL(file_name);
711
+ frt_deleter_queue_file(dlr, file_name);
714
712
  if (0 <= si->del_gen) {
715
- DEL(frt_fn_for_generation(file_name, si->name, "del", si->del_gen));
713
+ frt_deleter_queue_file(dlr, frt_fn_for_generation(file_name, si->name, "del", si->del_gen));
716
714
  }
717
- }
718
- else {
715
+ } else {
719
716
  for (i = FRT_NELEMS(INDEX_EXTENSIONS) - 1; i >= 0; i--) {
720
717
  memcpy(ext, INDEX_EXTENSIONS[i], 4);
721
- DEL(file_name);
718
+ frt_deleter_queue_file(dlr, file_name);
722
719
  }
723
720
  }
724
721
  }
@@ -3695,8 +3692,7 @@ static bool file_name_filter_is_cfs_file(const char *file_name) {
3695
3692
  ****************************************************************************/
3696
3693
 
3697
3694
  #define DELETABLE_START_CAPA 8
3698
- FrtDeleter *frt_deleter_new(FrtSegmentInfos *sis, FrtStore *store)
3699
- {
3695
+ FrtDeleter *frt_deleter_new(FrtSegmentInfos *sis, FrtStore *store) {
3700
3696
  FrtDeleter *dlr = FRT_ALLOC(FrtDeleter);
3701
3697
  dlr->sis = sis;
3702
3698
  dlr->store = store;
@@ -3704,19 +3700,16 @@ FrtDeleter *frt_deleter_new(FrtSegmentInfos *sis, FrtStore *store)
3704
3700
  return dlr;
3705
3701
  }
3706
3702
 
3707
- void frt_deleter_destroy(FrtDeleter *dlr)
3708
- {
3703
+ void frt_deleter_destroy(FrtDeleter *dlr) {
3709
3704
  frt_hs_destroy(dlr->pending);
3710
3705
  free(dlr);
3711
3706
  }
3712
3707
 
3713
- static void deleter_queue_file(FrtDeleter *dlr, const char *file_name)
3714
- {
3708
+ void frt_deleter_queue_file(FrtDeleter *dlr, const char *file_name) {
3715
3709
  frt_hs_add(dlr->pending, frt_estrdup(file_name));
3716
3710
  }
3717
3711
 
3718
- void frt_deleter_delete_file(FrtDeleter *dlr, char *file_name)
3719
- {
3712
+ void frt_deleter_delete_file(FrtDeleter *dlr, char *file_name) {
3720
3713
  FrtStore *store = dlr->store;
3721
3714
  FRT_TRY
3722
3715
  if (store->exists(store, file_name)) {
@@ -3728,8 +3721,7 @@ void frt_deleter_delete_file(FrtDeleter *dlr, char *file_name)
3728
3721
  FRT_XENDTRY
3729
3722
  }
3730
3723
 
3731
- static void deleter_commit_pending_deletions(FrtDeleter *dlr)
3732
- {
3724
+ static void deleter_commit_pending_deletions(FrtDeleter *dlr) {
3733
3725
  FrtHashSetEntry *hse, *hse_next = dlr->pending->first;
3734
3726
  while ((hse = hse_next) != NULL) {
3735
3727
  hse_next = hse->next;
@@ -3737,23 +3729,13 @@ static void deleter_commit_pending_deletions(FrtDeleter *dlr)
3737
3729
  }
3738
3730
  }
3739
3731
 
3740
- void frt_deleter_delete_files(FrtDeleter *dlr, char **files, int file_cnt)
3741
- {
3742
- int i;
3743
- for (i = file_cnt - 1; i >= 0; i--) {
3744
- deleter_queue_file(dlr, files[i]);
3745
- }
3746
- deleter_commit_pending_deletions(dlr);
3747
- }
3748
-
3749
3732
  struct DelFilesArg {
3750
3733
  char curr_seg_file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
3751
3734
  FrtDeleter *dlr;
3752
3735
  FrtHash *current;
3753
3736
  };
3754
3737
 
3755
- static void frt_deleter_find_deletable_files_i(const char *file_name, void *arg)
3756
- {
3738
+ static void frt_deleter_find_deletable_files_i(const char *file_name, void *arg) {
3757
3739
  struct DelFilesArg *dfa = (struct DelFilesArg *)arg;
3758
3740
  FrtDeleter *dlr = dfa->dlr;
3759
3741
 
@@ -3773,8 +3755,7 @@ static void frt_deleter_find_deletable_files_i(const char *file_name, void *arg)
3773
3755
  if (NULL != p) {
3774
3756
  *p = '\0';
3775
3757
  extension = p + 1;
3776
- }
3777
- else {
3758
+ } else {
3778
3759
  extension = NULL;
3779
3760
  }
3780
3761
 
@@ -3789,16 +3770,14 @@ static void frt_deleter_find_deletable_files_i(const char *file_name, void *arg)
3789
3770
  if (NULL == (si = (FrtSegmentInfo *)frt_h_get(dfa->current, segment_name))) {
3790
3771
  /* Delete if segment is not referenced: */
3791
3772
  do_delete = true;
3792
- }
3793
- else {
3773
+ } else {
3794
3774
  char tmp_fn[FRT_SEGMENT_NAME_MAX_LENGTH];
3795
3775
  /* OK, segment is referenced, but file may still be orphan'd: */
3796
3776
  if (file_name_filter_is_cfs_file(file_name)
3797
3777
  && si->use_compound_file) {
3798
3778
  /* This file is stored in a CFS file for this segment: */
3799
3779
  do_delete = true;
3800
- }
3801
- else if (0 == strcmp("del", extension)) {
3780
+ } else if (0 == strcmp("del", extension)) {
3802
3781
  /* This is a _segmentName_N.del file: */
3803
3782
  if (!frt_fn_for_generation(tmp_fn, segment_name, "del", si->del_gen)
3804
3783
  || 0 != strcmp(file_name, tmp_fn)) {
@@ -3807,8 +3786,7 @@ static void frt_deleter_find_deletable_files_i(const char *file_name, void *arg)
3807
3786
  * this segment, then delete it: */
3808
3787
  do_delete = true;
3809
3788
  }
3810
- }
3811
- else if (NULL != extension
3789
+ } else if (NULL != extension
3812
3790
  && ('s' == *extension || 'f' == *extension)
3813
3791
  && isdigit(extension[1])) {
3814
3792
  si_norm_file_name(si, tmp_fn, atoi(extension + 1));
@@ -3817,15 +3795,14 @@ static void frt_deleter_find_deletable_files_i(const char *file_name, void *arg)
3817
3795
  /* This is an orphan'd norms file: */
3818
3796
  do_delete = true;
3819
3797
  }
3820
- }
3821
- else if (0 == strcmp("cfs", extension) && !si->use_compound_file) {
3798
+ } else if (0 == strcmp("cfs", extension) && !si->use_compound_file) {
3822
3799
  /* This is a partially written _segmentName.cfs: */
3823
3800
  do_delete = true;
3824
3801
  }
3825
3802
  }
3826
3803
 
3827
3804
  if (do_delete) {
3828
- deleter_queue_file(dlr, file_name);
3805
+ frt_deleter_queue_file(dlr, file_name);
3829
3806
  }
3830
3807
  }
3831
3808
  }
@@ -3837,8 +3814,7 @@ static void frt_deleter_find_deletable_files_i(const char *file_name, void *arg)
3837
3814
  * create the unused file (eg when merging segments), and we only remove from
3838
3815
  * deletable when a file is successfully deleted.
3839
3816
  */
3840
- void frt_deleter_find_deletable_files(FrtDeleter *dlr)
3841
- {
3817
+ void frt_deleter_find_deletable_files(FrtDeleter *dlr) {
3842
3818
  /* Gather all "current" segments: */
3843
3819
  int i;
3844
3820
  FrtSegmentInfos *sis = dlr->sis;
@@ -4207,7 +4183,7 @@ static void norm_rewrite(Norm *norm, FrtStore *store, FrtDeleter *dlr,
4207
4183
  const int field_num = norm->field_num;
4208
4184
 
4209
4185
  if (si_norm_file_name(si, norm_file_name, field_num)) {
4210
- deleter_queue_file(dlr, norm_file_name);
4186
+ frt_deleter_queue_file(dlr, norm_file_name);
4211
4187
  }
4212
4188
  frt_si_advance_norm_gen(si, field_num);
4213
4189
  si_norm_file_name(si, norm_file_name, field_num);
@@ -4363,7 +4339,7 @@ static void sr_commit_i(FrtIndexReader *ir)
4363
4339
  if (SR(ir)->undelete_all || SR(ir)->deleted_docs_dirty) {
4364
4340
  if (si->del_gen >= 0) {
4365
4341
  frt_fn_for_generation(tmp_file_name, segment, "del", si->del_gen);
4366
- deleter_queue_file(ir->deleter, tmp_file_name);
4342
+ frt_deleter_queue_file(ir->deleter, tmp_file_name);
4367
4343
  }
4368
4344
  if (SR(ir)->undelete_all) {
4369
4345
  si->del_gen = -1;
@@ -5687,7 +5663,6 @@ static SegmentMergeInfo *smi_new(int base, FrtStore *store, FrtSegmentInfo *si)
5687
5663
  smi->store = frt_open_cmpd_store(store, file_name);
5688
5664
  }
5689
5665
 
5690
-
5691
5666
  sprintf(file_name, "%s.fdx", segment);
5692
5667
  smi->doc_cnt = smi->max_doc
5693
5668
  = smi->store->length(smi->store, file_name) / FIELDS_IDX_PTR_SIZE;
@@ -6116,14 +6091,7 @@ int frt_iw_doc_count(FrtIndexWriter *iw)
6116
6091
  return doc_cnt;
6117
6092
  }
6118
6093
 
6119
- #define MOVE_TO_COMPOUND_DIR(file_name)\
6120
- deleter_queue_file(dlr, file_name);\
6121
- frt_cw_add_file(cw, file_name)
6122
-
6123
- static void iw_create_compound_file(FrtStore *store, FrtFieldInfos *fis,
6124
- FrtSegmentInfo *si, char *cfs_file_name,
6125
- FrtDeleter *dlr)
6126
- {
6094
+ static void iw_create_compound_file(FrtStore *store, FrtFieldInfos *fis, FrtSegmentInfo *si, char *cfs_file_name, FrtDeleter *dlr) {
6127
6095
  int i;
6128
6096
  FrtCompoundWriter *cw;
6129
6097
  char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
@@ -6137,19 +6105,18 @@ static void iw_create_compound_file(FrtStore *store, FrtFieldInfos *fis,
6137
6105
  cw = frt_open_cw(store, cfs_file_name);
6138
6106
  for (i = 0; i < FRT_NELEMS(COMPOUND_EXTENSIONS); i++) {
6139
6107
  memcpy(ext, COMPOUND_EXTENSIONS[i], 4);
6140
- MOVE_TO_COMPOUND_DIR(file_name);
6108
+ frt_cw_add_file(cw, file_name);
6141
6109
  }
6142
6110
 
6143
6111
  /* Field norm file_names */
6144
6112
  for (i = fis->size - 1; i >= 0; i--) {
6145
- if (fi_has_norms(fis->fields[i])
6146
- && si_norm_file_name(si, file_name, i)) {
6147
- MOVE_TO_COMPOUND_DIR(file_name);
6113
+ if (fi_has_norms(fis->fields[i]) && si_norm_file_name(si, file_name, i)) {
6114
+ frt_cw_add_file(cw, file_name);
6148
6115
  }
6149
6116
  }
6150
6117
 
6151
6118
  /* Perform the merge */
6152
- frt_cw_close(cw);
6119
+ frt_cw_close(cw, dlr);
6153
6120
  }
6154
6121
 
6155
6122
  static void iw_commit_compound_file(FrtIndexWriter *iw, FrtSegmentInfo *si)
@@ -627,9 +627,9 @@ struct FrtDeleter {
627
627
 
628
628
  extern FrtDeleter *frt_deleter_new(FrtSegmentInfos *sis, FrtStore *store);
629
629
  extern void frt_deleter_destroy(FrtDeleter *dlr);
630
+ extern void frt_deleter_queue_file(FrtDeleter *dlr, const char *file_name);
630
631
  extern void frt_deleter_delete_file(FrtDeleter *dlr, char *file_name);
631
632
  extern void frt_deleter_find_deletable_files(FrtDeleter *dlr);
632
- extern void frt_deleter_delete_files(FrtDeleter *dlr, char **files, int file_cnt);
633
633
 
634
634
  /****************************************************************************
635
635
  *
@@ -859,6 +859,6 @@ typedef struct FrtCompoundWriter {
859
859
 
860
860
  extern FrtCompoundWriter *frt_open_cw(FrtStore *store, char *name);
861
861
  extern void frt_cw_add_file(FrtCompoundWriter *cw, char *id);
862
- extern void frt_cw_close(FrtCompoundWriter *cw);
862
+ extern void frt_cw_close(FrtCompoundWriter *cw, FrtDeleter *dlr);
863
863
 
864
864
  #endif
@@ -43,7 +43,14 @@ void *frt_erealloc(void *ptr, size_t size)
43
43
  return p;
44
44
  }
45
45
 
46
- /* void micro_sleep(const int micro_seconds)
46
+
47
+ struct timeval rb_time_interval _((VALUE));
48
+
49
+ extern void frt_micro_sleep(const int micro_seconds) {
50
+ rb_thread_wait_for(rb_time_interval(rb_float_new((double)micro_seconds/1000000.0)));
51
+ }
52
+
53
+ /* void frt_micro_sleep(const int micro_seconds)
47
54
  {
48
55
  #if (defined POSH_OS_WIN32 || defined POSH_OS_WIN64) && !defined __MINGW32__
49
56
  Sleep(micro_seconds / 1000);
@@ -149,7 +149,7 @@ void frt_os_seek(FrtOutStream *os, off_t new_pos)
149
149
  */
150
150
  void frt_os_write_byte(FrtOutStream *os, frt_uchar b)
151
151
  {
152
- if (os->buf.pos >= FRT_BUFFER_SIZE) {
152
+ if (os->buf.pos >= (FRT_BUFFER_SIZE - 1)) {
153
153
  frt_os_flush(os);
154
154
  }
155
155
  write_byte(os, b);
@@ -157,15 +157,12 @@ void frt_os_write_byte(FrtOutStream *os, frt_uchar b)
157
157
 
158
158
  void frt_os_write_bytes(FrtOutStream *os, const frt_uchar *buf, int len)
159
159
  {
160
- if (os->buf.pos > 0) { /* flush buffer */
161
- frt_os_flush(os);
162
- }
163
-
164
- if (len < FRT_BUFFER_SIZE) {
165
- os->m->flush_i(os, buf, len);
166
- os->buf.start += len;
160
+ if (len < (FRT_BUFFER_SIZE - os->buf.pos)) {
161
+ memcpy(os->buf.buf + os->buf.pos, buf, len);
162
+ os->buf.pos += len;
167
163
  }
168
164
  else {
165
+ frt_os_flush(os);
169
166
  int pos = 0;
170
167
  int size;
171
168
  while (pos < len) {
@@ -5,7 +5,7 @@
5
5
  #include "frt_global.h"
6
6
  #include "frt_hash.h"
7
7
  #include "frt_hashset.h"
8
- #include "frb_threading.h"
8
+ #include "frt_threading.h"
9
9
 
10
10
  #define FRT_LOCK_PREFIX "ferret-"
11
11
  #define FRT_LOCK_EXT ".lck"
@@ -4,8 +4,8 @@
4
4
  #include "frt_except.h"
5
5
  #include "frt_hash.h"
6
6
  #include "frt_hashset.h"
7
- #include "frb_threading.h"
8
- #include "frb_lang.h"
7
+ #include "frt_threading.h"
8
+ #include "ruby.h"
9
9
 
10
10
  /* IDs */
11
11
  ID id_new;
@@ -1,10 +1,9 @@
1
1
  #ifndef __FERRET_H_
2
2
  #define __FERRET_H_
3
- #include <ruby.h>
4
3
  #include "frt_global.h"
5
4
  #include "frt_hashset.h"
6
5
  #include "frt_document.h"
7
- #include "frb_lang.h"
6
+ #include <ruby.h>
8
7
 
9
8
  /* IDs */
10
9
  extern ID id_new;
@@ -810,7 +810,7 @@ void Init_Test(void) {
810
810
  rb_define_singleton_method(mTest, "filter", frb_ts_filter, 0);
811
811
  rb_define_singleton_method(mTest, "fs_store", frb_ts_fs_store, 0);
812
812
  rb_define_singleton_method(mTest, "global", frb_ts_global, 0);
813
- rb_define_singleton_method(mTest, "hash", frb_ts_hash, 0);
813
+ rb_define_singleton_method(mTest, "test_hash", frb_ts_hash, 0);
814
814
  rb_define_singleton_method(mTest, "hashset", frb_ts_hashset, 0);
815
815
  rb_define_singleton_method(mTest, "helper", frb_ts_helper, 0);
816
816
  rb_define_singleton_method(mTest, "highlighter", frb_ts_highlighter, 0);
@@ -48,7 +48,7 @@ void test_compound_writer(TestCase *tc, void *data)
48
48
  cw = frt_open_cw(store, (char *)"cfile");
49
49
  frt_cw_add_file(cw, (char *)"file1");
50
50
  frt_cw_add_file(cw, (char *)"file2");
51
- frt_cw_close(cw);
51
+ frt_cw_close(cw, NULL);
52
52
 
53
53
  is = store->open_input(store, "cfile");
54
54
  Aiequal(2, frt_is_read_vint(is));
@@ -94,7 +94,7 @@ void test_compound_io(TestCase *tc, void *data)
94
94
  frt_cw_add_file(cw, (char *)"file1");
95
95
  frt_cw_add_file(cw, (char *)"file2");
96
96
  frt_cw_add_file(cw, (char *)"file3");
97
- frt_cw_close(cw);
97
+ frt_cw_close(cw, NULL);
98
98
 
99
99
  c_reader = frt_open_cmpd_store(store, "cfile");
100
100
  is1 = c_reader->open_input(c_reader, "file1");
@@ -137,7 +137,7 @@ void test_compound_io_many_files(TestCase *tc, void *data)
137
137
  frt_os_write_vint(os, MAGIC);
138
138
  frt_os_close(os);
139
139
  }
140
- frt_cw_close(cw);
140
+ frt_cw_close(cw, NULL);
141
141
 
142
142
  c_reader = frt_open_cmpd_store(store, "_.cfs");
143
143
  for (i = 0; i < TEST_FILE_CNT; i++) {
@@ -45,6 +45,7 @@ static void test_hash_str(TestCase *tc, void *data)
45
45
 
46
46
  size_t res;
47
47
  f = temp_open();
48
+ Assert(f != NULL, "Could not open tempfile");
48
49
  frt_h_str_print_keys(h, f);
49
50
  fseek(f, 0, SEEK_SET);
50
51
  res = fread(buf, 1, 100, f);
@@ -175,7 +175,7 @@ TestSuite *ts_threading(TestSuite *suite)
175
175
 
176
176
  tst_run_test(suite, test_number_to_str, NULL);
177
177
  tst_run_test(suite, test_threading_test, index);
178
- // tst_run_test(suite, test_threading, index);
178
+ tst_run_test(suite, test_threading, index);
179
179
 
180
180
  frt_index_destroy(index);
181
181
 
@@ -1,5 +1,5 @@
1
1
  module Isomorfeus
2
2
  module Ferret
3
- VERSION = '0.13.4'
3
+ VERSION = '0.13.7'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: isomorfeus-ferret
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.4
4
+ version: 0.13.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Biedermann
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-04-19 00:00:00.000000000 Z
11
+ date: 2022-04-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -167,12 +167,9 @@ files:
167
167
  - ext/isomorfeus_ferret_ext/fio_tmpfile.h
168
168
  - ext/isomorfeus_ferret_ext/frb_analysis.c
169
169
  - ext/isomorfeus_ferret_ext/frb_index.c
170
- - ext/isomorfeus_ferret_ext/frb_lang.c
171
- - ext/isomorfeus_ferret_ext/frb_lang.h
172
170
  - ext/isomorfeus_ferret_ext/frb_qparser.c
173
171
  - ext/isomorfeus_ferret_ext/frb_search.c
174
172
  - ext/isomorfeus_ferret_ext/frb_store.c
175
- - ext/isomorfeus_ferret_ext/frb_threading.h
176
173
  - ext/isomorfeus_ferret_ext/frb_utils.c
177
174
  - ext/isomorfeus_ferret_ext/frt_analysis.c
178
175
  - ext/isomorfeus_ferret_ext/frt_analysis.h
@@ -1,9 +0,0 @@
1
- #include "frt_except.h"
2
- #include "frb_lang.h"
3
-
4
- struct timeval rb_time_interval _((VALUE));
5
-
6
- extern void frt_micro_sleep(const int micro_seconds)
7
- {
8
- rb_thread_wait_for(rb_time_interval(rb_float_new((double)micro_seconds/1000000.0)));
9
- }
@@ -1,17 +0,0 @@
1
- #ifndef FRT_LANG_H
2
- #define FRT_LANG_H
3
-
4
- #define RUBY_BINDINGS 1
5
-
6
- #include <stdarg.h>
7
- #include <ruby.h>
8
-
9
- #undef close
10
- #undef rename
11
- #undef read
12
-
13
- #define frt_emalloc xmalloc
14
- #define frt_ecalloc(n) xcalloc(n, 1)
15
- #define frt_erealloc xrealloc
16
-
17
- #endif
@@ -1,29 +0,0 @@
1
- #ifndef FRT_THREADING_H
2
- #define FRT_THREADING_H
3
-
4
- #include "frt_hash.h"
5
- #define UNTHREADED 1
6
-
7
- typedef void * frt_mutex_t;
8
- typedef struct FrtHash *frt_thread_key_t;
9
- typedef int frt_thread_once_t;
10
- #define FRT_MUTEX_INITIALIZER NULL
11
- #define FRT_THREAD_ONCE_INIT 1;
12
- #define frt_mutex_init(a, b)
13
- #define frt_mutex_lock(a)
14
- #define frt_mutex_trylock(a)
15
- #define frt_mutex_unlock(a)
16
- #define frt_mutex_destroy(a)
17
- #define frt_thread_key_create(a, b) frb_thread_key_create(a, b)
18
- #define frt_thread_key_delete(a) frb_thread_key_delete(a)
19
- #define frt_thread_setspecific(a, b) frb_thread_setspecific(a, b)
20
- #define frt_thread_getspecific(a) frb_thread_getspecific(a)
21
- #define frt_thread_once(a, b) frb_thread_once(a, b)
22
-
23
- void frb_thread_once(int *once_control, void (*init_routine)(void));
24
- void frb_thread_key_create(frt_thread_key_t *key, frt_free_ft destroy);
25
- void frb_thread_key_delete(frt_thread_key_t key);
26
- void frb_thread_setspecific(frt_thread_key_t key, const void *pointer);
27
- void *frb_thread_getspecific(frt_thread_key_t key);
28
-
29
- #endif