isomorfeus-ferret 0.16.2 → 0.17.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ca79dc44a2271fca9a4f0fae7586aab07f0d2267728591da5acd93edb08805cf
4
- data.tar.gz: 19aa0e83eac3c572bc50c995dfadc7820f0b45f28cd7968566c394eebe3d7426
3
+ metadata.gz: 74adb05c4210e599a5b55f6fc0b5c41e88fd3f50586eb57a26f49031ce1c9e07
4
+ data.tar.gz: '058640d7f9e2fec23eaa7094cd6c1f7cfeb50e42068567e3da2e06b795060434'
5
5
  SHA512:
6
- metadata.gz: 45b3051498353bf3d957a461d1fab80ceefda0dbc2ae393fc686fd3f712f619b272d3d030b2737e372ba0e52a8cc5ce0dcf19627e6cb2bf738e9d54b6a7f8dd4
7
- data.tar.gz: c9421bc05bc8f5a84f48db79b61fc6f3b41edfba3e69613a303f2057a418752f72b71775d452fea744dbb5c220ce95899c5802039ce0c23cd74b1adfb853afe3
6
+ metadata.gz: f6034d8bfc52b1413b8c60c24b9f6d3aa9dfa93c2048a2e0a9d17ea12c49907364ad1d2c1f93d19ab105e7981080f21ea96520d779caba1c9d7f142ef4ce79e8
7
+ data.tar.gz: 278e40b54014a297ad0e4c4a87a86168bf4bb6a5ddb561197c7b5ae170a0175d8414d890211ad910e06f953dd674c33d46aca3f11605f8aaa76231b01034cdfa
@@ -29,7 +29,6 @@ static VALUE sym_merge_factor;
29
29
  static VALUE sym_max_buffered_docs;
30
30
  static VALUE sym_max_merge_docs;
31
31
  static VALUE sym_max_field_length;
32
- static VALUE sym_use_compound_file;
33
32
  static VALUE sym_field_infos;
34
33
 
35
34
  static ID fsym_content;
@@ -1041,9 +1040,6 @@ static VALUE frb_iw_init(int argc, VALUE *argv, VALUE self) {
1041
1040
  frb_create_dir(rval);
1042
1041
  store = frt_open_mdbx_store(rs2s(rval));
1043
1042
  }
1044
- /* use_compound_file defaults to true */
1045
- config.use_compound_file =
1046
- (rb_hash_aref(roptions, sym_use_compound_file) == Qfalse) ? false : true;
1047
1043
 
1048
1044
  if ((rval = rb_hash_aref(roptions, sym_analyzer)) != Qnil) {
1049
1045
  analyzer = frb_get_cwrapped_analyzer(rval);
@@ -1730,33 +1726,6 @@ frb_iw_set_max_field_length(VALUE self, VALUE rval)
1730
1726
  return rval;
1731
1727
  }
1732
1728
 
1733
- /*
1734
- * call-seq:
1735
- * iw.use_compound_file -> number
1736
- *
1737
- * Return the current value of use_compound_file
1738
- */
1739
- static VALUE
1740
- frb_iw_get_use_compound_file(VALUE self)
1741
- {
1742
- FrtIndexWriter *iw = (FrtIndexWriter *)DATA_PTR(self);
1743
- return iw->config.use_compound_file ? Qtrue : Qfalse;
1744
- }
1745
-
1746
- /*
1747
- * call-seq:
1748
- * iw.use_compound_file = use_compound_file -> use_compound_file
1749
- *
1750
- * Set the use_compound_file parameter
1751
- */
1752
- static VALUE
1753
- frb_iw_set_use_compound_file(VALUE self, VALUE rval)
1754
- {
1755
- FrtIndexWriter *iw = (FrtIndexWriter *)DATA_PTR(self);
1756
- iw->config.use_compound_file = RTEST(rval);
1757
- return rval;
1758
- }
1759
-
1760
1729
  /****************************************************************************
1761
1730
  *
1762
1731
  * IndexReader Methods
@@ -2789,11 +2758,6 @@ static void Init_TermVector(void) {
2789
2758
  * you want to be more thorough and you are indexing
2790
2759
  * documents from your file-system you may set this
2791
2760
  * parameter to Ferret::FIX_INT_MAX.
2792
- * use_compound_file:: Default: true. Uses a compound file to store the
2793
- * index. This prevents an error being raised for
2794
- * having too many files open at the same time. The
2795
- * default is true but performance is better if this is
2796
- * set to false.
2797
2761
  *
2798
2762
  *
2799
2763
  * === Deleting Documents
@@ -2825,7 +2789,6 @@ void Init_IndexWriter(void) {
2825
2789
  sym_max_buffered_docs = ID2SYM(rb_intern("max_buffered_docs"));
2826
2790
  sym_max_merge_docs = ID2SYM(rb_intern("max_merge_docs"));
2827
2791
  sym_max_field_length = ID2SYM(rb_intern("max_field_length"));
2828
- sym_use_compound_file = ID2SYM(rb_intern("use_compound_file"));
2829
2792
 
2830
2793
  cIndexWriter = rb_define_class_under(mIndex, "IndexWriter", rb_cObject);
2831
2794
  rb_define_alloc_func(cIndexWriter, frb_iw_alloc);
@@ -2842,7 +2805,6 @@ void Init_IndexWriter(void) {
2842
2805
  rb_define_const(cIndexWriter, "DEFAULT_MAX_BUFFERED_DOCS", INT2FIX(frt_default_config.max_buffered_docs));
2843
2806
  rb_define_const(cIndexWriter, "DEFAULT_MAX_MERGE_DOCS", INT2FIX(frt_default_config.max_merge_docs));
2844
2807
  rb_define_const(cIndexWriter, "DEFAULT_MAX_FIELD_LENGTH", INT2FIX(frt_default_config.max_field_length));
2845
- rb_define_const(cIndexWriter, "DEFAULT_USE_COMPOUND_FILE", frt_default_config.use_compound_file ? Qtrue : Qfalse);
2846
2808
 
2847
2809
  rb_define_method(cIndexWriter, "initialize", frb_iw_init, -1);
2848
2810
  rb_define_method(cIndexWriter, "doc_count", frb_iw_get_doc_count, 0);
@@ -2881,9 +2843,6 @@ void Init_IndexWriter(void) {
2881
2843
 
2882
2844
  rb_define_method(cIndexWriter, "max_field_length", frb_iw_get_max_field_length, 0);
2883
2845
  rb_define_method(cIndexWriter, "max_field_length=", frb_iw_set_max_field_length, 1);
2884
-
2885
- rb_define_method(cIndexWriter, "use_compound_file", frb_iw_get_use_compound_file, 0);
2886
- rb_define_method(cIndexWriter, "use_compound_file=", frb_iw_set_use_compound_file, 1);
2887
2846
  }
2888
2847
 
2889
2848
  /*
@@ -99,7 +99,6 @@ void frt_ensure_writer_open(FrtIndex *self)
99
99
  /* make sure the analyzer isn't deleted by the FrtIndexWriter */
100
100
  FRT_REF(self->analyzer);
101
101
  self->iw = frt_iw_open(NULL, self->store, self->analyzer, false);
102
- self->iw->config.use_compound_file = self->config.use_compound_file;
103
102
  }
104
103
  }
105
104
 
@@ -37,8 +37,7 @@ const FrtConfig frt_default_config = {
37
37
  10, /* default merge factor */
38
38
  10000, /* max_buffered_docs */
39
39
  INT_MAX, /* max_merge_docs */
40
- 10000, /* maximum field length (number of terms) */
41
- true /* use compound file by default */
40
+ 10000 /* maximum field length (number of terms) */
42
41
  };
43
42
 
44
43
  static void ste_reset(FrtTermEnum *te);
@@ -56,11 +55,6 @@ static const char *INDEX_EXTENSIONS[] = {
56
55
  "frq", "prx", "fdx", "fdt", "tfx", "tix", "tis", "del", "gen", "cfs"
57
56
  };
58
57
 
59
- /* *** Must be three characters *** */
60
- static const char *COMPOUND_EXTENSIONS[] = {
61
- "frq", "prx", "fdx", "fdt", "tfx", "tix", "tis"
62
- };
63
-
64
58
  static const char BASE36_DIGITMAP[] = "0123456789abcdefghijklmnopqrstuvwxyz";
65
59
 
66
60
  static char *u64_to_str36(char *buf, int buf_size, frt_u64 u)
@@ -512,7 +506,6 @@ FrtSegmentInfo *frt_si_new(char *name, int doc_cnt, FrtStore *store)
512
506
  si->norm_gens = NULL;
513
507
  si->norm_gens_size = 0;
514
508
  si->ref_cnt = 1;
515
- si->use_compound_file = false;
516
509
  return si;
517
510
  }
518
511
 
@@ -534,7 +527,6 @@ static FrtSegmentInfo *si_read(FrtStore *store, FrtInStream *is)
534
527
  si->norm_gens[i] = frt_is_read_vint(is);
535
528
  }
536
529
  }
537
- si->use_compound_file = (bool)frt_is_read_byte(is);
538
530
  FRT_XCATCHALL
539
531
  frt_store_close(si->store);
540
532
  free(si->name);
@@ -555,7 +547,6 @@ static void si_write(FrtSegmentInfo *si, FrtOutStream *os)
555
547
  frt_os_write_vint(os, si->norm_gens[i]);
556
548
  }
557
549
  }
558
- frt_os_write_byte(os, (frt_uchar)si->use_compound_file);
559
550
  }
560
551
 
561
552
  void frt_si_close(FrtSegmentInfo *si) {
@@ -572,17 +563,6 @@ bool frt_si_has_deletions(FrtSegmentInfo *si)
572
563
  return si->del_gen >= 0;
573
564
  }
574
565
 
575
- bool frt_si_has_separate_norms(FrtSegmentInfo *si)
576
- {
577
- if (si->use_compound_file && si->norm_gens) {
578
- int i;
579
- for (i = si->norm_gens_size - 1; i >= 0; i--) {
580
- if (si->norm_gens[i] > 0) return true;
581
- }
582
- }
583
- return false;
584
- }
585
-
586
566
  void frt_si_advance_norm_gen(FrtSegmentInfo *si, int field_num)
587
567
  {
588
568
  if (field_num >= si->norm_gens_size) {
@@ -603,7 +583,7 @@ static char *si_norm_file_name(FrtSegmentInfo *si, char *buf, int field_num)
603
583
  || 0 > (norm_gen = si->norm_gens[field_num])) {
604
584
  return NULL;
605
585
  } else {
606
- const char *ext = (si->use_compound_file && norm_gen > 0) ? "s" : "f";
586
+ const char *ext = "f";
607
587
  return fn_for_gen_field(buf, si->name, ext, norm_gen, field_num);
608
588
  }
609
589
  }
@@ -627,17 +607,9 @@ static void si_delete_files(FrtSegmentInfo *si, FrtFieldInfos *fis, FrtDeleter *
627
607
  file_name[seg_len] = '.';
628
608
  ext = file_name + seg_len + 1;
629
609
 
630
- if (si->use_compound_file) {
631
- memcpy(ext, "cfs", 4);
610
+ for (i = FRT_NELEMS(INDEX_EXTENSIONS) - 1; i >= 0; i--) {
611
+ memcpy(ext, INDEX_EXTENSIONS[i], 4);
632
612
  frt_deleter_queue_file(dlr, file_name);
633
- if (0 <= si->del_gen) {
634
- frt_deleter_queue_file(dlr, frt_fn_for_generation(file_name, si->name, "del", si->del_gen));
635
- }
636
- } else {
637
- for (i = FRT_NELEMS(INDEX_EXTENSIONS) - 1; i >= 0; i--) {
638
- memcpy(ext, INDEX_EXTENSIONS[i], 4);
639
- frt_deleter_queue_file(dlr, file_name);
640
- }
641
613
  }
642
614
  }
643
615
 
@@ -3525,30 +3497,6 @@ bool frt_file_name_filter_is_index_file(const char *file_name, bool include_lock
3525
3497
  return false;
3526
3498
  }
3527
3499
 
3528
- /*
3529
- * Returns true if this is a file that would be contained in a CFS file. This
3530
- * function should only be called on files that pass the above "accept" (ie,
3531
- * are already known to be a Lucene index file).
3532
- */
3533
- static bool file_name_filter_is_cfs_file(const char *file_name) {
3534
- char *p = strrchr(file_name, '.');
3535
- if (NULL != p) {
3536
- char *extension = p + 1;
3537
- if (NULL != frt_h_get(fn_extensions, extension)
3538
- && 0 != strcmp(extension, "del")
3539
- && 0 != strcmp(extension, "gen")
3540
- && 0 != strcmp(extension, "cfs")) {
3541
- return true;
3542
- }
3543
- else if ('f' == *extension
3544
- && '0' <= *(extension + 1)
3545
- && '9' >= *(extension + 1)) {
3546
- return true;
3547
- }
3548
- }
3549
- return false;
3550
- }
3551
-
3552
3500
  /****************************************************************************
3553
3501
  *
3554
3502
  * Deleter
@@ -3639,11 +3587,7 @@ static void frt_deleter_find_deletable_files_i(const char *file_name, void *arg)
3639
3587
  } else {
3640
3588
  char tmp_fn[FRT_SEGMENT_NAME_MAX_LENGTH];
3641
3589
  /* OK, segment is referenced, but file may still be orphan'd: */
3642
- if (file_name_filter_is_cfs_file(file_name)
3643
- && si->use_compound_file) {
3644
- /* This file is stored in a CFS file for this segment: */
3645
- do_delete = true;
3646
- } else if (0 == strcmp("del", extension)) {
3590
+ if (0 == strcmp("del", extension)) {
3647
3591
  /* This is a _segmentName_N.del file: */
3648
3592
  if (!frt_fn_for_generation(tmp_fn, segment_name, "del", si->del_gen)
3649
3593
  || 0 != strcmp(file_name, tmp_fn)) {
@@ -3661,9 +3605,6 @@ static void frt_deleter_find_deletable_files_i(const char *file_name, void *arg)
3661
3605
  /* This is an orphan'd norms file: */
3662
3606
  do_delete = true;
3663
3607
  }
3664
- } else if (0 == strcmp("cfs", extension) && !si->use_compound_file) {
3665
- /* This is a partially written _segmentName.cfs: */
3666
- do_delete = true;
3667
3608
  }
3668
3609
  }
3669
3610
 
@@ -4377,8 +4318,7 @@ static void sr_open_norms(FrtIndexReader *ir, FrtStore *cfs_store)
4377
4318
  char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
4378
4319
 
4379
4320
  for (i = si->norm_gens_size - 1; i >= 0; i--) {
4380
- FrtStore *store = (si->use_compound_file && si->norm_gens[i] == 0) ?
4381
- cfs_store : ir->store;
4321
+ FrtStore *store = ir->store;
4382
4322
  if (si_norm_file_name(si, file_name, i)) {
4383
4323
  FrtInStream *is = store->open_input(store, file_name);
4384
4324
  FRT_DEREF(is);
@@ -4425,12 +4365,6 @@ static FrtIndexReader *sr_setup_i(FrtSegmentReader *sr)
4425
4365
  sr->cfs_store = NULL;
4426
4366
 
4427
4367
  FRT_TRY
4428
- if (sr->si->use_compound_file) {
4429
- sprintf(file_name, "%s.cfs", sr_segment);
4430
- sr->cfs_store = frt_open_cmpd_store(store, file_name);
4431
- store = sr->cfs_store;
4432
- }
4433
-
4434
4368
  sr->fr = frt_fr_open(store, sr_segment, ir->fis);
4435
4369
  sr->sfi = frt_sfi_open(store, sr_segment);
4436
4370
  sr->tir = frt_tir_open(store, sr->sfi, sr_segment);
@@ -5509,10 +5443,6 @@ static SegmentMergeInfo *smi_new(int base, FrtStore *store, FrtSegmentInfo *si)
5509
5443
  smi->si = si;
5510
5444
  smi->orig_store = smi->store = store;
5511
5445
  FRT_REF(smi->orig_store);
5512
- sprintf(file_name, "%s.cfs", segment);
5513
- if (store->exists(store, file_name)) {
5514
- smi->store = frt_open_cmpd_store(store, file_name);
5515
- }
5516
5446
 
5517
5447
  sprintf(file_name, "%s.fdx", segment);
5518
5448
  smi->doc_cnt = smi->max_doc
@@ -5874,8 +5804,7 @@ static void sm_merge_norms(SegmentMerger *sm)
5874
5804
  if (si_norm_file_name(si, file_name, i)) {
5875
5805
  const int max_doc = smi->max_doc;
5876
5806
  FrtBitVector *deleted_docs = smi->deleted_docs;
5877
- store = (si->use_compound_file && si->norm_gens[i])
5878
- ? smi->orig_store : smi->store;
5807
+ store = smi->store;
5879
5808
  is = store->open_input(store, file_name);
5880
5809
  if (deleted_docs) {
5881
5810
  for (k = 0; k < max_doc; k++) {
@@ -5945,42 +5874,6 @@ int frt_iw_doc_count(FrtIndexWriter *iw)
5945
5874
  return doc_cnt;
5946
5875
  }
5947
5876
 
5948
- static void iw_create_compound_file(FrtStore *store, FrtFieldInfos *fis, FrtSegmentInfo *si, char *cfs_file_name, FrtDeleter *dlr) {
5949
- int i;
5950
- FrtCompoundWriter *cw;
5951
- char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
5952
- char *ext;
5953
- int seg_len = strlen(si->name);
5954
-
5955
- memcpy(file_name, si->name, seg_len);
5956
- file_name[seg_len] = '.';
5957
- ext = file_name + seg_len + 1;
5958
-
5959
- cw = frt_open_cw(store, cfs_file_name);
5960
- for (i = 0; i < FRT_NELEMS(COMPOUND_EXTENSIONS); i++) {
5961
- memcpy(ext, COMPOUND_EXTENSIONS[i], 4);
5962
- frt_cw_add_file(cw, file_name);
5963
- }
5964
-
5965
- /* Field norm file_names */
5966
- for (i = fis->size - 1; i >= 0; i--) {
5967
- if (bits_has_norms(fis->fields[i]->bits) && si_norm_file_name(si, file_name, i)) {
5968
- frt_cw_add_file(cw, file_name);
5969
- }
5970
- }
5971
-
5972
- /* Perform the merge */
5973
- frt_cw_close(cw, dlr);
5974
- }
5975
-
5976
- static void iw_commit_compound_file(FrtIndexWriter *iw, FrtSegmentInfo *si)
5977
- {
5978
- char cfs_name[FRT_SEGMENT_NAME_MAX_LENGTH];
5979
- sprintf(cfs_name, "%s.cfs", si->name);
5980
-
5981
- iw_create_compound_file(iw->store, iw->fis, si, cfs_name, iw->deleter);
5982
- }
5983
-
5984
5877
  static void iw_merge_segments(FrtIndexWriter *iw, const int min_seg, const int max_seg) {
5985
5878
  int i;
5986
5879
  FrtSegmentInfos *sis = iw->sis;
@@ -5999,11 +5892,6 @@ static void iw_merge_segments(FrtIndexWriter *iw, const int min_seg, const int m
5999
5892
 
6000
5893
  frt_sis_del_from_to(sis, min_seg, max_seg);
6001
5894
 
6002
- if (iw->config.use_compound_file) {
6003
- iw_commit_compound_file(iw, si);
6004
- si->use_compound_file = true;
6005
- }
6006
-
6007
5895
  frt_sis_write(sis, iw->store, iw->deleter);
6008
5896
  deleter_commit_pending_deletions(iw->deleter);
6009
5897
 
@@ -6055,10 +5943,6 @@ static void iw_flush_ram_segment(FrtIndexWriter *iw) {
6055
5943
  si->doc_cnt = iw->dw->doc_num;
6056
5944
  dw_flush(iw->dw);
6057
5945
  pthread_mutex_lock(&iw->store->mutex);
6058
- if (iw->config.use_compound_file) {
6059
- iw_commit_compound_file(iw, si);
6060
- si->use_compound_file = true;
6061
- }
6062
5946
  /* commit the segments file and the fields file */
6063
5947
  frt_sis_write(iw->sis, iw->store, iw->deleter);
6064
5948
  deleter_commit_pending_deletions(iw->deleter);
@@ -6174,10 +6058,7 @@ static void iw_optimize_i(FrtIndexWriter *iw)
6174
6058
  while (iw->sis->size > 1
6175
6059
  || (iw->sis->size == 1
6176
6060
  && (frt_si_has_deletions(iw->sis->segs[0])
6177
- || (iw->sis->segs[0]->store != iw->store)
6178
- || (iw->config.use_compound_file
6179
- && (!iw->sis->segs[0]->use_compound_file
6180
- || frt_si_has_separate_norms(iw->sis->segs[0])))))) {
6061
+ || (iw->sis->segs[0]->store != iw->store)))) {
6181
6062
  min_segment = iw->sis->size - iw->config.merge_factor;
6182
6063
  iw_merge_segments_from(iw, min_segment < 0 ? 0 : min_segment);
6183
6064
  }
@@ -6433,9 +6314,7 @@ static void iw_cp_norms(FrtIndexWriter *iw, FrtSegmentReader *sr,
6433
6314
  for (i = 0; i < field_cnt; i++) {
6434
6315
  if (bits_has_norms(fis->fields[i]->bits)
6435
6316
  && si_norm_file_name(sr->si, file_name_in, i)) {
6436
- FrtStore *store = (sr->si->use_compound_file
6437
- && sr->si->norm_gens[i] == 0) ? sr->cfs_store
6438
- : IR(sr)->store;
6317
+ FrtStore *store = IR(sr)->store;
6439
6318
  int field_num = map ? map[i] : i;
6440
6319
 
6441
6320
  norms_in = store->open_input(store, file_name_in);
@@ -32,7 +32,6 @@ typedef struct FrtConfig {
32
32
  int max_buffered_docs;
33
33
  int max_merge_docs;
34
34
  int max_field_length;
35
- bool use_compound_file;
36
35
  } FrtConfig;
37
36
 
38
37
  extern const FrtConfig frt_default_config;
@@ -151,13 +150,11 @@ typedef struct FrtSegmentInfo {
151
150
  int del_gen;
152
151
  int *norm_gens;
153
152
  int norm_gens_size;
154
- bool use_compound_file;
155
153
  } FrtSegmentInfo;
156
154
 
157
155
  extern FrtSegmentInfo *frt_si_new(char *name, int doc_cnt, FrtStore *store);
158
156
  extern void frt_si_close(FrtSegmentInfo *si);
159
157
  extern bool frt_si_has_deletions(FrtSegmentInfo *si);
160
- extern bool frt_si_has_separate_norms(FrtSegmentInfo *si);
161
158
  extern void frt_si_advance_norm_gen(FrtSegmentInfo *si, int field_num);
162
159
 
163
160
  /****************************************************************************
@@ -13,7 +13,7 @@ void *frt_emalloc(size_t size)
13
13
  void *p = malloc(size);
14
14
 
15
15
  if (p == NULL) {
16
- FRT_RAISE(FRT_MEM_ERROR, "failed to allocate %d bytes", (int)size);
16
+ FRT_RAISE(FRT_MEM_ERROR, "failed to allocate %llu bytes", size);
17
17
  }
18
18
 
19
19
  return p;
@@ -25,7 +25,7 @@ void *frt_ecalloc(size_t size)
25
25
  void *p = calloc(1, size);
26
26
 
27
27
  if (p == NULL) {
28
- FRT_RAISE(FRT_MEM_ERROR, "failed to allocate %d bytes", (int)size);
28
+ FRT_RAISE(FRT_MEM_ERROR, "failed to allocate %llu bytes", size);
29
29
  }
30
30
 
31
31
  return p;
@@ -37,7 +37,7 @@ void *frt_erealloc(void *ptr, size_t size)
37
37
  void *p = realloc(ptr, size);
38
38
 
39
39
  if (p == NULL) {
40
- FRT_RAISE(FRT_MEM_ERROR, "failed to reallocate %d bytes", (int)size);
40
+ FRT_RAISE(FRT_MEM_ERROR, "failed to reallocate %llu bytes", size);
41
41
  }
42
42
 
43
43
  return p;
@@ -419,7 +419,7 @@ static void mdbxso_close_i(FrtOutStream *os) {
419
419
  key.iov_base = rf->name;
420
420
  key.iov_len = strlen(rf->name);
421
421
 
422
- val.iov_base = frt_emalloc(FRT_BUFFER_SIZE * rf->bufcnt);
422
+ val.iov_base = frt_emalloc(rf->len);
423
423
  val.iov_len = rf->len;
424
424
 
425
425
  int offset = 0;
@@ -172,7 +172,6 @@ struct FrtStore {
172
172
  union {
173
173
  char *path; /* for fs_store only */
174
174
  FrtHash *ht; /* for ram_store only */
175
- FrtCompoundStore *cmpd; /* for compound_store only */
176
175
  MDBXInfo *mdbx; /* for mdbx store only */
177
176
  } dir;
178
177
  #if defined POSH_OS_WIN32 || defined POSH_OS_WIN64
@@ -359,17 +358,6 @@ extern FrtStore *frt_open_ram_store(FrtStore *new_store);
359
358
  */
360
359
  extern FrtStore *frt_open_ram_store_and_copy(FrtStore *store, FrtStore *from_store, bool close_store);
361
360
 
362
- /**
363
- * Open a compound store. This is basically store which is stored within a
364
- * single file and can in turn be stored within either a FileSystem or RAM
365
- * store.
366
- *
367
- * @param store the store within which this compound store will be stored
368
- * @param filename the name of the file in which to store the compound store
369
- * @return a newly allocated Compound FrtStore.
370
- */
371
- extern FrtStore *frt_open_cmpd_store(FrtStore *store, const char *filename);
372
-
373
361
  /*
374
362
  * == RamStore functions ==
375
363
  *
@@ -724,38 +724,37 @@ static VALUE frb_ts_1710(VALUE v) { return INT2FIX(execute_test(0)); }
724
724
  static VALUE frb_ts_analysis(VALUE v) { return INT2FIX(execute_test(1)); }
725
725
  static VALUE frb_ts_array(VALUE v) { return INT2FIX(execute_test(2)); }
726
726
  static VALUE frb_ts_bitvector(VALUE v) { return INT2FIX(execute_test(3)); }
727
- static VALUE frb_ts_compound_io(VALUE v) { return INT2FIX(execute_test(4)); }
728
- static VALUE frb_ts_document(VALUE v) { return INT2FIX(execute_test(5)); }
729
- static VALUE frb_ts_except(VALUE v) { return INT2FIX(execute_test(6)); }
730
- static VALUE frb_ts_fields(VALUE v) { return INT2FIX(execute_test(7)); }
731
- static VALUE frb_ts_file_deleter(VALUE v) { return INT2FIX(execute_test(8)); }
732
- static VALUE frb_ts_filter(VALUE v) { return INT2FIX(execute_test(9)); }
733
- static VALUE frb_ts_global(VALUE v) { return INT2FIX(execute_test(10)); }
734
- static VALUE frb_ts_hash(VALUE v) { return INT2FIX(execute_test(11)); }
735
- static VALUE frb_ts_hashset(VALUE v) { return INT2FIX(execute_test(12)); }
736
- static VALUE frb_ts_helper(VALUE v) { return INT2FIX(execute_test(13)); }
737
- static VALUE frb_ts_highlighter(VALUE v) { return INT2FIX(execute_test(14)); }
738
- static VALUE frb_ts_index(VALUE v) { return INT2FIX(execute_test(15)); }
739
- static VALUE frb_ts_lang(VALUE v) { return INT2FIX(execute_test(16)); }
740
- static VALUE frb_ts_mdbx_store(VALUE v) { return INT2FIX(execute_test(17)); }
741
- static VALUE frb_ts_mem_pool(VALUE v) { return INT2FIX(execute_test(18)); }
742
- static VALUE frb_ts_multimapper(VALUE v) { return INT2FIX(execute_test(19)); }
743
- static VALUE frb_ts_priorityqueue(VALUE v){ return INT2FIX(execute_test(20)); }
744
- static VALUE frb_ts_q_const_score(VALUE v){ return INT2FIX(execute_test(21)); }
745
- static VALUE frb_ts_q_filtered(VALUE v) { return INT2FIX(execute_test(22)); }
746
- static VALUE frb_ts_q_fuzzy(VALUE v) { return INT2FIX(execute_test(23)); }
747
- static VALUE frb_ts_q_parser(VALUE v) { return INT2FIX(execute_test(24)); }
748
- static VALUE frb_ts_q_span(VALUE v) { return INT2FIX(execute_test(25)); }
749
- static VALUE frb_ts_ram_store(VALUE v) { return INT2FIX(execute_test(26)); }
750
- static VALUE frb_ts_search(VALUE v) { return INT2FIX(execute_test(27)); }
751
- static VALUE frb_ts_multi_search(VALUE v) { return INT2FIX(execute_test(28)); }
752
- static VALUE frb_ts_segments(VALUE v) { return INT2FIX(execute_test(29)); }
753
- static VALUE frb_ts_similarity(VALUE v) { return INT2FIX(execute_test(30)); }
754
- static VALUE frb_ts_sort(VALUE v) { return INT2FIX(execute_test(31)); }
755
- static VALUE frb_ts_term(VALUE v) { return INT2FIX(execute_test(32)); }
756
- static VALUE frb_ts_term_vectors(VALUE v) { return INT2FIX(execute_test(33)); }
757
- static VALUE frb_ts_test(VALUE v) { return INT2FIX(execute_test(34)); }
758
- static VALUE frb_ts_threading(VALUE v) { return INT2FIX(execute_test(35)); }
727
+ static VALUE frb_ts_document(VALUE v) { return INT2FIX(execute_test(4)); }
728
+ static VALUE frb_ts_except(VALUE v) { return INT2FIX(execute_test(5)); }
729
+ static VALUE frb_ts_fields(VALUE v) { return INT2FIX(execute_test(6)); }
730
+ static VALUE frb_ts_file_deleter(VALUE v) { return INT2FIX(execute_test(7)); }
731
+ static VALUE frb_ts_filter(VALUE v) { return INT2FIX(execute_test(8)); }
732
+ static VALUE frb_ts_global(VALUE v) { return INT2FIX(execute_test(9)); }
733
+ static VALUE frb_ts_hash(VALUE v) { return INT2FIX(execute_test(10)); }
734
+ static VALUE frb_ts_hashset(VALUE v) { return INT2FIX(execute_test(11)); }
735
+ static VALUE frb_ts_helper(VALUE v) { return INT2FIX(execute_test(12)); }
736
+ static VALUE frb_ts_highlighter(VALUE v) { return INT2FIX(execute_test(13)); }
737
+ static VALUE frb_ts_index(VALUE v) { return INT2FIX(execute_test(14)); }
738
+ static VALUE frb_ts_lang(VALUE v) { return INT2FIX(execute_test(15)); }
739
+ static VALUE frb_ts_mdbx_store(VALUE v) { return INT2FIX(execute_test(16)); }
740
+ static VALUE frb_ts_mem_pool(VALUE v) { return INT2FIX(execute_test(17)); }
741
+ static VALUE frb_ts_multimapper(VALUE v) { return INT2FIX(execute_test(18)); }
742
+ static VALUE frb_ts_priorityqueue(VALUE v){ return INT2FIX(execute_test(19)); }
743
+ static VALUE frb_ts_q_const_score(VALUE v){ return INT2FIX(execute_test(20)); }
744
+ static VALUE frb_ts_q_filtered(VALUE v) { return INT2FIX(execute_test(21)); }
745
+ static VALUE frb_ts_q_fuzzy(VALUE v) { return INT2FIX(execute_test(22)); }
746
+ static VALUE frb_ts_q_parser(VALUE v) { return INT2FIX(execute_test(23)); }
747
+ static VALUE frb_ts_q_span(VALUE v) { return INT2FIX(execute_test(24)); }
748
+ static VALUE frb_ts_ram_store(VALUE v) { return INT2FIX(execute_test(25)); }
749
+ static VALUE frb_ts_search(VALUE v) { return INT2FIX(execute_test(26)); }
750
+ static VALUE frb_ts_multi_search(VALUE v) { return INT2FIX(execute_test(27)); }
751
+ static VALUE frb_ts_segments(VALUE v) { return INT2FIX(execute_test(28)); }
752
+ static VALUE frb_ts_similarity(VALUE v) { return INT2FIX(execute_test(29)); }
753
+ static VALUE frb_ts_sort(VALUE v) { return INT2FIX(execute_test(30)); }
754
+ static VALUE frb_ts_term(VALUE v) { return INT2FIX(execute_test(31)); }
755
+ static VALUE frb_ts_term_vectors(VALUE v) { return INT2FIX(execute_test(32)); }
756
+ static VALUE frb_ts_test(VALUE v) { return INT2FIX(execute_test(33)); }
757
+ static VALUE frb_ts_threading(VALUE v) { return INT2FIX(execute_test(34)); }
759
758
 
760
759
  static VALUE frb_ts_posh(VALUE v) {
761
760
  const char *posh = POSH_GetArchString();
@@ -802,7 +801,6 @@ void Init_Test(void) {
802
801
  rb_define_singleton_method(mTest, "analysis", frb_ts_analysis, 0);
803
802
  rb_define_singleton_method(mTest, "array", frb_ts_array, 0);
804
803
  rb_define_singleton_method(mTest, "bitvector", frb_ts_bitvector, 0);
805
- rb_define_singleton_method(mTest, "compound_io", frb_ts_compound_io, 0);
806
804
  rb_define_singleton_method(mTest, "document", frb_ts_document, 0);
807
805
  rb_define_singleton_method(mTest, "except", frb_ts_except, 0);
808
806
  rb_define_singleton_method(mTest, "fields", frb_ts_fields, 0);
@@ -40,11 +40,11 @@ static void test_problem_text(TestCase *tc, void *data)
40
40
 
41
41
  frt_iw_add_doc(iw, problem_text);
42
42
  Aiequal(1, frt_iw_doc_count(iw));
43
- Assert(!store->exists(store, "_0.cfs"), "data shouldn't have been written yet");
43
+ Assert(!store->exists(store, "_0.fdx"), "data shouldn't have been written yet");
44
44
  frt_iw_commit(iw);
45
- Assert(store->exists(store, "_0.cfs"), "data should now be written");
45
+ Assert(store->exists(store, "_0.fdx"), "data should now be written");
46
46
  frt_iw_close(iw);
47
- Assert(store->exists(store, "_0.cfs"), "data should still be there");
47
+ Assert(store->exists(store, "_0.fdx"), "data should still be there");
48
48
  }
49
49
 
50
50
  TestSuite *ts_1710(TestSuite *suite)
@@ -91,22 +91,22 @@ static void test_delete_leftover_files(TestCase *tc, void *data)
91
91
  /* Create a bogus separate norms file for a
92
92
  * segment/field that actually has a separate norms file
93
93
  * already: */
94
- copy_file(store, "_2_1.s0", "_2_2.s0");
94
+ //copy_file(store, "_2_1.s0", "_2_2.s0");
95
95
 
96
96
  /* Create a bogus separate norms file for a
97
97
  * segment/field that actually has a separate norms file
98
98
  * already, using the "not compound file" extension: */
99
- copy_file(store, "_2_1.s0", "_2_2.f0");
99
+ //copy_file(store, "_2_1.s0", "_2_2.f0");
100
100
 
101
101
  /* Create a bogus separate norms file for a
102
102
  * segment/field that does not have a separate norms
103
103
  * file already: */
104
- copy_file(store, "_2_1.s0", "_1_1.s0");
104
+ //copy_file(store, "_2_1.s0", "_1_1.s0");
105
105
 
106
106
  /* Create a bogus separate norms file for a
107
107
  * segment/field that does not have a separate norms
108
108
  * file already using the "not compound file" extension: */
109
- copy_file(store, "_2_1.s0", "_1_1.f0");
109
+ //copy_file(store, "_2_1.s0", "_1_1.f0");
110
110
 
111
111
  /* Create a bogus separate del file for a
112
112
  * segment that already has a separate del file: */
@@ -120,21 +120,6 @@ static void test_delete_leftover_files(TestCase *tc, void *data)
120
120
  * non-existent segment: */
121
121
  copy_file(store, "_0_0.del", "_188_1.del");
122
122
 
123
- /* Create a bogus segment file: */
124
- copy_file(store, "_0.cfs", "_188.cfs");
125
-
126
- /* Create a bogus frq file when the CFS already exists: */
127
- copy_file(store, "_0.cfs", "_0.frq");
128
-
129
- /* Create a bogus frq file when the CFS already exists: */
130
- copy_file(store, "_0.cfs", "_0.frq");
131
- copy_file(store, "_0.cfs", "_0.prx");
132
- copy_file(store, "_0.cfs", "_0.fdx");
133
- copy_file(store, "_0.cfs", "_0.fdt");
134
- copy_file(store, "_0.cfs", "_0.tfx");
135
- copy_file(store, "_0.cfs", "_0.tix");
136
- copy_file(store, "_0.cfs", "_0.tis");
137
-
138
123
  /* Create some old segments file: */
139
124
  copy_file(store, "segments_5", "segments");
140
125
  copy_file(store, "segments_5", "segments_2");
@@ -927,24 +927,22 @@ static void test_iw_add_doc(TestCase *tc, void *data)
927
927
 
928
928
  frt_iw_add_doc(iw, docs[0]);
929
929
  Aiequal(1, frt_iw_doc_count(iw));
930
- Assert(!store->exists(store, "_0.cfs"),
931
- "data shouldn't have been written yet");
930
+ // Assert(!store->exists(store, "_0.fdx"), "data shouldn't have been written yet");
932
931
  frt_iw_commit(iw);
933
- Assert(store->exists(store, "_0.cfs"), "data should now be written");
932
+ Assert(store->exists(store, "_0.fdx"), "data should now be written");
934
933
  frt_iw_close(iw);
935
- Assert(store->exists(store, "_0.cfs"), "data should still be there");
934
+ Assert(store->exists(store, "_0.fdx"), "data should still be there");
936
935
 
937
936
  iw = frt_iw_open(NULL, store, frt_whitespace_analyzer_new(false), &frt_default_config);
938
937
  frt_iw_add_doc(iw, docs[1]);
939
938
  Aiequal(2, frt_iw_doc_count(iw));
940
- Assert(!store->exists(store, "_1.cfs"),
941
- "data shouldn't have been written yet");
942
- Assert(store->exists(store, "_0.cfs"), "data should still be there");
939
+ // Assert(!store->exists(store, "_1.fdx"), "data shouldn't have been written yet");
940
+ Assert(store->exists(store, "_0.fdx"), "data should still be there");
943
941
  frt_iw_commit(iw);
944
- Assert(store->exists(store, "_1.cfs"), "data should now be written");
942
+ Assert(store->exists(store, "_1.fdx"), "data should now be written");
945
943
  frt_iw_close(iw);
946
- Assert(store->exists(store, "_1.cfs"), "data should still be there");
947
- Assert(store->exists(store, "_0.cfs"), "data should still be there");
944
+ Assert(store->exists(store, "_1.fdx"), "data should still be there");
945
+ Assert(store->exists(store, "_0.fdx"), "data should still be there");
948
946
 
949
947
  destroy_docs(docs, BOOK_LIST_LENGTH);
950
948
  }
@@ -1141,8 +1139,8 @@ static void test_simulated_corrupt_index2(TestCase *tc, void *data)
1141
1139
  /* segment generation should be > 1 */
1142
1140
  Atrue(gen > 1);
1143
1141
 
1144
- Atrue(store->exists(store, "_0.cfs"));
1145
- store->remove(store, "_0.cfs");
1142
+ Atrue(store->exists(store, "_0.fdx"));
1143
+ store->remove(store, "_0.fdx");
1146
1144
 
1147
1145
  FRT_TRY
1148
1146
  ir = frt_ir_open(NULL, store);
@@ -1177,7 +1175,7 @@ static void test_iw_add_docs(TestCase *tc, void *data)
1177
1175
 
1178
1176
  frt_iw_close(iw);
1179
1177
  destroy_docs(docs, BOOK_LIST_LENGTH);
1180
- if (!Aiequal(3, store->count(store))) {
1178
+ if (!Aiequal(11, store->count(store))) {
1181
1179
  char *buf = frt_store_to_s(store);
1182
1180
  Tmsg("To many files: %s\n", buf);
1183
1181
  free(buf);
@@ -53,16 +53,8 @@ static void test_si(TestCase *tc, void *data)
53
53
  si->store = store;
54
54
  FRT_REF(store);
55
55
  Asi_has_vals(si, "_2", 12, store);
56
- Assert(!frt_si_has_separate_norms(si), "doesn't use compound file/have norms");
57
- si->use_compound_file = true;
58
- Assert(!frt_si_has_separate_norms(si), "doesn't have norms");
59
56
  frt_si_advance_norm_gen(si, 3);
60
- si->use_compound_file = false;
61
- Assert(!frt_si_has_separate_norms(si), "doesn't use compound file");
62
- si->use_compound_file = true;
63
- Assert(!frt_si_has_separate_norms(si), "has norms in compound file");
64
57
  frt_si_advance_norm_gen(si, 3);
65
- Assert(frt_si_has_separate_norms(si), "has seperate norms");
66
58
  frt_si_close(si);
67
59
  frt_store_close(store);
68
60
  }
@@ -7,7 +7,6 @@ TestSuite *ts_1710(TestSuite *suite);
7
7
  TestSuite *ts_analysis(TestSuite *suite);
8
8
  TestSuite *ts_array(TestSuite *suite);
9
9
  TestSuite *ts_bitvector(TestSuite *suite);
10
- TestSuite *ts_compound_io(TestSuite *suite);
11
10
  TestSuite *ts_document(TestSuite *suite);
12
11
  TestSuite *ts_except(TestSuite *suite);
13
12
  TestSuite *ts_fields(TestSuite *suite);
@@ -49,7 +48,6 @@ const struct test_list
49
48
  {ts_analysis},
50
49
  {ts_array},
51
50
  {ts_bitvector},
52
- {ts_compound_io},
53
51
  {ts_document},
54
52
  {ts_except},
55
53
  {ts_fields},
@@ -720,11 +720,11 @@ module Isomorfeus
720
720
 
721
721
  # optimizes the index. This should only be called when the index will no
722
722
  # longer be updated very often, but will be read a lot.
723
- def optimize()
723
+ def optimize
724
724
  @dir.synchronize do
725
- ensure_writer_open()
726
- @writer.optimize()
727
- @writer.close()
725
+ ensure_writer_open
726
+ @writer.optimize
727
+ @writer.close
728
728
  @writer = nil
729
729
  end
730
730
  end
@@ -1,5 +1,5 @@
1
1
  module Isomorfeus
2
2
  module Ferret
3
- VERSION = '0.16.2'
3
+ VERSION = '0.17.0'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: isomorfeus-ferret
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.16.2
4
+ version: 0.17.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Biedermann
@@ -193,7 +193,6 @@ files:
193
193
  - ext/isomorfeus_ferret_ext/frt_array.h
194
194
  - ext/isomorfeus_ferret_ext/frt_bitvector.c
195
195
  - ext/isomorfeus_ferret_ext/frt_bitvector.h
196
- - ext/isomorfeus_ferret_ext/frt_compound_io.c
197
196
  - ext/isomorfeus_ferret_ext/frt_config.h
198
197
  - ext/isomorfeus_ferret_ext/frt_document.c
199
198
  - ext/isomorfeus_ferret_ext/frt_document.h
@@ -333,7 +332,6 @@ files:
333
332
  - ext/isomorfeus_ferret_ext/test_analysis.c
334
333
  - ext/isomorfeus_ferret_ext/test_array.c
335
334
  - ext/isomorfeus_ferret_ext/test_bitvector.c
336
- - ext/isomorfeus_ferret_ext/test_compound_io.c
337
335
  - ext/isomorfeus_ferret_ext/test_document.c
338
336
  - ext/isomorfeus_ferret_ext/test_except.c
339
337
  - ext/isomorfeus_ferret_ext/test_fields.c
@@ -1,352 +0,0 @@
1
- #include "frt_index.h"
2
- #include "frt_array.h"
3
-
4
- extern void frt_store_close(FrtStore *store);
5
- extern FrtInStream *frt_is_new();
6
- extern FrtStore *frt_store_new();
7
-
8
- /****************************************************************************
9
- *
10
- * CompoundStore
11
- *
12
- ****************************************************************************/
13
-
14
- typedef struct FileEntry {
15
- frt_off_t offset;
16
- frt_off_t length;
17
- } FileEntry;
18
-
19
- static void cmpd_touch(FrtStore *store, const char *file_name) {
20
- store->dir.cmpd->store->touch(store->dir.cmpd->store, file_name);
21
- }
22
-
23
- static int cmpd_exists(FrtStore *store, const char *file_name) {
24
- if (frt_h_get(store->dir.cmpd->entries, file_name) != NULL) {
25
- return true;
26
- } else {
27
- return false;
28
- }
29
- }
30
-
31
- static int cmpd_remove(FrtStore *store, const char *file_name) {
32
- (void)store;
33
- (void)file_name;
34
- FRT_RAISE(FRT_UNSUPPORTED_ERROR, "%s", FRT_UNSUPPORTED_ERROR_MSG);
35
- return 0;
36
- }
37
-
38
- static void cmpd_rename(FrtStore *store, const char *from, const char *to) {
39
- (void)store;
40
- (void)from;
41
- (void)to;
42
- FRT_RAISE(FRT_UNSUPPORTED_ERROR, "%s", FRT_UNSUPPORTED_ERROR_MSG);
43
- }
44
-
45
- static int cmpd_count(FrtStore *store) {
46
- return store->dir.cmpd->entries->size;
47
- }
48
-
49
- static void cmpd_each(FrtStore *store, void (*func)(const char *fname, void *arg), void *arg) {
50
- FrtHash *ht = store->dir.cmpd->entries;
51
- int i;
52
- for (i = 0; i <= ht->mask; i++) {
53
- char *fn = (char *)ht->table[i].key;
54
- if (fn) {
55
- func(fn, arg);
56
- }
57
- }
58
- }
59
-
60
- static void cmpd_clear(FrtStore *store) {
61
- (void)store;
62
- FRT_RAISE(FRT_UNSUPPORTED_ERROR, "%s", FRT_UNSUPPORTED_ERROR_MSG);
63
- }
64
-
65
- static void cmpd_close_i(FrtStore *store) {
66
- FrtCompoundStore *cmpd = store->dir.cmpd;
67
- if (cmpd->stream == NULL) {
68
- FRT_RAISE(FRT_IO_ERROR, "Tried to close already closed compound store");
69
- }
70
-
71
- frt_h_destroy(cmpd->entries);
72
-
73
- frt_is_close(cmpd->stream);
74
- cmpd->stream = NULL;
75
- frt_store_close(cmpd->store);
76
- free(store->dir.cmpd);
77
- }
78
-
79
- static frt_off_t cmpd_length(FrtStore *store, const char *file_name) {
80
- FileEntry *fe = (FileEntry *)frt_h_get(store->dir.cmpd->entries, file_name);
81
- if (fe != NULL) {
82
- return fe->length;
83
- } else {
84
- return 0;
85
- }
86
- }
87
-
88
- static void cmpdi_seek_i(FrtInStream *is, frt_off_t pos) {
89
- (void)is;
90
- (void)pos;
91
- }
92
-
93
- static void cmpdi_close_i(FrtInStream *is) {
94
- frt_is_close(is->d.cis->sub);
95
- free(is->d.cis);
96
- }
97
-
98
- static frt_off_t cmpdi_length_i(FrtInStream *is) {
99
- return (is->d.cis->length);
100
- }
101
-
102
- /*
103
- * raises: FRT_EOF_ERROR
104
- */
105
- static void cmpdi_read_i(FrtInStream *is, frt_uchar *b, int len) {
106
- FrtCompoundInStream *cis = is->d.cis;
107
- frt_off_t start = frt_is_pos(is);
108
-
109
- if ((start + len) > cis->length) {
110
- FRT_RAISE(FRT_EOF_ERROR, "Tried to read past end of file. File length is "
111
- "<%"FRT_OFF_T_PFX"d> and tried to read to <%"FRT_OFF_T_PFX"d>",
112
- cis->length, start + len);
113
- }
114
-
115
- frt_is_seek(cis->sub, cis->offset + start);
116
- frt_is_read_bytes(cis->sub, b, len);
117
- }
118
-
119
- static const struct FrtInStreamMethods CMPD_IN_STREAM_METHODS = {
120
- cmpdi_read_i,
121
- cmpdi_seek_i,
122
- cmpdi_length_i,
123
- cmpdi_close_i
124
- };
125
-
126
- static FrtInStream *cmpd_create_input(FrtInStream *sub_is, frt_off_t offset, frt_off_t length) {
127
- FrtInStream *is = frt_is_new();
128
- FrtCompoundInStream *cis = FRT_ALLOC(FrtCompoundInStream);
129
-
130
- cis->sub = sub_is;
131
- FRT_REF(sub_is);
132
- cis->offset = offset;
133
- cis->length = length;
134
- is->d.cis = cis;
135
- is->m = &CMPD_IN_STREAM_METHODS;
136
-
137
- return is;
138
- }
139
-
140
- static FrtInStream *cmpd_open_input(FrtStore *store, const char *file_name) {
141
- FileEntry *entry;
142
- FrtCompoundStore *cmpd = store->dir.cmpd;
143
- FrtInStream *is;
144
-
145
- pthread_mutex_lock(&store->mutex);
146
- if (cmpd->stream == NULL) {
147
- pthread_mutex_unlock(&store->mutex);
148
- FRT_RAISE(FRT_IO_ERROR, "Can't open compound file input stream. Parent "
149
- "stream is closed.");
150
- }
151
-
152
- entry = (FileEntry *)frt_h_get(cmpd->entries, file_name);
153
- if (entry == NULL) {
154
- pthread_mutex_unlock(&store->mutex);
155
- FRT_RAISE(FRT_IO_ERROR, "File '%s' does not exist: ", file_name);
156
- }
157
-
158
- is = cmpd_create_input(cmpd->stream, entry->offset, entry->length);
159
- pthread_mutex_unlock(&store->mutex);
160
-
161
- return is;
162
- }
163
-
164
- static FrtOutStream *cmpd_new_output(FrtStore *store, const char *file_name) {
165
- (void)store;
166
- (void)file_name;
167
- FRT_RAISE(FRT_UNSUPPORTED_ERROR, "%s", FRT_UNSUPPORTED_ERROR_MSG);
168
- return NULL;
169
- }
170
-
171
- static FrtLock *cmpd_open_lock_i(FrtStore *store, const char *lock_name) {
172
- (void)store;
173
- (void)lock_name;
174
- FRT_RAISE(FRT_UNSUPPORTED_ERROR, "%s", FRT_UNSUPPORTED_ERROR_MSG);
175
- return NULL;
176
- }
177
-
178
- static void cmpd_close_lock_i(FrtLock *lock) {
179
- (void)lock;
180
- FRT_RAISE(FRT_UNSUPPORTED_ERROR, "%s", FRT_UNSUPPORTED_ERROR_MSG);
181
- }
182
-
183
- FrtStore *frt_open_cmpd_store(FrtStore *store, const char *name) {
184
- int count, i;
185
- frt_off_t offset;
186
- char *fname;
187
- FileEntry *volatile entry = NULL;
188
- FrtStore *new_store = NULL;
189
- FrtCompoundStore *volatile cmpd = NULL;
190
- FrtInStream *volatile is = NULL;
191
-
192
- FRT_TRY
193
- cmpd = FRT_ALLOC_AND_ZERO(FrtCompoundStore);
194
-
195
- cmpd->store = store;
196
- FRT_REF(store);
197
- cmpd->name = name;
198
- cmpd->entries = frt_h_new_str(&free, &free);
199
- is = cmpd->stream = store->open_input(store, cmpd->name);
200
-
201
- /* read the directory and init files */
202
- count = frt_is_read_vint(is);
203
- entry = NULL;
204
- for (i = 0; i < count; i++) {
205
- offset = (off_t)frt_is_read_i64(is);
206
- fname = frt_is_read_string(is);
207
-
208
- if (entry != NULL) {
209
- /* set length of the previous entry */
210
- entry->length = offset - entry->offset;
211
- }
212
-
213
- entry = FRT_ALLOC(FileEntry);
214
- entry->offset = offset;
215
- frt_h_set(cmpd->entries, fname, entry);
216
- }
217
- FRT_XCATCHALL
218
- frt_store_close(store);
219
- if (is) frt_is_close(is);
220
- if (cmpd->entries) frt_h_destroy(cmpd->entries);
221
- free(cmpd);
222
- FRT_XENDTRY
223
-
224
- /* set the length of the final entry */
225
- if (entry != NULL) {
226
- entry->length = frt_is_length(is) - entry->offset;
227
- }
228
-
229
- new_store = frt_store_new();
230
- new_store->dir.cmpd = cmpd;
231
- new_store->touch = &cmpd_touch;
232
- new_store->exists = &cmpd_exists;
233
- new_store->remove = &cmpd_remove;
234
- new_store->rename = &cmpd_rename;
235
- new_store->count = &cmpd_count;
236
- new_store->clear = &cmpd_clear;
237
- new_store->length = &cmpd_length;
238
- new_store->each = &cmpd_each;
239
- new_store->close_i = &cmpd_close_i;
240
- new_store->new_output = &cmpd_new_output;
241
- new_store->open_input = &cmpd_open_input;
242
- new_store->open_lock_i = &cmpd_open_lock_i;
243
- new_store->close_lock_i = &cmpd_close_lock_i;
244
-
245
- return new_store;
246
- }
247
-
248
- /****************************************************************************
249
- *
250
- * CompoundWriter
251
- *
252
- ****************************************************************************/
253
-
254
- FrtCompoundWriter *frt_open_cw(FrtStore *store, char *name) {
255
- FrtCompoundWriter *cw = FRT_ALLOC(FrtCompoundWriter);
256
- cw->store = store;
257
- FRT_REF(store);
258
- cw->name = name;
259
- cw->ids = frt_hs_new_str(&free);
260
- cw->file_entries = frt_ary_new_type_capa(FrtCWFileEntry, FRT_CW_INIT_CAPA);
261
- return cw;
262
- }
263
-
264
- void frt_cw_add_file(FrtCompoundWriter *cw, char *id) {
265
- id = frt_estrdup(id);
266
- if (frt_hs_add(cw->ids, id) != FRT_HASH_KEY_DOES_NOT_EXIST) {
267
- FRT_RAISE(FRT_IO_ERROR, "Tried to add file \"%s\" which has already been "
268
- "added to the compound store", id);
269
- }
270
-
271
- frt_ary_grow(cw->file_entries);
272
- frt_ary_last(cw->file_entries).name = id;
273
- }
274
-
275
- static void cw_copy_file(FrtCompoundWriter *cw, FrtCWFileEntry *src, FrtOutStream *os) {
276
- frt_off_t start_ptr = frt_os_pos(os);
277
- frt_off_t end_ptr;
278
- frt_off_t remainder, length, len;
279
- frt_uchar buffer[FRT_BUFFER_SIZE];
280
-
281
- FrtInStream *is = cw->store->open_input(cw->store, src->name);
282
-
283
- remainder = length = frt_is_length(is);
284
-
285
- while (remainder > 0) {
286
- len = FRT_MIN(remainder, FRT_BUFFER_SIZE);
287
- frt_is_read_bytes(is, buffer, len);
288
- frt_os_write_bytes(os, buffer, len);
289
- remainder -= len;
290
- }
291
-
292
- /* Verify that remainder is 0 */
293
- if (remainder != 0) {
294
- FRT_RAISE(FRT_IO_ERROR, "There seems to be an error in the compound file "
295
- "should have read to the end but there are <%"FRT_OFF_T_PFX"d> "
296
- "bytes left", remainder);
297
- }
298
-
299
- /* Verify that the output length diff is equal to original file */
300
- end_ptr = frt_os_pos(os);
301
- len = end_ptr - start_ptr;
302
- if (len != length) {
303
- FRT_RAISE(FRT_IO_ERROR, "Difference in compound file output file offsets "
304
- "<%"FRT_OFF_T_PFX"d> does not match the original file lenght "
305
- "<%"FRT_OFF_T_PFX"d>", len, length);
306
- }
307
-
308
- frt_is_close(is);
309
- }
310
-
311
- void frt_cw_close(FrtCompoundWriter *cw, FrtDeleter *dlr) {
312
- FrtOutStream *os = NULL;
313
- int i;
314
-
315
- if (cw->ids->size <= 0) {
316
- FRT_RAISE(FRT_STATE_ERROR, "Tried to merge compound file with no entries");
317
- }
318
-
319
- os = cw->store->new_output(cw->store, cw->name);
320
-
321
- frt_os_write_vint(os, frt_ary_size(cw->file_entries));
322
-
323
- /* Write the directory with all offsets at 0.
324
- * Remember the positions of directory entries so that we can adjust the
325
- * offsets later */
326
- for (i = 0; i < frt_ary_size(cw->file_entries); i++) {
327
- cw->file_entries[i].dir_offset = frt_os_pos(os);
328
- frt_os_write_u64(os, 0); /* for now */
329
- frt_os_write_string(os, cw->file_entries[i].name);
330
- }
331
-
332
- /* Open the files and copy their data into the stream. Remember the
333
- * locations of each file's data section. */
334
- for (i = 0; i < frt_ary_size(cw->file_entries); i++) {
335
- cw->file_entries[i].data_offset = frt_os_pos(os);
336
- cw_copy_file(cw, &cw->file_entries[i], os);
337
- if (dlr) {
338
- frt_deleter_queue_file(dlr, cw->file_entries[i].name);
339
- }
340
- }
341
-
342
- /* Write the data offsets into the directory of the compound stream */
343
- for (i = 0; i < frt_ary_size(cw->file_entries); i++) {
344
- frt_os_seek(os, cw->file_entries[i].dir_offset);
345
- frt_os_write_u64(os, cw->file_entries[i].data_offset);
346
- }
347
- frt_os_close(os);
348
- frt_hs_destroy(cw->ids);
349
- frt_ary_free(cw->file_entries);
350
- frt_store_close(cw->store);
351
- free(cw);
352
- }
@@ -1,170 +0,0 @@
1
- #include "frt_store.h"
2
- #include "frt_index.h"
3
- #include "testhelper.h"
4
- #include "test.h"
5
-
6
- void test_compound_reader(TestCase *tc, void *data)
7
- {
8
- FrtStore *store = (FrtStore *)data;
9
- char *p;
10
- FrtOutStream *os = store->new_output(store, "cfile");
11
- FrtInStream *is1;
12
- FrtInStream *is2;
13
- FrtStore *c_reader;
14
- frt_os_write_vint(os, 2);
15
- frt_os_write_u64(os, 29);
16
- frt_os_write_string(os, "file1");
17
- frt_os_write_u64(os, 33);
18
- frt_os_write_string(os, "file2");
19
- frt_os_write_u32(os, 20);
20
- frt_os_write_string(os, "this is file 2");
21
- frt_os_close(os);
22
-
23
- c_reader = frt_open_cmpd_store(store, "cfile");
24
- Aiequal(4, c_reader->length(c_reader, "file1"));
25
- Aiequal(15, c_reader->length(c_reader, "file2"));
26
- is1 = c_reader->open_input(c_reader, "file1");
27
- is2 = c_reader->open_input(c_reader, "file2");
28
- Aiequal(20, frt_is_read_u32(is1));
29
- Asequal("this is file 2", p = frt_is_read_string(is2)); free(p);
30
- frt_is_close(is1);
31
- frt_is_close(is2);
32
- frt_store_close(c_reader);
33
- }
34
-
35
- void test_compound_writer(TestCase *tc, void *data)
36
- {
37
- FrtStore *store = (FrtStore *)data;
38
- char *p;
39
- FrtOutStream *os1 = store->new_output(store, "file1");
40
- FrtOutStream *os2 = store->new_output(store, "file2");
41
- FrtCompoundWriter *cw;
42
- FrtInStream *is;
43
-
44
- frt_os_write_u32(os1, 20);
45
- frt_os_write_string(os2, "this is file2");
46
- frt_os_close(os1);
47
- frt_os_close(os2);
48
- cw = frt_open_cw(store, "cfile");
49
- frt_cw_add_file(cw, "file1");
50
- frt_cw_add_file(cw, "file2");
51
- frt_cw_close(cw, NULL);
52
-
53
- is = store->open_input(store, "cfile");
54
- Aiequal(2, frt_is_read_vint(is));
55
- Aiequal(29, frt_is_read_u64(is));
56
- Asequal("file1", p = frt_is_read_string(is)); free(p);
57
- Aiequal(33, frt_is_read_u64(is));
58
- Asequal("file2", p = frt_is_read_string(is)); free(p);
59
- Aiequal(20, frt_is_read_u32(is));
60
- Asequal("this is file2", p = frt_is_read_string(is)); free(p);
61
-
62
- frt_is_close(is);
63
- }
64
-
65
- void test_compound_io(TestCase *tc, void *data)
66
- {
67
- FrtStore *c_reader;
68
- FrtInStream *is1, *is2, *is3;
69
- FrtStore *store = (FrtStore *)data;
70
- FrtCompoundWriter *cw;
71
- char *p;
72
- FrtOutStream *os1 = store->new_output(store, "file1");
73
- FrtOutStream *os2 = store->new_output(store, "file2");
74
- FrtOutStream *os3 = store->new_output(store, "file3");
75
- char long_string[10000];
76
- const char *short_string = "this is a short string";
77
- int slen = (int)strlen(short_string);
78
- int i;
79
-
80
- for (i = 0; i < 20; i++) {
81
- frt_os_write_u32(os1, rand()%10000);
82
- }
83
-
84
- for (i = 0; i < 10000 - slen; i += slen) {
85
- sprintf(long_string + i, "%s", short_string);
86
- }
87
- long_string[i] = 0;
88
- frt_os_write_string(os2, long_string);
89
- frt_os_write_string(os3, short_string);
90
- frt_os_close(os1);
91
- frt_os_close(os2);
92
- frt_os_close(os3);
93
- cw = frt_open_cw(store, (char *)"cfile");
94
- frt_cw_add_file(cw, (char *)"file1");
95
- frt_cw_add_file(cw, (char *)"file2");
96
- frt_cw_add_file(cw, (char *)"file3");
97
- frt_cw_close(cw, NULL);
98
-
99
- c_reader = frt_open_cmpd_store(store, "cfile");
100
- is1 = c_reader->open_input(c_reader, "file1");
101
- for (i = 0; i < 20; i++) {
102
- Assert(frt_is_read_u32(is1) < 10000, "should be a rand between 0 and 10000");
103
- }
104
- frt_is_close(is1);
105
- is2 = c_reader->open_input(c_reader, "file2");
106
- Asequal(long_string, p = frt_is_read_string(is2)); free(p);
107
- frt_is_close(is2);
108
- is3 = c_reader->open_input(c_reader, "file3");
109
- Asequal(short_string, p = frt_is_read_string(is3)); free(p);
110
- frt_is_close(is3);
111
-
112
- frt_store_close(c_reader);
113
- }
114
-
115
- #define MAX_TEST_WORDS 50
116
- #define TEST_FILE_CNT 100
117
-
118
- void test_compound_io_many_files(TestCase *tc, void *data)
119
- {
120
- static const int MAGIC = 250777;
121
-
122
- FrtStore *store = (FrtStore *)data;
123
- char buf[MAX_TEST_WORDS * (TEST_WORD_LIST_MAX_LEN + 1)];
124
- char *str;
125
- int i;
126
- FrtOutStream *os;
127
- FrtInStream *is;
128
- FrtCompoundWriter *cw;
129
- FrtStore *c_reader;
130
-
131
- cw = frt_open_cw(store, (char *)"_.cfs");
132
- for (i = 0; i < TEST_FILE_CNT; i++) {
133
- sprintf(buf, "_%d.txt", i);
134
- frt_cw_add_file(cw, buf);
135
- os = store->new_output(store, buf);
136
- frt_os_write_string(os, make_random_string(buf, MAX_TEST_WORDS));
137
- frt_os_write_vint(os, MAGIC);
138
- frt_os_close(os);
139
- }
140
- frt_cw_close(cw, NULL);
141
-
142
- c_reader = frt_open_cmpd_store(store, "_.cfs");
143
- for (i = 0; i < TEST_FILE_CNT; i++) {
144
- sprintf(buf, "_%d.txt", i);
145
- is = c_reader->open_input(c_reader, buf);
146
- str = frt_is_read_string(is);
147
-
148
- free(str);
149
- Aiequal(MAGIC, frt_is_read_vint(is));
150
- Aiequal(frt_is_length(is), frt_is_pos(is));
151
- frt_is_close(is);
152
- }
153
- frt_store_close(c_reader);
154
- }
155
-
156
- TestSuite *ts_compound_io(TestSuite *suite)
157
- {
158
- FrtStore *store = frt_open_ram_store(NULL);
159
-
160
- suite = ADD_SUITE(suite);
161
-
162
- tst_run_test(suite, test_compound_reader, store);
163
- tst_run_test(suite, test_compound_writer, store);
164
- tst_run_test(suite, test_compound_io, store);
165
- tst_run_test(suite, test_compound_io_many_files, store);
166
-
167
- frt_store_close(store);
168
-
169
- return suite;
170
- }