ferret 0.3.2 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. data/CHANGELOG +9 -0
  2. data/Rakefile +51 -25
  3. data/ext/analysis.c +553 -0
  4. data/ext/analysis.h +76 -0
  5. data/ext/array.c +83 -0
  6. data/ext/array.h +19 -0
  7. data/ext/bitvector.c +164 -0
  8. data/ext/bitvector.h +29 -0
  9. data/ext/compound_io.c +335 -0
  10. data/ext/document.c +336 -0
  11. data/ext/document.h +87 -0
  12. data/ext/ferret.c +88 -47
  13. data/ext/ferret.h +43 -109
  14. data/ext/field.c +395 -0
  15. data/ext/filter.c +103 -0
  16. data/ext/fs_store.c +352 -0
  17. data/ext/global.c +219 -0
  18. data/ext/global.h +73 -0
  19. data/ext/hash.c +446 -0
  20. data/ext/hash.h +80 -0
  21. data/ext/hashset.c +141 -0
  22. data/ext/hashset.h +37 -0
  23. data/ext/helper.c +11 -0
  24. data/ext/helper.h +5 -0
  25. data/ext/inc/lang.h +41 -0
  26. data/ext/ind.c +389 -0
  27. data/ext/index.h +884 -0
  28. data/ext/index_io.c +269 -415
  29. data/ext/index_rw.c +2543 -0
  30. data/ext/lang.c +31 -0
  31. data/ext/lang.h +41 -0
  32. data/ext/priorityqueue.c +228 -0
  33. data/ext/priorityqueue.h +44 -0
  34. data/ext/q_boolean.c +1331 -0
  35. data/ext/q_const_score.c +154 -0
  36. data/ext/q_fuzzy.c +287 -0
  37. data/ext/q_match_all.c +142 -0
  38. data/ext/q_multi_phrase.c +343 -0
  39. data/ext/q_parser.c +2180 -0
  40. data/ext/q_phrase.c +657 -0
  41. data/ext/q_prefix.c +75 -0
  42. data/ext/q_range.c +247 -0
  43. data/ext/q_span.c +1566 -0
  44. data/ext/q_term.c +308 -0
  45. data/ext/q_wildcard.c +146 -0
  46. data/ext/r_analysis.c +255 -0
  47. data/ext/r_doc.c +578 -0
  48. data/ext/r_index_io.c +996 -0
  49. data/ext/r_qparser.c +158 -0
  50. data/ext/r_search.c +2321 -0
  51. data/ext/r_store.c +263 -0
  52. data/ext/r_term.c +219 -0
  53. data/ext/ram_store.c +447 -0
  54. data/ext/search.c +524 -0
  55. data/ext/search.h +1065 -0
  56. data/ext/similarity.c +143 -39
  57. data/ext/sort.c +661 -0
  58. data/ext/store.c +35 -0
  59. data/ext/store.h +152 -0
  60. data/ext/term.c +704 -143
  61. data/ext/termdocs.c +599 -0
  62. data/ext/vector.c +594 -0
  63. data/lib/ferret.rb +9 -10
  64. data/lib/ferret/analysis/analyzers.rb +2 -2
  65. data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
  66. data/lib/ferret/analysis/token.rb +14 -14
  67. data/lib/ferret/analysis/token_filters.rb +3 -3
  68. data/lib/ferret/document/field.rb +16 -17
  69. data/lib/ferret/index/document_writer.rb +4 -4
  70. data/lib/ferret/index/index.rb +39 -23
  71. data/lib/ferret/index/index_writer.rb +2 -2
  72. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
  73. data/lib/ferret/index/segment_term_vector.rb +4 -4
  74. data/lib/ferret/index/term.rb +5 -1
  75. data/lib/ferret/index/term_vector_offset_info.rb +6 -6
  76. data/lib/ferret/index/term_vectors_io.rb +5 -5
  77. data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
  78. data/lib/ferret/search.rb +1 -1
  79. data/lib/ferret/search/boolean_query.rb +2 -1
  80. data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
  81. data/lib/ferret/search/fuzzy_query.rb +2 -1
  82. data/lib/ferret/search/index_searcher.rb +3 -0
  83. data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
  84. data/lib/ferret/search/multi_phrase_query.rb +6 -5
  85. data/lib/ferret/search/phrase_query.rb +3 -6
  86. data/lib/ferret/search/prefix_query.rb +4 -4
  87. data/lib/ferret/search/sort.rb +3 -1
  88. data/lib/ferret/search/sort_field.rb +9 -9
  89. data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
  90. data/lib/ferret/search/spans/span_near_query.rb +1 -1
  91. data/lib/ferret/search/spans/span_weight.rb +1 -1
  92. data/lib/ferret/search/spans/spans_enum.rb +7 -7
  93. data/lib/ferret/store/fs_store.rb +10 -6
  94. data/lib/ferret/store/ram_store.rb +3 -3
  95. data/lib/rferret.rb +36 -0
  96. data/test/functional/thread_safety_index_test.rb +2 -2
  97. data/test/test_helper.rb +16 -2
  98. data/test/unit/analysis/c_token.rb +25 -0
  99. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
  100. data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
  101. data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
  102. data/test/unit/document/c_field.rb +98 -0
  103. data/test/unit/document/tc_field.rb +0 -66
  104. data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
  105. data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
  106. data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
  107. data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
  108. data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
  109. data/test/unit/index/tc_segment_term_vector.rb +2 -2
  110. data/test/unit/index/tc_term_vectors_io.rb +4 -4
  111. data/test/unit/query_parser/c_query_parser.rb +138 -0
  112. data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
  113. data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
  114. data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
  115. data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
  116. data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
  117. data/test/unit/search/c_sort_field.rb +27 -0
  118. data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
  119. data/test/unit/search/tc_sort_field.rb +7 -20
  120. data/test/unit/store/c_fs_store.rb +76 -0
  121. data/test/unit/store/c_ram_store.rb +35 -0
  122. data/test/unit/store/m_store.rb +34 -0
  123. data/test/unit/store/m_store_lock.rb +68 -0
  124. data/test/unit/store/tc_fs_store.rb +0 -53
  125. data/test/unit/store/tc_ram_store.rb +0 -20
  126. data/test/unit/store/tm_store.rb +0 -30
  127. data/test/unit/store/tm_store_lock.rb +0 -66
  128. metadata +84 -31
  129. data/ext/Makefile +0 -140
  130. data/ext/ferret_ext.so +0 -0
  131. data/ext/priority_queue.c +0 -232
  132. data/ext/ram_directory.c +0 -321
  133. data/ext/segment_merge_queue.c +0 -37
  134. data/ext/segment_term_enum.c +0 -326
  135. data/ext/string_helper.c +0 -42
  136. data/ext/tags +0 -344
  137. data/ext/term_buffer.c +0 -230
  138. data/ext/term_infos_reader.c +0 -54
  139. data/ext/terminfo.c +0 -160
  140. data/ext/token.c +0 -93
  141. data/ext/util.c +0 -12
data/ext/analysis.h ADDED
@@ -0,0 +1,76 @@
1
+ #ifndef FRT_ANALYSIS_H
2
+ #define FRT_ANALYSIS_H
3
+
4
+ #include <global.h>
5
+ #include <hash.h>
6
+
7
+ /****************************************************************************
8
+ *
9
+ * Token
10
+ *
11
+ ****************************************************************************/
12
+
13
+ typedef struct Token {
14
+ char text[MAX_WORD_SIZE];
15
+ int start;
16
+ int end;
17
+ int pos_inc;
18
+ } Token;
19
+
20
+ Token *tk_create();
21
+ void tk_destroy(void *p);
22
+ Token *tk_set(Token *tk, char *text, int tlen, int start, int end, int pos_inc);
23
+ Token *tk_set_no_len(Token *tk, char *text, int start, int end, int pos_inc);
24
+ int tk_eq(Token *tk1, Token *tk2);
25
+ int tk_cmp(Token *tk1, Token *tk2);
26
+
27
+ /****************************************************************************
28
+ *
29
+ * TokenStream
30
+ *
31
+ ****************************************************************************/
32
+
33
+ typedef struct TokenStream TokenStream;
34
+ struct TokenStream {
35
+ void *data;
36
+ char *text;
37
+ int pos;
38
+ Token *token;
39
+ Token *(*next)(TokenStream *ts);
40
+ void (*reset)(TokenStream *ts, char *text);
41
+ void (*destroy)(void *p);
42
+ TokenStream *sub_ts; // used by filters
43
+ };
44
+
45
+ #define ts_next(mts) mts->next(mts)
46
+ #define ts_destroy(mts) mts->destroy(mts)
47
+
48
+ TokenStream *whitespace_tokenizer_create();
49
+ TokenStream *letter_tokenizer_create();
50
+ TokenStream *standard_tokenizer_create();
51
+ TokenStream *lowercase_filter_create(TokenStream *ts);
52
+ TokenStream *stop_filter_create_with_words(TokenStream *ts, char **words, int len);
53
+ TokenStream *stop_filter_create(TokenStream *ts);
54
+
55
+ /****************************************************************************
56
+ *
57
+ * Analyzer
58
+ *
59
+ ****************************************************************************/
60
+
61
+ typedef struct Analyzer {
62
+ void *data;
63
+ TokenStream *current_ts;
64
+ TokenStream *(*get_ts)(struct Analyzer *a, char *field, char *text);
65
+ void (*destroy)(void *p);
66
+ } Analyzer;
67
+
68
+ #define a_destroy(ma) ma->destroy(ma)
69
+ #define a_get_ts(ma, field, text) ma->get_ts(ma, field, text)
70
+
71
+ Analyzer *whitespace_analyzer_create();
72
+ Analyzer *letter_analyzer_create();
73
+ Analyzer *standard_analyzer_create();
74
+ Analyzer *standard_analyzer_create_with_words(char **words, int len);
75
+
76
+ #endif
data/ext/array.c ADDED
@@ -0,0 +1,83 @@
1
+ #include <global.h>
2
+ #include <array.h>
3
+ #include <string.h>
4
+
5
+ Array *ary_create(int allocate, void (*free_elem)(void *p))
6
+ {
7
+ Array *ary = ALLOC(Array);
8
+ if (allocate == 0) {
9
+ ary->elems = NULL;
10
+ } else {
11
+ ary->elems = ALLOC_N(void *, allocate);
12
+ memset(ary->elems, 0, sizeof(void *) * allocate);
13
+ }
14
+ ary->size = 0;
15
+ ary->allocated = allocate;
16
+ ary->free_elem = free_elem;
17
+
18
+ return ary;
19
+ }
20
+
21
+ void ary_destroy(void *p)
22
+ {
23
+ Array *ary = (Array *)p;
24
+ int i;
25
+ for (i = 0; i < ary->size; i++) {
26
+ if (ary->free_elem != NULL && ary->elems[i] != NULL)
27
+ ary->free_elem(ary->elems[i]);
28
+ }
29
+ free(ary->elems);
30
+ free(ary);
31
+ }
32
+
33
+ void ary_set(Array *ary, int index, void *value)
34
+ {
35
+ if (index >= ary->allocated) {
36
+ ary->allocated = (index + 1)*2;
37
+ REALLOC_N(ary->elems, void *, (ary->allocated));
38
+ memset((&ary->elems[ary->size]), 0, sizeof(void *) * (ary->allocated - ary->size));
39
+ }
40
+
41
+ if (index >= ary->size)
42
+ ary->size = index + 1;
43
+
44
+ if (ary->free_elem != NULL && ary->elems[index] != NULL)
45
+ ary->free_elem(ary->elems[index]);
46
+
47
+ ary->elems[index] = value;
48
+ }
49
+
50
+ void ary_append(Array *ary, void *value)
51
+ {
52
+ ary_set(ary, ary->size, value);
53
+ }
54
+
55
+ void *ary_get(Array *ary, int index)
56
+ {
57
+ if (index >= ary->size)
58
+ return NULL;
59
+ return ary->elems[index];
60
+ }
61
+
62
+ void ary_delete(Array *ary, int index)
63
+ {
64
+ if (index >= ary->size)
65
+ return;
66
+ ary->free_elem(ary->elems[index]);
67
+ ary->elems[index] = NULL;
68
+ if (index == ary->size - 1)
69
+ ary->size--;
70
+ }
71
+
72
+ void *ary_remove(Array *ary, int index)
73
+ {
74
+ void *p;
75
+ if (index >= ary->size)
76
+ return NULL;
77
+ p = ary->elems[index];
78
+ ary->elems[index] = NULL;
79
+ ary->size--;
80
+ memmove(&ary->elems[index], &ary->elems[index + 1],
81
+ sizeof(void *) *(ary->size - index));
82
+ return p;
83
+ }
data/ext/array.h ADDED
@@ -0,0 +1,19 @@
1
+ #ifndef FRT_ARRAY_H
2
+ #define FRT_ARRAY_H
3
+
4
/*
 * Growable array of void pointers.
 */
typedef struct Array {
  void **elems;               /* slot storage; unused slots are NULL */
  int size;                   /* one past the highest index in use */
  int allocated;              /* number of slots reserved in elems */
  void (*free_elem)(void *p); /* element destructor, may be NULL */
} Array;

/* The first argument is the number of slots to reserve, not the initial
 * size (a new array always has size 0), so it is named as in array.c. */
Array *ary_create(int allocate, void (*free_elem)(void *p));
void ary_destroy(void *p);
void ary_set(Array *ary, int index, void *value);
void ary_append(Array *ary, void *value);
void *ary_get(Array *ary, int index);
void ary_delete(Array *ary, int index);
void *ary_remove(Array *ary, int index);
18
+
19
+ #endif
data/ext/bitvector.c ADDED
@@ -0,0 +1,164 @@
1
+ #include <bitvector.h>
2
+ #include <string.h>
3
+
4
+ BitVector *bv_create_size(int size)
5
+ {
6
+ BitVector *bv = ALLOC(BitVector);
7
+
8
+ bv->capa = (size >> 3) + 1;
9
+ bv->bits = ALLOC_N(uchar, bv->capa);
10
+ memset(bv->bits, 0, bv->capa);
11
+
12
+ bv->size = 0;
13
+ bv->count = 0;
14
+ bv->curr_bit = -1;
15
+ return bv;
16
+ }
17
+
18
+ BitVector *bv_create()
19
+ {
20
+ return bv_create_size(BV_INIT_CAPA);
21
+ }
22
+
23
+ void bv_destroy(void *p)
24
+ {
25
+ BitVector *bv = (BitVector *)p;
26
+ free(bv->bits);
27
+ free(bv);
28
+ }
29
+
30
+ void bv_set(BitVector *bv, int bit)
31
+ {
32
+ int byte = bit>>3;
33
+ uchar bitmask = 1<<(bit&7);
34
+ if (bv->size <= byte) {
35
+ bv->size = byte + 1;
36
+ if (bv->size >= bv->capa) {
37
+ int capa = bv->capa * 2;
38
+ while (capa < bv->size) capa *= 2;
39
+ REALLOC_N(bv->bits, uchar, capa);
40
+ memset(bv->bits + bv->capa, 0, capa - bv->capa);
41
+ bv->capa = capa;
42
+ }
43
+ }
44
+ uchar *byte_p = &(bv->bits[byte]);
45
+ if ((bitmask & *byte_p) == 0) {
46
+ bv->count++;
47
+ *byte_p |= bitmask;
48
+ }
49
+ }
50
+
51
+ int bv_get(BitVector *bv, int bit)
52
+ {
53
+ int byte = bit>>3;
54
+ if (byte >= bv->size) return 0;
55
+ return (bv->bits[byte]>>(bit&7))&1;
56
+ }
57
+
58
+ void bv_clear(BitVector *bv)
59
+ {
60
+ memset(bv->bits, 0, bv->size);
61
+ bv->count = 0;
62
+ }
63
+
64
+ void bv_unset(BitVector *bv, int bit)
65
+ {
66
+ int byte = bit>>3;
67
+ if (byte >= bv->size) return;
68
+ uchar *byte_p = &(bv->bits[byte]);
69
+ uchar bitmask = 1<<(bit&7);
70
+ if ((bitmask & *byte_p) > 0) {
71
+ bv->count--;
72
+ *byte_p &= ~bitmask;
73
+ }
74
+ }
75
+
76
+ void bv_write(BitVector *bv, Store *store, char *name)
77
+ {
78
+ OutStream *os = store->create_output(store, name);
79
+ os_write_vint(os, bv->size);
80
+ os_write_bytes(os, bv->bits, bv->size);
81
+ os_close(os);
82
+ }
83
+
84
+ const uchar BYTE_COUNTS[] = { // table of bits/char
85
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
86
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
87
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
88
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
89
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
90
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
91
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
92
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
93
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
94
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
95
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
96
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
97
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
98
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
99
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
100
+ 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
101
+ };
102
+
103
+ int bv_count(BitVector *bv)
104
+ {
105
+ // if the vector has been modified
106
+ int i, c = 0;
107
+ uchar *bytes = bv->bits;
108
+ for (i = 0; i < bv->size; i++)
109
+ c += BYTE_COUNTS[bytes[i]]; // sum bits per char
110
+ bv->count = c;
111
+ return c;
112
+ }
113
+
114
+ BitVector *bv_read(Store *store, char *name)
115
+ {
116
+ BitVector *bv = ALLOC(BitVector);
117
+ InStream *is = store->open_input(store, name);
118
+ bv->capa = bv->size = is_read_vint(is);
119
+ bv->bits = ALLOC_N(uchar, bv->capa);
120
+ is_read_bytes(is, bv->bits, 0, bv->size);
121
+ is_close(is);
122
+ bv_count(bv);
123
+ return bv;
124
+ }
125
+
126
+ void bv_scan_reset(BitVector *bv)
127
+ {
128
+ bv->curr_bit = -1;
129
+ }
130
+
131
+ inline int bv_scan_next_from(BitVector *bv, register const int from)
132
+ {
133
+ register const uchar *const bits = bv->bits;
134
+ register const int size = bv->size;
135
+ register int byte_pos = (from) >> 3;
136
+ register int inc = ((from) & 7);
137
+ register int bit = 1 << inc;
138
+ register int mask = 0xff << inc;
139
+ register int byte;
140
+
141
+ if (byte_pos >= size) return -1;
142
+ if ((bits[byte_pos]&mask) == 0) {
143
+ inc = 0;
144
+ bit = 1;
145
+ do {
146
+ byte_pos++;
147
+ if (byte_pos >= size) return -1;
148
+ } while (bits[byte_pos] == 0);
149
+ }
150
+
151
+ byte = bits[byte_pos];
152
+ while ((byte & bit) == 0) {
153
+ bit <<= 1;
154
+ inc++;
155
+ }
156
+
157
+ return bv->curr_bit = ((byte_pos << 3) + inc);
158
+ }
159
+
160
+ inline int bv_scan_next(BitVector *bv)
161
+ {
162
+ return bv_scan_next_from(bv, bv->curr_bit+1);
163
+ }
164
+
data/ext/bitvector.h ADDED
@@ -0,0 +1,29 @@
1
+ #ifndef FRT_BIT_VECTOR_H
2
+ #define FRT_BIT_VECTOR_H
3
+
4
+ #include <global.h>
5
+ #include <store.h>
6
+
7
+ #define BV_INIT_CAPA 256
8
+ typedef struct BitVector {
9
+ uchar *bits;
10
+ int size;
11
+ int capa;
12
+ int count;
13
+ int curr_bit;
14
+ } BitVector;
15
+
16
+ BitVector *bv_create();;
17
+ BitVector *bv_create_size(int size);
18
+ void bv_destroy(void *bv);
19
+ void bv_set(BitVector *bv, int bit);
20
+ int bv_get(BitVector *bv, int bit);
21
+ void bv_clear(BitVector *bv);
22
+ void bv_unset(BitVector *bv, int bit);
23
+ void bv_write(BitVector *bv, Store *store, char *name);
24
+ BitVector *bv_read(Store *store, char *name);
25
+ void bv_scan_reset(BitVector *bv);
26
+ int bv_scan_next(BitVector *bv);
27
+ int bv_scan_next_from(BitVector *bv, int from);
28
+
29
+ #endif
data/ext/compound_io.c ADDED
@@ -0,0 +1,335 @@
1
+ #include "index.h"
2
+
3
+ /****************************************************************************
4
+ *
5
+ * CompoundStore
6
+ *
7
+ ****************************************************************************/
8
+
9
/* Location of one sub-file inside the compound file. */
typedef struct FileEntry {
  int offset;  /* where the sub-file's data starts in the compound stream */
  int length;  /* size of the sub-file's data */
} FileEntry;
13
+
14
+ void cmpd_touch(Store *store, char *filename)
15
+ {
16
+ store->dir.cmpd->store->touch(store->dir.cmpd->store, filename);
17
+ }
18
+
19
+ int cmpd_exists(Store *store, char *filename)
20
+ {
21
+ if (h_get(store->dir.cmpd->entries, filename) != NULL)
22
+ return true;
23
+ else
24
+ return false;
25
+ }
26
+
27
+ int cmpd_remove(Store *store, char *filename)
28
+ {
29
+ eprintf(UNSUPPORTED_ERROR, "Unsupported operation");
30
+ return 0;
31
+ }
32
+
33
+ int cmpd_rename(Store *store, char *from, char *to)
34
+ {
35
+ eprintf(UNSUPPORTED_ERROR, "Unsupported operation");
36
+ return 0;
37
+ }
38
+
39
+ int cmpd_count(Store *store)
40
+ {
41
+ return store->dir.cmpd->entries->used;
42
+ }
43
+
44
+ void cmpd_clear(Store *store)
45
+ {
46
+ eprintf(UNSUPPORTED_ERROR, "Unsupported operation");
47
+ }
48
+
49
+ void cmpd_close(Store *store)
50
+ {
51
+ mutex_lock(&store->mutex);
52
+ CompoundStore *cmpd = store->dir.cmpd;
53
+ if (cmpd->stream == NULL)
54
+ eprintf(IO_ERROR, "Already closed");
55
+
56
+ h_destroy(cmpd->entries);
57
+
58
+ is_close(cmpd->stream);
59
+ cmpd->stream = NULL;
60
+ free(store->dir.cmpd);
61
+ store_destroy(store);
62
+ }
63
+
64
+ int cmpd_length(Store *store, char *filename)
65
+ {
66
+ FileEntry *fe = (FileEntry *)h_get(store->dir.cmpd->entries, filename);
67
+ if (fe != NULL)
68
+ return fe->length;
69
+ else
70
+ return 0;
71
+ }
72
+
73
+ void cmpdi_seek_internal(InStream *is, int pos) {}
74
+ void cmpdi_close_internal(InStream *is)
75
+ {
76
+ //is_close(is->d.cis->sub);
77
+ free(is->d.cis);
78
+ }
79
+
80
+ void cmpdi_clone_internal(InStream *is, InStream *new_is)
81
+ {
82
+ CompoundInStream *cis = ALLOC(CompoundInStream);
83
+ //cis->sub = is_clone(is->d.cis->sub);
84
+ cis->sub = is->d.cis->sub;
85
+ cis->offset = is->d.cis->offset;
86
+ cis->length = is->d.cis->length;
87
+ new_is->d.cis = cis;
88
+ }
89
+
90
+ int cmpdi_length_internal(InStream *is)
91
+ {
92
+ return (is->d.cis->length);
93
+ }
94
+
95
+ void cmpdi_read_internal(InStream *is, uchar *b, int offset, int len)
96
+ {
97
+ CompoundInStream *cis = is->d.cis;
98
+ int start = is_pos(is);
99
+ if ((start + len) > cis->length)
100
+ eprintf(EOF_ERROR, "read past EOF");
101
+ is_seek(cis->sub, cis->offset + start);
102
+ is_read_bytes(cis->sub, b, offset, len);
103
+ }
104
+
105
+ InStream *cmpd_create_input(InStream *sub_is, int offset, int length)
106
+ {
107
+ InStream *is = is_create();
108
+ CompoundInStream *cis = ALLOC(CompoundInStream);
109
+ //cis->sub = is_clone(sub_is);
110
+ cis->sub = sub_is;
111
+ cis->offset = offset;
112
+ cis->length = length;
113
+ is->d.cis = cis;
114
+ is->file = NULL;
115
+
116
+ is->read_internal = &cmpdi_read_internal;
117
+ is->seek_internal = &cmpdi_seek_internal;
118
+ is->close_internal = &cmpdi_close_internal;
119
+ is->clone_internal = &cmpdi_clone_internal;
120
+ is->length_internal = &cmpdi_length_internal;
121
+ return is;
122
+ }
123
+
124
+ InStream *cmpd_open_input(Store *store, const char *filename)
125
+ {
126
+ CompoundStore *cmpd = store->dir.cmpd;
127
+ InStream *is;
128
+
129
+ mutex_lock(&store->mutex);
130
+ if (cmpd->stream == NULL) {
131
+ mutex_unlock(&store->mutex);
132
+ eprintf(IO_ERROR, "Stream closed");
133
+ }
134
+
135
+ FileEntry *entry = (FileEntry *)h_get(cmpd->entries, filename);
136
+ if (entry == NULL) {
137
+ mutex_unlock(&store->mutex);
138
+ eprintf(IO_ERROR, "No sub-file with id <%s> found", filename);
139
+ }
140
+
141
+ is = cmpd_create_input(cmpd->stream, entry->offset, entry->length);
142
+ mutex_unlock(&store->mutex);
143
+
144
+ return is;
145
+ }
146
+
147
+ OutStream *cmpd_create_output(Store *store, const char *filename)
148
+ {
149
+ eprintf(UNSUPPORTED_ERROR, "Unsupported operation");
150
+ return NULL;
151
+ }
152
+
153
+ Lock *cmpd_open_lock(Store *store, char *lockname)
154
+ {
155
+ eprintf(UNSUPPORTED_ERROR, "Unsupported operation");
156
+ return NULL;
157
+ }
158
+
159
+ void cmpd_close_lock(Lock *lock)
160
+ {
161
+ eprintf(UNSUPPORTED_ERROR, "Unsupported operation");
162
+ }
163
+
164
+ Store *open_cmpd_store(Store *store, const char *name)
165
+ {
166
+ CompoundStore *cmpd = ALLOC(CompoundStore);
167
+ Store *new_store = store_create();
168
+
169
+ cmpd->store = store;
170
+ cmpd->name = name;
171
+ cmpd->entries = h_new_str(&efree, &efree);
172
+ InStream *is = cmpd->stream = store->open_input(store, cmpd->name);
173
+
174
+ // read the directory and init files
175
+ int count = is_read_vint(is);
176
+ FileEntry *entry = NULL;
177
+ int i, offset;
178
+ char *fname;
179
+ for (i = 0; i < count; i++) {
180
+ offset = is_read_long(is);
181
+ fname = is_read_string(is);
182
+
183
+ if (entry != NULL) {
184
+ // set length of the previous entry
185
+ entry->length = offset - entry->offset;
186
+ }
187
+
188
+ entry = ALLOC(FileEntry);
189
+ entry->offset = offset;
190
+ h_set(cmpd->entries, fname, entry);
191
+ }
192
+
193
+ // set the length of the final entry
194
+ if (entry != NULL)
195
+ entry->length = is_length(is) - entry->offset;
196
+
197
+ new_store->dir.cmpd = cmpd;
198
+ new_store->touch = &cmpd_touch;
199
+ new_store->exists = &cmpd_exists;
200
+ new_store->remove = &cmpd_remove;
201
+ new_store->rename = &cmpd_rename;
202
+ new_store->count = &cmpd_count;
203
+ new_store->close = &cmpd_close;
204
+ new_store->clear = &cmpd_clear;
205
+ new_store->length = &cmpd_length;
206
+ new_store->create_output = &cmpd_create_output;
207
+ new_store->open_input = &cmpd_open_input;
208
+ new_store->open_lock = &cmpd_open_lock;
209
+ new_store->close_lock = &cmpd_close_lock;
210
+ return new_store;
211
+ }
212
+
213
+ /****************************************************************************
214
+ *
215
+ * CompoundWriter
216
+ *
217
+ ****************************************************************************/
218
+
219
/* One file queued for merging into a compound file. */
typedef struct WFileEntry {
  char *name;       /* name of the source file (not owned by the entry) */
  int dir_offset;   /* position of this file's directory slot in the output */
  int data_offset;  /* position of this file's data in the output */
} WFileEntry;
224
+
225
+ WFileEntry *wfe_create(char *name)
226
+ {
227
+ WFileEntry *wfe = ALLOC(WFileEntry);
228
+ wfe->name = name;
229
+ return wfe;
230
+ }
231
+
232
/* Free a writer entry. The name it points to is not freed. */
void wfe_destroy(void *p)
{
  efree(p);
}
237
+
238
+ CompoundWriter *open_cw(Store *store, char *name)
239
+ {
240
+ CompoundWriter *cw = ALLOC(CompoundWriter);
241
+ cw->store = store;
242
+ cw->name = name;
243
+ cw->ids = hs_str_create(NULL);
244
+ cw->file_entries = ary_create(1, &wfe_destroy);
245
+ cw->merged = false;
246
+ return cw;
247
+ }
248
+
249
+ void cw_add_file(CompoundWriter *cw, char *id)
250
+ {
251
+ if (cw->merged) eprintf(STATE_ERROR, "Already merged");
252
+ if (hs_add(cw->ids, id) != HASH_KEY_DOES_NOT_EXIST)
253
+ eprintf(STATE_ERROR, "Already merged");
254
+
255
+ hs_add(cw->ids, id);
256
+ ary_append(cw->file_entries, wfe_create(id));
257
+ }
258
+
259
+ void cw_copy_file(CompoundWriter *cw, WFileEntry *src, OutStream *os)
260
+ {
261
+
262
+ int start_ptr = os_pos(os);
263
+
264
+ InStream *is = cw->store->open_input(cw->store, src->name);
265
+ int remainder, length, len;
266
+ remainder = length = is_length(is);
267
+
268
+ uchar buffer[BUFFER_SIZE];
269
+ while (remainder > 0) {
270
+ len = MIN(remainder, BUFFER_SIZE);
271
+ is_read_bytes(is, buffer, 0, len);
272
+ os_write_bytes(os, buffer, len);
273
+ remainder -= len;
274
+ }
275
+
276
+ // Verify that remainder is 0
277
+ if (remainder != 0)
278
+ eprintf(IO_ERROR, "Non-zero remainder length after copying: %ld "
279
+ "(id:%s, length: %ld, buffer size: %ld\n", remainder,
280
+ src->name, length, BUFFER_SIZE);
281
+
282
+ // Verify that the output length diff is equal to original file
283
+ int end_ptr = os_pos(os);
284
+ int diff = end_ptr - start_ptr;
285
+ if (diff != length)
286
+ eprintf(IO_ERROR, "Difference in the output file offsets %ld "
287
+ " does not match the original file length ", diff, length);
288
+
289
+ is_close(is);
290
+ }
291
+
292
+ void cw_close(CompoundWriter *cw)
293
+ {
294
+ if (cw->merged) eprintf(STATE_ERROR, "Already merged");
295
+ if (cw->ids->size <= 0)
296
+ eprintf(STATE_ERROR, "No Files to merge into the compound file");
297
+
298
+ cw->merged = true;
299
+
300
+ OutStream *os = cw->store->create_output(cw->store, cw->name);
301
+ os_write_vint(os, cw->file_entries->size);
302
+
303
+ /* Write the directory with all offsets at 0.
304
+ * Remember the positions of directory entries so that we can adjust the
305
+ * offsets later */
306
+ int i;
307
+ WFileEntry *wfe;
308
+ for (i = 0; i < cw->file_entries->size; i++) {
309
+ wfe = (WFileEntry *)cw->file_entries->elems[i];
310
+ wfe->dir_offset = os_pos(os);
311
+ os_write_long(os, 0); // for now
312
+ os_write_string(os, wfe->name);
313
+ }
314
+
315
+ /* Open the files and copy their data into the stream. Remember the
316
+ * locations of each file's data section. */
317
+ for (i = 0; i < cw->file_entries->size; i++) {
318
+ wfe = (WFileEntry *)cw->file_entries->elems[i];
319
+ wfe->data_offset = os_pos(os);
320
+ cw_copy_file(cw, wfe, os);
321
+ }
322
+
323
+ /* Write the data offsets into the directory of the compound stream */
324
+ for (i = 0; i < cw->file_entries->size; i++) {
325
+ wfe = (WFileEntry *)cw->file_entries->elems[i];
326
+ os_seek(os, wfe->dir_offset);
327
+ os_write_long(os, wfe->data_offset);
328
+ }
329
+
330
+ os_close(os);
331
+ hs_destroy(cw->ids);
332
+ ary_destroy(cw->file_entries);
333
+ free(cw);
334
+ }
335
+