ferret 0.3.2 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (141) hide show
  1. data/CHANGELOG +9 -0
  2. data/Rakefile +51 -25
  3. data/ext/analysis.c +553 -0
  4. data/ext/analysis.h +76 -0
  5. data/ext/array.c +83 -0
  6. data/ext/array.h +19 -0
  7. data/ext/bitvector.c +164 -0
  8. data/ext/bitvector.h +29 -0
  9. data/ext/compound_io.c +335 -0
  10. data/ext/document.c +336 -0
  11. data/ext/document.h +87 -0
  12. data/ext/ferret.c +88 -47
  13. data/ext/ferret.h +43 -109
  14. data/ext/field.c +395 -0
  15. data/ext/filter.c +103 -0
  16. data/ext/fs_store.c +352 -0
  17. data/ext/global.c +219 -0
  18. data/ext/global.h +73 -0
  19. data/ext/hash.c +446 -0
  20. data/ext/hash.h +80 -0
  21. data/ext/hashset.c +141 -0
  22. data/ext/hashset.h +37 -0
  23. data/ext/helper.c +11 -0
  24. data/ext/helper.h +5 -0
  25. data/ext/inc/lang.h +41 -0
  26. data/ext/ind.c +389 -0
  27. data/ext/index.h +884 -0
  28. data/ext/index_io.c +269 -415
  29. data/ext/index_rw.c +2543 -0
  30. data/ext/lang.c +31 -0
  31. data/ext/lang.h +41 -0
  32. data/ext/priorityqueue.c +228 -0
  33. data/ext/priorityqueue.h +44 -0
  34. data/ext/q_boolean.c +1331 -0
  35. data/ext/q_const_score.c +154 -0
  36. data/ext/q_fuzzy.c +287 -0
  37. data/ext/q_match_all.c +142 -0
  38. data/ext/q_multi_phrase.c +343 -0
  39. data/ext/q_parser.c +2180 -0
  40. data/ext/q_phrase.c +657 -0
  41. data/ext/q_prefix.c +75 -0
  42. data/ext/q_range.c +247 -0
  43. data/ext/q_span.c +1566 -0
  44. data/ext/q_term.c +308 -0
  45. data/ext/q_wildcard.c +146 -0
  46. data/ext/r_analysis.c +255 -0
  47. data/ext/r_doc.c +578 -0
  48. data/ext/r_index_io.c +996 -0
  49. data/ext/r_qparser.c +158 -0
  50. data/ext/r_search.c +2321 -0
  51. data/ext/r_store.c +263 -0
  52. data/ext/r_term.c +219 -0
  53. data/ext/ram_store.c +447 -0
  54. data/ext/search.c +524 -0
  55. data/ext/search.h +1065 -0
  56. data/ext/similarity.c +143 -39
  57. data/ext/sort.c +661 -0
  58. data/ext/store.c +35 -0
  59. data/ext/store.h +152 -0
  60. data/ext/term.c +704 -143
  61. data/ext/termdocs.c +599 -0
  62. data/ext/vector.c +594 -0
  63. data/lib/ferret.rb +9 -10
  64. data/lib/ferret/analysis/analyzers.rb +2 -2
  65. data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
  66. data/lib/ferret/analysis/token.rb +14 -14
  67. data/lib/ferret/analysis/token_filters.rb +3 -3
  68. data/lib/ferret/document/field.rb +16 -17
  69. data/lib/ferret/index/document_writer.rb +4 -4
  70. data/lib/ferret/index/index.rb +39 -23
  71. data/lib/ferret/index/index_writer.rb +2 -2
  72. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
  73. data/lib/ferret/index/segment_term_vector.rb +4 -4
  74. data/lib/ferret/index/term.rb +5 -1
  75. data/lib/ferret/index/term_vector_offset_info.rb +6 -6
  76. data/lib/ferret/index/term_vectors_io.rb +5 -5
  77. data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
  78. data/lib/ferret/search.rb +1 -1
  79. data/lib/ferret/search/boolean_query.rb +2 -1
  80. data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
  81. data/lib/ferret/search/fuzzy_query.rb +2 -1
  82. data/lib/ferret/search/index_searcher.rb +3 -0
  83. data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
  84. data/lib/ferret/search/multi_phrase_query.rb +6 -5
  85. data/lib/ferret/search/phrase_query.rb +3 -6
  86. data/lib/ferret/search/prefix_query.rb +4 -4
  87. data/lib/ferret/search/sort.rb +3 -1
  88. data/lib/ferret/search/sort_field.rb +9 -9
  89. data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
  90. data/lib/ferret/search/spans/span_near_query.rb +1 -1
  91. data/lib/ferret/search/spans/span_weight.rb +1 -1
  92. data/lib/ferret/search/spans/spans_enum.rb +7 -7
  93. data/lib/ferret/store/fs_store.rb +10 -6
  94. data/lib/ferret/store/ram_store.rb +3 -3
  95. data/lib/rferret.rb +36 -0
  96. data/test/functional/thread_safety_index_test.rb +2 -2
  97. data/test/test_helper.rb +16 -2
  98. data/test/unit/analysis/c_token.rb +25 -0
  99. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
  100. data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
  101. data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
  102. data/test/unit/document/c_field.rb +98 -0
  103. data/test/unit/document/tc_field.rb +0 -66
  104. data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
  105. data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
  106. data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
  107. data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
  108. data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
  109. data/test/unit/index/tc_segment_term_vector.rb +2 -2
  110. data/test/unit/index/tc_term_vectors_io.rb +4 -4
  111. data/test/unit/query_parser/c_query_parser.rb +138 -0
  112. data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
  113. data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
  114. data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
  115. data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
  116. data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
  117. data/test/unit/search/c_sort_field.rb +27 -0
  118. data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
  119. data/test/unit/search/tc_sort_field.rb +7 -20
  120. data/test/unit/store/c_fs_store.rb +76 -0
  121. data/test/unit/store/c_ram_store.rb +35 -0
  122. data/test/unit/store/m_store.rb +34 -0
  123. data/test/unit/store/m_store_lock.rb +68 -0
  124. data/test/unit/store/tc_fs_store.rb +0 -53
  125. data/test/unit/store/tc_ram_store.rb +0 -20
  126. data/test/unit/store/tm_store.rb +0 -30
  127. data/test/unit/store/tm_store_lock.rb +0 -66
  128. metadata +84 -31
  129. data/ext/Makefile +0 -140
  130. data/ext/ferret_ext.so +0 -0
  131. data/ext/priority_queue.c +0 -232
  132. data/ext/ram_directory.c +0 -321
  133. data/ext/segment_merge_queue.c +0 -37
  134. data/ext/segment_term_enum.c +0 -326
  135. data/ext/string_helper.c +0 -42
  136. data/ext/tags +0 -344
  137. data/ext/term_buffer.c +0 -230
  138. data/ext/term_infos_reader.c +0 -54
  139. data/ext/terminfo.c +0 -160
  140. data/ext/token.c +0 -93
  141. data/ext/util.c +0 -12
data/ext/analysis.h ADDED
@@ -0,0 +1,76 @@
1
+ #ifndef FRT_ANALYSIS_H
2
+ #define FRT_ANALYSIS_H
3
+
4
+ #include <global.h>
5
+ #include <hash.h>
6
+
7
+ /****************************************************************************
8
+ *
9
+ * Token
10
+ *
11
+ ****************************************************************************/
12
+
13
+ typedef struct Token {
14
+ char text[MAX_WORD_SIZE];
15
+ int start;
16
+ int end;
17
+ int pos_inc;
18
+ } Token;
19
+
20
+ Token *tk_create();
21
+ void tk_destroy(void *p);
22
+ Token *tk_set(Token *tk, char *text, int tlen, int start, int end, int pos_inc);
23
+ Token *tk_set_no_len(Token *tk, char *text, int start, int end, int pos_inc);
24
+ int tk_eq(Token *tk1, Token *tk2);
25
+ int tk_cmp(Token *tk1, Token *tk2);
26
+
27
+ /****************************************************************************
28
+ *
29
+ * TokenStream
30
+ *
31
+ ****************************************************************************/
32
+
33
+ typedef struct TokenStream TokenStream;
34
+ struct TokenStream {
35
+ void *data;
36
+ char *text;
37
+ int pos;
38
+ Token *token;
39
+ Token *(*next)(TokenStream *ts);
40
+ void (*reset)(TokenStream *ts, char *text);
41
+ void (*destroy)(void *p);
42
+ TokenStream *sub_ts; // used by filters
43
+ };
44
+
45
/* Dispatch helpers for the TokenStream callbacks.  The argument is
 * parenthesized so expressions such as `*slot` or `&stream` expand
 * correctly.  It is still evaluated twice, so it must not have side
 * effects. */
#define ts_next(mts) ((mts)->next(mts))
#define ts_destroy(mts) ((mts)->destroy(mts))
47
+
48
+ TokenStream *whitespace_tokenizer_create();
49
+ TokenStream *letter_tokenizer_create();
50
+ TokenStream *standard_tokenizer_create();
51
+ TokenStream *lowercase_filter_create(TokenStream *ts);
52
+ TokenStream *stop_filter_create_with_words(TokenStream *ts, char **words, int len);
53
+ TokenStream *stop_filter_create(TokenStream *ts);
54
+
55
+ /****************************************************************************
56
+ *
57
+ * Analyzer
58
+ *
59
+ ****************************************************************************/
60
+
61
+ typedef struct Analyzer {
62
+ void *data;
63
+ TokenStream *current_ts;
64
+ TokenStream *(*get_ts)(struct Analyzer *a, char *field, char *text);
65
+ void (*destroy)(void *p);
66
+ } Analyzer;
67
+
68
/* Dispatch helpers for the Analyzer callbacks.  Every argument is
 * parenthesized so arbitrary expressions expand correctly.  `ma` is still
 * evaluated twice, so it must not have side effects. */
#define a_destroy(ma) ((ma)->destroy(ma))
#define a_get_ts(ma, field, text) ((ma)->get_ts((ma), (field), (text)))
70
+
71
+ Analyzer *whitespace_analyzer_create();
72
+ Analyzer *letter_analyzer_create();
73
+ Analyzer *standard_analyzer_create();
74
+ Analyzer *standard_analyzer_create_with_words(char **words, int len);
75
+
76
+ #endif
data/ext/array.c ADDED
@@ -0,0 +1,83 @@
1
+ #include <global.h>
2
+ #include <array.h>
3
+ #include <string.h>
4
+
5
+ Array *ary_create(int allocate, void (*free_elem)(void *p))
6
+ {
7
+ Array *ary = ALLOC(Array);
8
+ if (allocate == 0) {
9
+ ary->elems = NULL;
10
+ } else {
11
+ ary->elems = ALLOC_N(void *, allocate);
12
+ memset(ary->elems, 0, sizeof(void *) * allocate);
13
+ }
14
+ ary->size = 0;
15
+ ary->allocated = allocate;
16
+ ary->free_elem = free_elem;
17
+
18
+ return ary;
19
+ }
20
+
21
+ void ary_destroy(void *p)
22
+ {
23
+ Array *ary = (Array *)p;
24
+ int i;
25
+ for (i = 0; i < ary->size; i++) {
26
+ if (ary->free_elem != NULL && ary->elems[i] != NULL)
27
+ ary->free_elem(ary->elems[i]);
28
+ }
29
+ free(ary->elems);
30
+ free(ary);
31
+ }
32
+
33
+ void ary_set(Array *ary, int index, void *value)
34
+ {
35
+ if (index >= ary->allocated) {
36
+ ary->allocated = (index + 1)*2;
37
+ REALLOC_N(ary->elems, void *, (ary->allocated));
38
+ memset((&ary->elems[ary->size]), 0, sizeof(void *) * (ary->allocated - ary->size));
39
+ }
40
+
41
+ if (index >= ary->size)
42
+ ary->size = index + 1;
43
+
44
+ if (ary->free_elem != NULL && ary->elems[index] != NULL)
45
+ ary->free_elem(ary->elems[index]);
46
+
47
+ ary->elems[index] = value;
48
+ }
49
+
50
+ void ary_append(Array *ary, void *value)
51
+ {
52
+ ary_set(ary, ary->size, value);
53
+ }
54
+
55
+ void *ary_get(Array *ary, int index)
56
+ {
57
+ if (index >= ary->size)
58
+ return NULL;
59
+ return ary->elems[index];
60
+ }
61
+
62
+ void ary_delete(Array *ary, int index)
63
+ {
64
+ if (index >= ary->size)
65
+ return;
66
+ ary->free_elem(ary->elems[index]);
67
+ ary->elems[index] = NULL;
68
+ if (index == ary->size - 1)
69
+ ary->size--;
70
+ }
71
+
72
+ void *ary_remove(Array *ary, int index)
73
+ {
74
+ void *p;
75
+ if (index >= ary->size)
76
+ return NULL;
77
+ p = ary->elems[index];
78
+ ary->elems[index] = NULL;
79
+ ary->size--;
80
+ memmove(&ary->elems[index], &ary->elems[index + 1],
81
+ sizeof(void *) *(ary->size - index));
82
+ return p;
83
+ }
data/ext/array.h ADDED
@@ -0,0 +1,19 @@
1
+ #ifndef FRT_ARRAY_H
2
+ #define FRT_ARRAY_H
3
+
4
/* Array: a growable array of void * elements with an optional
 * per-element destructor. */
typedef struct Array {
  void **elems;               /* slot storage; NULL when nothing is reserved */
  int    size;                /* number of slots in use */
  int    allocated;           /* number of slots reserved in elems */
  void (*free_elem)(void *p); /* element destructor, or NULL */
} Array;
10
+
11
+ Array *ary_create(int size, void (*free_elem)(void *p));
12
+ void ary_destroy(void *p);
13
+ void ary_set(Array *ary, int index, void *value);
14
+ void ary_append(Array *ary, void *value);
15
+ void *ary_get(Array *ary, int index);
16
+ void ary_delete(Array *ary, int index);
17
+ void *ary_remove(Array *ary, int index);
18
+
19
+ #endif
data/ext/bitvector.c ADDED
@@ -0,0 +1,164 @@
1
+ #include <bitvector.h>
2
+ #include <string.h>
3
+
4
+ BitVector *bv_create_size(int size)
5
+ {
6
+ BitVector *bv = ALLOC(BitVector);
7
+
8
+ bv->capa = (size >> 3) + 1;
9
+ bv->bits = ALLOC_N(uchar, bv->capa);
10
+ memset(bv->bits, 0, bv->capa);
11
+
12
+ bv->size = 0;
13
+ bv->count = 0;
14
+ bv->curr_bit = -1;
15
+ return bv;
16
+ }
17
+
18
+ BitVector *bv_create()
19
+ {
20
+ return bv_create_size(BV_INIT_CAPA);
21
+ }
22
+
23
+ void bv_destroy(void *p)
24
+ {
25
+ BitVector *bv = (BitVector *)p;
26
+ free(bv->bits);
27
+ free(bv);
28
+ }
29
+
30
+ void bv_set(BitVector *bv, int bit)
31
+ {
32
+ int byte = bit>>3;
33
+ uchar bitmask = 1<<(bit&7);
34
+ if (bv->size <= byte) {
35
+ bv->size = byte + 1;
36
+ if (bv->size >= bv->capa) {
37
+ int capa = bv->capa * 2;
38
+ while (capa < bv->size) capa *= 2;
39
+ REALLOC_N(bv->bits, uchar, capa);
40
+ memset(bv->bits + bv->capa, 0, capa - bv->capa);
41
+ bv->capa = capa;
42
+ }
43
+ }
44
+ uchar *byte_p = &(bv->bits[byte]);
45
+ if ((bitmask & *byte_p) == 0) {
46
+ bv->count++;
47
+ *byte_p |= bitmask;
48
+ }
49
+ }
50
+
51
+ int bv_get(BitVector *bv, int bit)
52
+ {
53
+ int byte = bit>>3;
54
+ if (byte >= bv->size) return 0;
55
+ return (bv->bits[byte]>>(bit&7))&1;
56
+ }
57
+
58
+ void bv_clear(BitVector *bv)
59
+ {
60
+ memset(bv->bits, 0, bv->size);
61
+ bv->count = 0;
62
+ }
63
+
64
+ void bv_unset(BitVector *bv, int bit)
65
+ {
66
+ int byte = bit>>3;
67
+ if (byte >= bv->size) return;
68
+ uchar *byte_p = &(bv->bits[byte]);
69
+ uchar bitmask = 1<<(bit&7);
70
+ if ((bitmask & *byte_p) > 0) {
71
+ bv->count--;
72
+ *byte_p &= ~bitmask;
73
+ }
74
+ }
75
+
76
+ void bv_write(BitVector *bv, Store *store, char *name)
77
+ {
78
+ OutStream *os = store->create_output(store, name);
79
+ os_write_vint(os, bv->size);
80
+ os_write_bytes(os, bv->bits, bv->size);
81
+ os_close(os);
82
+ }
83
+
84
+ const uchar BYTE_COUNTS[] = { // table of bits/char
85
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
86
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
87
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
88
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
89
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
90
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
91
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
92
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
93
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
94
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
95
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
96
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
97
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
98
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
99
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
100
+ 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
101
+ };
102
+
103
+ int bv_count(BitVector *bv)
104
+ {
105
+ // if the vector has been modified
106
+ int i, c = 0;
107
+ uchar *bytes = bv->bits;
108
+ for (i = 0; i < bv->size; i++)
109
+ c += BYTE_COUNTS[bytes[i]]; // sum bits per char
110
+ bv->count = c;
111
+ return c;
112
+ }
113
+
114
+ BitVector *bv_read(Store *store, char *name)
115
+ {
116
+ BitVector *bv = ALLOC(BitVector);
117
+ InStream *is = store->open_input(store, name);
118
+ bv->capa = bv->size = is_read_vint(is);
119
+ bv->bits = ALLOC_N(uchar, bv->capa);
120
+ is_read_bytes(is, bv->bits, 0, bv->size);
121
+ is_close(is);
122
+ bv_count(bv);
123
+ return bv;
124
+ }
125
+
126
+ void bv_scan_reset(BitVector *bv)
127
+ {
128
+ bv->curr_bit = -1;
129
+ }
130
+
131
+ inline int bv_scan_next_from(BitVector *bv, register const int from)
132
+ {
133
+ register const uchar *const bits = bv->bits;
134
+ register const int size = bv->size;
135
+ register int byte_pos = (from) >> 3;
136
+ register int inc = ((from) & 7);
137
+ register int bit = 1 << inc;
138
+ register int mask = 0xff << inc;
139
+ register int byte;
140
+
141
+ if (byte_pos >= size) return -1;
142
+ if ((bits[byte_pos]&mask) == 0) {
143
+ inc = 0;
144
+ bit = 1;
145
+ do {
146
+ byte_pos++;
147
+ if (byte_pos >= size) return -1;
148
+ } while (bits[byte_pos] == 0);
149
+ }
150
+
151
+ byte = bits[byte_pos];
152
+ while ((byte & bit) == 0) {
153
+ bit <<= 1;
154
+ inc++;
155
+ }
156
+
157
+ return bv->curr_bit = ((byte_pos << 3) + inc);
158
+ }
159
+
160
+ inline int bv_scan_next(BitVector *bv)
161
+ {
162
+ return bv_scan_next_from(bv, bv->curr_bit+1);
163
+ }
164
+
data/ext/bitvector.h ADDED
@@ -0,0 +1,29 @@
1
+ #ifndef FRT_BIT_VECTOR_H
2
+ #define FRT_BIT_VECTOR_H
3
+
4
+ #include <global.h>
5
+ #include <store.h>
6
+
7
+ #define BV_INIT_CAPA 256
8
+ typedef struct BitVector {
9
+ uchar *bits;
10
+ int size;
11
+ int capa;
12
+ int count;
13
+ int curr_bit;
14
+ } BitVector;
15
+
16
+ BitVector *bv_create();;
17
+ BitVector *bv_create_size(int size);
18
+ void bv_destroy(void *bv);
19
+ void bv_set(BitVector *bv, int bit);
20
+ int bv_get(BitVector *bv, int bit);
21
+ void bv_clear(BitVector *bv);
22
+ void bv_unset(BitVector *bv, int bit);
23
+ void bv_write(BitVector *bv, Store *store, char *name);
24
+ BitVector *bv_read(Store *store, char *name);
25
+ void bv_scan_reset(BitVector *bv);
26
+ int bv_scan_next(BitVector *bv);
27
+ int bv_scan_next_from(BitVector *bv, int from);
28
+
29
+ #endif
data/ext/compound_io.c ADDED
@@ -0,0 +1,335 @@
1
+ #include "index.h"
2
+
3
+ /****************************************************************************
4
+ *
5
+ * CompoundStore
6
+ *
7
+ ****************************************************************************/
8
+
9
/* FileEntry: where one sub-file lives inside the compound file. */
typedef struct FileEntry {
  int offset; /* position of the sub-file's data in the compound stream */
  int length; /* number of bytes belonging to the sub-file */
} FileEntry;
13
+
14
+ void cmpd_touch(Store *store, char *filename)
15
+ {
16
+ store->dir.cmpd->store->touch(store->dir.cmpd->store, filename);
17
+ }
18
+
19
+ int cmpd_exists(Store *store, char *filename)
20
+ {
21
+ if (h_get(store->dir.cmpd->entries, filename) != NULL)
22
+ return true;
23
+ else
24
+ return false;
25
+ }
26
+
27
+ int cmpd_remove(Store *store, char *filename)
28
+ {
29
+ eprintf(UNSUPPORTED_ERROR, "Unsupported operation");
30
+ return 0;
31
+ }
32
+
33
+ int cmpd_rename(Store *store, char *from, char *to)
34
+ {
35
+ eprintf(UNSUPPORTED_ERROR, "Unsupported operation");
36
+ return 0;
37
+ }
38
+
39
+ int cmpd_count(Store *store)
40
+ {
41
+ return store->dir.cmpd->entries->used;
42
+ }
43
+
44
+ void cmpd_clear(Store *store)
45
+ {
46
+ eprintf(UNSUPPORTED_ERROR, "Unsupported operation");
47
+ }
48
+
49
+ void cmpd_close(Store *store)
50
+ {
51
+ mutex_lock(&store->mutex);
52
+ CompoundStore *cmpd = store->dir.cmpd;
53
+ if (cmpd->stream == NULL)
54
+ eprintf(IO_ERROR, "Already closed");
55
+
56
+ h_destroy(cmpd->entries);
57
+
58
+ is_close(cmpd->stream);
59
+ cmpd->stream = NULL;
60
+ free(store->dir.cmpd);
61
+ store_destroy(store);
62
+ }
63
+
64
+ int cmpd_length(Store *store, char *filename)
65
+ {
66
+ FileEntry *fe = (FileEntry *)h_get(store->dir.cmpd->entries, filename);
67
+ if (fe != NULL)
68
+ return fe->length;
69
+ else
70
+ return 0;
71
+ }
72
+
73
+ void cmpdi_seek_internal(InStream *is, int pos) {}
74
+ void cmpdi_close_internal(InStream *is)
75
+ {
76
+ //is_close(is->d.cis->sub);
77
+ free(is->d.cis);
78
+ }
79
+
80
+ void cmpdi_clone_internal(InStream *is, InStream *new_is)
81
+ {
82
+ CompoundInStream *cis = ALLOC(CompoundInStream);
83
+ //cis->sub = is_clone(is->d.cis->sub);
84
+ cis->sub = is->d.cis->sub;
85
+ cis->offset = is->d.cis->offset;
86
+ cis->length = is->d.cis->length;
87
+ new_is->d.cis = cis;
88
+ }
89
+
90
+ int cmpdi_length_internal(InStream *is)
91
+ {
92
+ return (is->d.cis->length);
93
+ }
94
+
95
+ void cmpdi_read_internal(InStream *is, uchar *b, int offset, int len)
96
+ {
97
+ CompoundInStream *cis = is->d.cis;
98
+ int start = is_pos(is);
99
+ if ((start + len) > cis->length)
100
+ eprintf(EOF_ERROR, "read past EOF");
101
+ is_seek(cis->sub, cis->offset + start);
102
+ is_read_bytes(cis->sub, b, offset, len);
103
+ }
104
+
105
+ InStream *cmpd_create_input(InStream *sub_is, int offset, int length)
106
+ {
107
+ InStream *is = is_create();
108
+ CompoundInStream *cis = ALLOC(CompoundInStream);
109
+ //cis->sub = is_clone(sub_is);
110
+ cis->sub = sub_is;
111
+ cis->offset = offset;
112
+ cis->length = length;
113
+ is->d.cis = cis;
114
+ is->file = NULL;
115
+
116
+ is->read_internal = &cmpdi_read_internal;
117
+ is->seek_internal = &cmpdi_seek_internal;
118
+ is->close_internal = &cmpdi_close_internal;
119
+ is->clone_internal = &cmpdi_clone_internal;
120
+ is->length_internal = &cmpdi_length_internal;
121
+ return is;
122
+ }
123
+
124
+ InStream *cmpd_open_input(Store *store, const char *filename)
125
+ {
126
+ CompoundStore *cmpd = store->dir.cmpd;
127
+ InStream *is;
128
+
129
+ mutex_lock(&store->mutex);
130
+ if (cmpd->stream == NULL) {
131
+ mutex_unlock(&store->mutex);
132
+ eprintf(IO_ERROR, "Stream closed");
133
+ }
134
+
135
+ FileEntry *entry = (FileEntry *)h_get(cmpd->entries, filename);
136
+ if (entry == NULL) {
137
+ mutex_unlock(&store->mutex);
138
+ eprintf(IO_ERROR, "No sub-file with id <%s> found", filename);
139
+ }
140
+
141
+ is = cmpd_create_input(cmpd->stream, entry->offset, entry->length);
142
+ mutex_unlock(&store->mutex);
143
+
144
+ return is;
145
+ }
146
+
147
+ OutStream *cmpd_create_output(Store *store, const char *filename)
148
+ {
149
+ eprintf(UNSUPPORTED_ERROR, "Unsupported operation");
150
+ return NULL;
151
+ }
152
+
153
+ Lock *cmpd_open_lock(Store *store, char *lockname)
154
+ {
155
+ eprintf(UNSUPPORTED_ERROR, "Unsupported operation");
156
+ return NULL;
157
+ }
158
+
159
+ void cmpd_close_lock(Lock *lock)
160
+ {
161
+ eprintf(UNSUPPORTED_ERROR, "Unsupported operation");
162
+ }
163
+
164
+ Store *open_cmpd_store(Store *store, const char *name)
165
+ {
166
+ CompoundStore *cmpd = ALLOC(CompoundStore);
167
+ Store *new_store = store_create();
168
+
169
+ cmpd->store = store;
170
+ cmpd->name = name;
171
+ cmpd->entries = h_new_str(&efree, &efree);
172
+ InStream *is = cmpd->stream = store->open_input(store, cmpd->name);
173
+
174
+ // read the directory and init files
175
+ int count = is_read_vint(is);
176
+ FileEntry *entry = NULL;
177
+ int i, offset;
178
+ char *fname;
179
+ for (i = 0; i < count; i++) {
180
+ offset = is_read_long(is);
181
+ fname = is_read_string(is);
182
+
183
+ if (entry != NULL) {
184
+ // set length of the previous entry
185
+ entry->length = offset - entry->offset;
186
+ }
187
+
188
+ entry = ALLOC(FileEntry);
189
+ entry->offset = offset;
190
+ h_set(cmpd->entries, fname, entry);
191
+ }
192
+
193
+ // set the length of the final entry
194
+ if (entry != NULL)
195
+ entry->length = is_length(is) - entry->offset;
196
+
197
+ new_store->dir.cmpd = cmpd;
198
+ new_store->touch = &cmpd_touch;
199
+ new_store->exists = &cmpd_exists;
200
+ new_store->remove = &cmpd_remove;
201
+ new_store->rename = &cmpd_rename;
202
+ new_store->count = &cmpd_count;
203
+ new_store->close = &cmpd_close;
204
+ new_store->clear = &cmpd_clear;
205
+ new_store->length = &cmpd_length;
206
+ new_store->create_output = &cmpd_create_output;
207
+ new_store->open_input = &cmpd_open_input;
208
+ new_store->open_lock = &cmpd_open_lock;
209
+ new_store->close_lock = &cmpd_close_lock;
210
+ return new_store;
211
+ }
212
+
213
+ /****************************************************************************
214
+ *
215
+ * CompoundWriter
216
+ *
217
+ ****************************************************************************/
218
+
219
/* WFileEntry: bookkeeping for one file queued in a CompoundWriter. */
typedef struct WFileEntry {
  char *name;        /* file name as passed to cw_add_file (not copied) */
  int   dir_offset;  /* where this file's directory record was written */
  int   data_offset; /* where this file's data starts in the output */
} WFileEntry;
224
+
225
+ WFileEntry *wfe_create(char *name)
226
+ {
227
+ WFileEntry *wfe = ALLOC(WFileEntry);
228
+ wfe->name = name;
229
+ return wfe;
230
+ }
231
+
232
+ void wfe_destroy(void *p)
233
+ {
234
+ WFileEntry *wfe = (WFileEntry *)p;
235
+ efree(wfe);
236
+ }
237
+
238
+ CompoundWriter *open_cw(Store *store, char *name)
239
+ {
240
+ CompoundWriter *cw = ALLOC(CompoundWriter);
241
+ cw->store = store;
242
+ cw->name = name;
243
+ cw->ids = hs_str_create(NULL);
244
+ cw->file_entries = ary_create(1, &wfe_destroy);
245
+ cw->merged = false;
246
+ return cw;
247
+ }
248
+
249
+ void cw_add_file(CompoundWriter *cw, char *id)
250
+ {
251
+ if (cw->merged) eprintf(STATE_ERROR, "Already merged");
252
+ if (hs_add(cw->ids, id) != HASH_KEY_DOES_NOT_EXIST)
253
+ eprintf(STATE_ERROR, "Already merged");
254
+
255
+ hs_add(cw->ids, id);
256
+ ary_append(cw->file_entries, wfe_create(id));
257
+ }
258
+
259
+ void cw_copy_file(CompoundWriter *cw, WFileEntry *src, OutStream *os)
260
+ {
261
+
262
+ int start_ptr = os_pos(os);
263
+
264
+ InStream *is = cw->store->open_input(cw->store, src->name);
265
+ int remainder, length, len;
266
+ remainder = length = is_length(is);
267
+
268
+ uchar buffer[BUFFER_SIZE];
269
+ while (remainder > 0) {
270
+ len = MIN(remainder, BUFFER_SIZE);
271
+ is_read_bytes(is, buffer, 0, len);
272
+ os_write_bytes(os, buffer, len);
273
+ remainder -= len;
274
+ }
275
+
276
+ // Verify that remainder is 0
277
+ if (remainder != 0)
278
+ eprintf(IO_ERROR, "Non-zero remainder length after copying: %ld "
279
+ "(id:%s, length: %ld, buffer size: %ld\n", remainder,
280
+ src->name, length, BUFFER_SIZE);
281
+
282
+ // Verify that the output length diff is equal to original file
283
+ int end_ptr = os_pos(os);
284
+ int diff = end_ptr - start_ptr;
285
+ if (diff != length)
286
+ eprintf(IO_ERROR, "Difference in the output file offsets %ld "
287
+ " does not match the original file length ", diff, length);
288
+
289
+ is_close(is);
290
+ }
291
+
292
+ void cw_close(CompoundWriter *cw)
293
+ {
294
+ if (cw->merged) eprintf(STATE_ERROR, "Already merged");
295
+ if (cw->ids->size <= 0)
296
+ eprintf(STATE_ERROR, "No Files to merge into the compound file");
297
+
298
+ cw->merged = true;
299
+
300
+ OutStream *os = cw->store->create_output(cw->store, cw->name);
301
+ os_write_vint(os, cw->file_entries->size);
302
+
303
+ /* Write the directory with all offsets at 0.
304
+ * Remember the positions of directory entries so that we can adjust the
305
+ * offsets later */
306
+ int i;
307
+ WFileEntry *wfe;
308
+ for (i = 0; i < cw->file_entries->size; i++) {
309
+ wfe = (WFileEntry *)cw->file_entries->elems[i];
310
+ wfe->dir_offset = os_pos(os);
311
+ os_write_long(os, 0); // for now
312
+ os_write_string(os, wfe->name);
313
+ }
314
+
315
+ /* Open the files and copy their data into the stream. Remember the
316
+ * locations of each file's data section. */
317
+ for (i = 0; i < cw->file_entries->size; i++) {
318
+ wfe = (WFileEntry *)cw->file_entries->elems[i];
319
+ wfe->data_offset = os_pos(os);
320
+ cw_copy_file(cw, wfe, os);
321
+ }
322
+
323
+ /* Write the data offsets into the directory of the compound stream */
324
+ for (i = 0; i < cw->file_entries->size; i++) {
325
+ wfe = (WFileEntry *)cw->file_entries->elems[i];
326
+ os_seek(os, wfe->dir_offset);
327
+ os_write_long(os, wfe->data_offset);
328
+ }
329
+
330
+ os_close(os);
331
+ hs_destroy(cw->ids);
332
+ ary_destroy(cw->file_entries);
333
+ free(cw);
334
+ }
335
+