ferret 0.9.0 → 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +23 -5
- data/TODO +2 -1
- data/ext/analysis.c +838 -177
- data/ext/analysis.h +55 -7
- data/ext/api.c +69 -0
- data/ext/api.h +27 -0
- data/ext/array.c +8 -5
- data/ext/compound_io.c +132 -96
- data/ext/document.c +58 -28
- data/ext/except.c +59 -0
- data/ext/except.h +88 -0
- data/ext/ferret.c +47 -3
- data/ext/ferret.h +3 -0
- data/ext/field.c +15 -9
- data/ext/filter.c +1 -1
- data/ext/fs_store.c +215 -34
- data/ext/global.c +72 -3
- data/ext/global.h +4 -3
- data/ext/hash.c +44 -3
- data/ext/hash.h +9 -0
- data/ext/header.h +58 -0
- data/ext/inc/except.h +88 -0
- data/ext/inc/lang.h +23 -13
- data/ext/ind.c +16 -10
- data/ext/index.h +2 -22
- data/ext/index_io.c +3 -11
- data/ext/index_rw.c +245 -193
- data/ext/lang.h +23 -13
- data/ext/libstemmer.c +92 -0
- data/ext/libstemmer.h +79 -0
- data/ext/modules.h +162 -0
- data/ext/q_boolean.c +34 -21
- data/ext/q_const_score.c +6 -12
- data/ext/q_filtered_query.c +206 -0
- data/ext/q_fuzzy.c +18 -15
- data/ext/q_match_all.c +3 -7
- data/ext/q_multi_phrase.c +10 -14
- data/ext/q_parser.c +29 -2
- data/ext/q_phrase.c +14 -21
- data/ext/q_prefix.c +15 -12
- data/ext/q_range.c +30 -28
- data/ext/q_span.c +13 -21
- data/ext/q_term.c +17 -26
- data/ext/r_analysis.c +693 -21
- data/ext/r_doc.c +11 -12
- data/ext/r_index_io.c +4 -1
- data/ext/r_qparser.c +21 -2
- data/ext/r_search.c +285 -18
- data/ext/ram_store.c +5 -2
- data/ext/search.c +11 -17
- data/ext/search.h +21 -45
- data/ext/similarity.h +67 -0
- data/ext/sort.c +30 -25
- data/ext/stem_ISO_8859_1_danish.c +338 -0
- data/ext/stem_ISO_8859_1_danish.h +16 -0
- data/ext/stem_ISO_8859_1_dutch.c +635 -0
- data/ext/stem_ISO_8859_1_dutch.h +16 -0
- data/ext/stem_ISO_8859_1_english.c +1156 -0
- data/ext/stem_ISO_8859_1_english.h +16 -0
- data/ext/stem_ISO_8859_1_finnish.c +792 -0
- data/ext/stem_ISO_8859_1_finnish.h +16 -0
- data/ext/stem_ISO_8859_1_french.c +1276 -0
- data/ext/stem_ISO_8859_1_french.h +16 -0
- data/ext/stem_ISO_8859_1_german.c +512 -0
- data/ext/stem_ISO_8859_1_german.h +16 -0
- data/ext/stem_ISO_8859_1_italian.c +1091 -0
- data/ext/stem_ISO_8859_1_italian.h +16 -0
- data/ext/stem_ISO_8859_1_norwegian.c +296 -0
- data/ext/stem_ISO_8859_1_norwegian.h +16 -0
- data/ext/stem_ISO_8859_1_porter.c +776 -0
- data/ext/stem_ISO_8859_1_porter.h +16 -0
- data/ext/stem_ISO_8859_1_portuguese.c +1035 -0
- data/ext/stem_ISO_8859_1_portuguese.h +16 -0
- data/ext/stem_ISO_8859_1_spanish.c +1119 -0
- data/ext/stem_ISO_8859_1_spanish.h +16 -0
- data/ext/stem_ISO_8859_1_swedish.c +307 -0
- data/ext/stem_ISO_8859_1_swedish.h +16 -0
- data/ext/stem_KOI8_R_russian.c +701 -0
- data/ext/stem_KOI8_R_russian.h +16 -0
- data/ext/stem_UTF_8_danish.c +344 -0
- data/ext/stem_UTF_8_danish.h +16 -0
- data/ext/stem_UTF_8_dutch.c +653 -0
- data/ext/stem_UTF_8_dutch.h +16 -0
- data/ext/stem_UTF_8_english.c +1176 -0
- data/ext/stem_UTF_8_english.h +16 -0
- data/ext/stem_UTF_8_finnish.c +808 -0
- data/ext/stem_UTF_8_finnish.h +16 -0
- data/ext/stem_UTF_8_french.c +1296 -0
- data/ext/stem_UTF_8_french.h +16 -0
- data/ext/stem_UTF_8_german.c +526 -0
- data/ext/stem_UTF_8_german.h +16 -0
- data/ext/stem_UTF_8_italian.c +1113 -0
- data/ext/stem_UTF_8_italian.h +16 -0
- data/ext/stem_UTF_8_norwegian.c +302 -0
- data/ext/stem_UTF_8_norwegian.h +16 -0
- data/ext/stem_UTF_8_porter.c +794 -0
- data/ext/stem_UTF_8_porter.h +16 -0
- data/ext/stem_UTF_8_portuguese.c +1055 -0
- data/ext/stem_UTF_8_portuguese.h +16 -0
- data/ext/stem_UTF_8_russian.c +709 -0
- data/ext/stem_UTF_8_russian.h +16 -0
- data/ext/stem_UTF_8_spanish.c +1137 -0
- data/ext/stem_UTF_8_spanish.h +16 -0
- data/ext/stem_UTF_8_swedish.c +313 -0
- data/ext/stem_UTF_8_swedish.h +16 -0
- data/ext/stopwords.c +325 -0
- data/ext/store.c +34 -2
- data/ext/tags +2953 -0
- data/ext/term.c +21 -15
- data/ext/termdocs.c +5 -3
- data/ext/utilities.c +446 -0
- data/ext/vector.c +27 -13
- data/lib/ferret/document/document.rb +1 -1
- data/lib/ferret/index/index.rb +44 -6
- data/lib/ferret/query_parser/query_parser.tab.rb +7 -3
- data/lib/rferret.rb +2 -1
- data/test/test_helper.rb +2 -2
- data/test/unit/analysis/ctc_analyzer.rb +401 -0
- data/test/unit/analysis/ctc_tokenstream.rb +423 -0
- data/test/unit/analysis/{tc_letter_tokenizer.rb → rtc_letter_tokenizer.rb} +0 -0
- data/test/unit/analysis/{tc_lower_case_filter.rb → rtc_lower_case_filter.rb} +0 -0
- data/test/unit/analysis/{tc_lower_case_tokenizer.rb → rtc_lower_case_tokenizer.rb} +0 -0
- data/test/unit/analysis/{tc_per_field_analyzer_wrapper.rb → rtc_per_field_analyzer_wrapper.rb} +0 -0
- data/test/unit/analysis/{tc_porter_stem_filter.rb → rtc_porter_stem_filter.rb} +0 -0
- data/test/unit/analysis/{tc_standard_analyzer.rb → rtc_standard_analyzer.rb} +0 -0
- data/test/unit/analysis/{tc_standard_tokenizer.rb → rtc_standard_tokenizer.rb} +0 -0
- data/test/unit/analysis/{tc_stop_analyzer.rb → rtc_stop_analyzer.rb} +0 -0
- data/test/unit/analysis/{tc_stop_filter.rb → rtc_stop_filter.rb} +0 -0
- data/test/unit/analysis/{tc_white_space_analyzer.rb → rtc_white_space_analyzer.rb} +0 -0
- data/test/unit/analysis/{tc_white_space_tokenizer.rb → rtc_white_space_tokenizer.rb} +0 -0
- data/test/unit/analysis/{tc_word_list_loader.rb → rtc_word_list_loader.rb} +0 -0
- data/test/unit/analysis/tc_analyzer.rb +1 -2
- data/test/unit/analysis/{c_token.rb → tc_token.rb} +0 -0
- data/test/unit/document/rtc_field.rb +28 -0
- data/test/unit/document/{c_document.rb → tc_document.rb} +0 -0
- data/test/unit/document/tc_field.rb +82 -12
- data/test/unit/index/{tc_compound_file_io.rb → rtc_compound_file_io.rb} +0 -0
- data/test/unit/index/{tc_field_infos.rb → rtc_field_infos.rb} +0 -0
- data/test/unit/index/{tc_fields_io.rb → rtc_fields_io.rb} +0 -0
- data/test/unit/index/{tc_multiple_term_doc_pos_enum.rb → rtc_multiple_term_doc_pos_enum.rb} +0 -0
- data/test/unit/index/{tc_segment_infos.rb → rtc_segment_infos.rb} +0 -0
- data/test/unit/index/{tc_segment_term_docs.rb → rtc_segment_term_docs.rb} +0 -0
- data/test/unit/index/{tc_segment_term_enum.rb → rtc_segment_term_enum.rb} +0 -0
- data/test/unit/index/{tc_segment_term_vector.rb → rtc_segment_term_vector.rb} +0 -0
- data/test/unit/index/{tc_term_buffer.rb → rtc_term_buffer.rb} +0 -0
- data/test/unit/index/{tc_term_info.rb → rtc_term_info.rb} +0 -0
- data/test/unit/index/{tc_term_infos_io.rb → rtc_term_infos_io.rb} +0 -0
- data/test/unit/index/{tc_term_vectors_io.rb → rtc_term_vectors_io.rb} +0 -0
- data/test/unit/index/{c_index.rb → tc_index.rb} +26 -6
- data/test/unit/index/{c_index_reader.rb → tc_index_reader.rb} +0 -0
- data/test/unit/index/{c_index_writer.rb → tc_index_writer.rb} +0 -0
- data/test/unit/index/{c_term.rb → tc_term.rb} +0 -0
- data/test/unit/index/{c_term_voi.rb → tc_term_voi.rb} +0 -0
- data/test/unit/query_parser/{c_query_parser.rb → rtc_query_parser.rb} +14 -14
- data/test/unit/query_parser/tc_query_parser.rb +24 -16
- data/test/unit/search/{tc_similarity.rb → rtc_similarity.rb} +0 -0
- data/test/unit/search/rtc_sort_field.rb +14 -0
- data/test/unit/search/{c_filter.rb → tc_filter.rb} +11 -11
- data/test/unit/search/{c_fuzzy_query.rb → tc_fuzzy_query.rb} +0 -0
- data/test/unit/search/{c_index_searcher.rb → tc_index_searcher.rb} +0 -0
- data/test/unit/search/{c_search_and_sort.rb → tc_search_and_sort.rb} +0 -0
- data/test/unit/search/{c_sort.rb → tc_sort.rb} +0 -0
- data/test/unit/search/tc_sort_field.rb +20 -7
- data/test/unit/search/{c_spans.rb → tc_spans.rb} +0 -0
- data/test/unit/store/rtc_fs_store.rb +62 -0
- data/test/unit/store/rtc_ram_store.rb +15 -0
- data/test/unit/store/rtm_store.rb +150 -0
- data/test/unit/store/rtm_store_lock.rb +2 -0
- data/test/unit/store/tc_fs_store.rb +54 -40
- data/test/unit/store/tc_ram_store.rb +20 -0
- data/test/unit/store/tm_store.rb +30 -146
- data/test/unit/store/tm_store_lock.rb +66 -0
- data/test/unit/utils/{tc_bit_vector.rb → rtc_bit_vector.rb} +0 -0
- data/test/unit/utils/{tc_date_tools.rb → rtc_date_tools.rb} +0 -0
- data/test/unit/utils/{tc_number_tools.rb → rtc_number_tools.rb} +0 -0
- data/test/unit/utils/{tc_parameter.rb → rtc_parameter.rb} +0 -0
- data/test/unit/utils/{tc_priority_queue.rb → rtc_priority_queue.rb} +0 -0
- data/test/unit/utils/{tc_string_helper.rb → rtc_string_helper.rb} +0 -0
- data/test/unit/utils/{tc_thread.rb → rtc_thread.rb} +0 -0
- data/test/unit/utils/{tc_weak_key_hash.rb → rtc_weak_key_hash.rb} +0 -0
- metadata +360 -289
- data/test/unit/document/c_field.rb +0 -98
- data/test/unit/search/c_sort_field.rb +0 -27
- data/test/unit/store/c_fs_store.rb +0 -76
- data/test/unit/store/c_ram_store.rb +0 -35
- data/test/unit/store/m_store.rb +0 -34
- data/test/unit/store/m_store_lock.rb +0 -68
data/ext/analysis.h
CHANGED
@@ -30,27 +30,58 @@ int tk_cmp(Token *tk1, Token *tk2);
|
|
30
30
|
*
|
31
31
|
****************************************************************************/
|
32
32
|
|
33
|
+
|
33
34
|
typedef struct TokenStream TokenStream;
|
34
35
|
struct TokenStream {
|
35
36
|
void *data;
|
36
37
|
char *text;
|
37
|
-
|
38
|
+
char *t; /* ptr used to scan text */
|
38
39
|
Token *token;
|
39
40
|
Token *(*next)(TokenStream *ts);
|
40
41
|
void (*reset)(TokenStream *ts, char *text);
|
42
|
+
void (*clone_i)(TokenStream *orig_ts, TokenStream *new_ts);
|
41
43
|
void (*destroy)(void *p);
|
42
|
-
TokenStream *sub_ts;
|
44
|
+
TokenStream *sub_ts; /* used by filters */
|
45
|
+
bool destroy_sub : 1;
|
43
46
|
};
|
44
47
|
|
45
48
|
#define ts_next(mts) mts->next(mts)
|
46
49
|
#define ts_destroy(mts) mts->destroy(mts)
|
47
50
|
|
48
51
|
TokenStream *whitespace_tokenizer_create();
|
52
|
+
TokenStream *mb_whitespace_tokenizer_create(bool lowercase);
|
53
|
+
|
49
54
|
TokenStream *letter_tokenizer_create();
|
55
|
+
TokenStream *mb_letter_tokenizer_create(bool lowercase);
|
56
|
+
|
50
57
|
TokenStream *standard_tokenizer_create();
|
58
|
+
TokenStream *mb_standard_tokenizer_create();
|
59
|
+
|
51
60
|
TokenStream *lowercase_filter_create(TokenStream *ts);
|
52
|
-
TokenStream *
|
61
|
+
TokenStream *mb_lowercase_filter_create(TokenStream *ts);
|
62
|
+
|
63
|
+
extern const char *ENGLISH_STOP_WORDS[];
|
64
|
+
extern const char *FULL_ENGLISH_STOP_WORDS[];
|
65
|
+
extern const char *EXTENDED_ENGLISH_STOP_WORDS[];
|
66
|
+
extern const char *FULL_FRENCH_STOP_WORDS[];
|
67
|
+
extern const char *FULL_SPANISH_STOP_WORDS[];
|
68
|
+
extern const char *FULL_PORTUGUESE_STOP_WORDS[];
|
69
|
+
extern const char *FULL_ITALIAN_STOP_WORDS[];
|
70
|
+
extern const char *FULL_GERMAN_STOP_WORDS[];
|
71
|
+
extern const char *FULL_DUTCH_STOP_WORDS[];
|
72
|
+
extern const char *FULL_SWEDISH_STOP_WORDS[];
|
73
|
+
extern const char *FULL_NORWEGIAN_STOP_WORDS[];
|
74
|
+
extern const char *FULL_DANISH_STOP_WORDS[];
|
75
|
+
extern const char *FULL_RUSSIAN_STOP_WORDS[];
|
76
|
+
extern const char *FULL_FINNISH_STOP_WORDS[];
|
77
|
+
|
78
|
+
TokenStream *stop_filter_create_with_words_len(TokenStream *ts,
|
79
|
+
const char **words, int len);
|
80
|
+
TokenStream *stop_filter_create_with_words(TokenStream *ts, const char **words);
|
53
81
|
TokenStream *stop_filter_create(TokenStream *ts);
|
82
|
+
TokenStream *stem_filter_create(TokenStream *ts, const char * algorithm,
|
83
|
+
const char * charenc);
|
84
|
+
TokenStream *ts_clone(TokenStream *orig_ts);
|
54
85
|
|
55
86
|
/****************************************************************************
|
56
87
|
*
|
@@ -67,10 +98,27 @@ typedef struct Analyzer {
|
|
67
98
|
|
68
99
|
#define a_destroy(ma) ma->destroy(ma)
|
69
100
|
#define a_get_ts(ma, field, text) ma->get_ts(ma, field, text)
|
101
|
+
#define a_get_new_ts(ma, field, text) ts_clone(ma->get_ts(ma, field, text))
|
102
|
+
|
103
|
+
Analyzer *whitespace_analyzer_create(bool lowercase);
|
104
|
+
Analyzer *mb_whitespace_analyzer_create(bool lowercase);
|
105
|
+
|
106
|
+
Analyzer *letter_analyzer_create(bool lowercase);
|
107
|
+
Analyzer *mb_letter_analyzer_create(bool lowercase);
|
108
|
+
|
109
|
+
Analyzer *standard_analyzer_create(bool lowercase);
|
110
|
+
Analyzer *mb_standard_analyzer_create(bool lowercase);
|
111
|
+
|
112
|
+
Analyzer *standard_analyzer_create_with_words(
|
113
|
+
const char **words, bool lowercase);
|
114
|
+
Analyzer *standard_analyzer_create_with_words_len(
|
115
|
+
const char **words, int len, bool lowercase);
|
116
|
+
Analyzer *mb_standard_analyzer_create_with_words(
|
117
|
+
const char **words, bool lowercase);
|
118
|
+
Analyzer *mb_standard_analyzer_create_with_words_len(
|
119
|
+
const char **words, int len, bool lowercase);
|
70
120
|
|
71
|
-
Analyzer *
|
72
|
-
Analyzer *
|
73
|
-
Analyzer *standard_analyzer_create();
|
74
|
-
Analyzer *standard_analyzer_create_with_words(char **words, int len);
|
121
|
+
Analyzer *per_field_analyzer_create(Analyzer *def, bool destroy_subs);
|
122
|
+
void pfa_add_field(Analyzer *self, char *field, Analyzer *analyzer);
|
75
123
|
|
76
124
|
#endif
|
data/ext/api.c
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
|
2
|
+
#include <stdlib.h> /* for calloc, free */
|
3
|
+
#include "header.h"
|
4
|
+
|
5
|
+
extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
|
6
|
+
{
|
7
|
+
struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
|
8
|
+
if (z == NULL) return NULL;
|
9
|
+
z->p = create_s();
|
10
|
+
if (z->p == NULL) goto error;
|
11
|
+
if (S_size)
|
12
|
+
{
|
13
|
+
int i;
|
14
|
+
z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
|
15
|
+
if (z->S == NULL) goto error;
|
16
|
+
|
17
|
+
for (i = 0; i < S_size; i++)
|
18
|
+
{
|
19
|
+
z->S[i] = create_s();
|
20
|
+
if (z->S[i] == NULL) goto error;
|
21
|
+
}
|
22
|
+
z->S_size = S_size;
|
23
|
+
}
|
24
|
+
|
25
|
+
if (I_size)
|
26
|
+
{
|
27
|
+
z->I = (int *) calloc(I_size, sizeof(int));
|
28
|
+
if (z->I == NULL) goto error;
|
29
|
+
z->I_size = I_size;
|
30
|
+
}
|
31
|
+
|
32
|
+
if (B_size)
|
33
|
+
{
|
34
|
+
z->B = (symbol *) calloc(B_size, sizeof(symbol));
|
35
|
+
if (z->B == NULL) goto error;
|
36
|
+
z->B_size = B_size;
|
37
|
+
}
|
38
|
+
|
39
|
+
return z;
|
40
|
+
error:
|
41
|
+
SN_close_env(z);
|
42
|
+
return NULL;
|
43
|
+
}
|
44
|
+
|
45
|
+
extern void SN_close_env(struct SN_env * z)
|
46
|
+
{
|
47
|
+
if (z == NULL) return;
|
48
|
+
if (z->S_size)
|
49
|
+
{
|
50
|
+
int i;
|
51
|
+
for (i = 0; i < z->S_size; i++)
|
52
|
+
{
|
53
|
+
lose_s(z->S[i]);
|
54
|
+
}
|
55
|
+
free(z->S);
|
56
|
+
}
|
57
|
+
if (z->I_size) free(z->I);
|
58
|
+
if (z->B_size) free(z->B);
|
59
|
+
if (z->p) lose_s(z->p);
|
60
|
+
free(z);
|
61
|
+
}
|
62
|
+
|
63
|
+
extern int SN_set_current(struct SN_env * z, int size, const symbol * s)
|
64
|
+
{
|
65
|
+
int err = replace_s(z, 0, z->l, size, s, NULL);
|
66
|
+
z->c = 0;
|
67
|
+
return err;
|
68
|
+
}
|
69
|
+
|
data/ext/api.h
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
|
2
|
+
typedef unsigned char symbol;
|
3
|
+
|
4
|
+
/* Or replace 'char' above with 'short' for 16 bit characters.
|
5
|
+
|
6
|
+
More precisely, replace 'char' with whatever type guarantees the
|
7
|
+
character width you need. Note however that sizeof(symbol) should divide
|
8
|
+
HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
|
9
|
+
there is an alignment problem. In the unlikely event of a problem here,
|
10
|
+
consult Martin Porter.
|
11
|
+
|
12
|
+
*/
|
13
|
+
|
14
|
+
struct SN_env {
|
15
|
+
symbol * p;
|
16
|
+
int c; int a; int l; int lb; int bra; int ket;
|
17
|
+
int S_size; int I_size; int B_size;
|
18
|
+
symbol * * S;
|
19
|
+
int * I;
|
20
|
+
symbol * B;
|
21
|
+
};
|
22
|
+
|
23
|
+
extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
|
24
|
+
extern void SN_close_env(struct SN_env * z);
|
25
|
+
|
26
|
+
extern int SN_set_current(struct SN_env * z, int size, const symbol * s);
|
27
|
+
|
data/ext/array.c
CHANGED
@@ -22,9 +22,11 @@ void ary_destroy(void *p)
|
|
22
22
|
{
|
23
23
|
Array *ary = (Array *)p;
|
24
24
|
int i;
|
25
|
-
|
26
|
-
|
27
|
-
|
25
|
+
if (ary->free_elem) {
|
26
|
+
for (i = 0; i < ary->size; i++) {
|
27
|
+
if (ary->elems[i])
|
28
|
+
ary->free_elem(ary->elems[i]);
|
29
|
+
}
|
28
30
|
}
|
29
31
|
free(ary->elems);
|
30
32
|
free(ary);
|
@@ -41,7 +43,7 @@ void ary_set(Array *ary, int index, void *value)
|
|
41
43
|
if (index >= ary->size)
|
42
44
|
ary->size = index + 1;
|
43
45
|
|
44
|
-
if (ary->free_elem
|
46
|
+
if (ary->free_elem && ary->elems[index])
|
45
47
|
ary->free_elem(ary->elems[index]);
|
46
48
|
|
47
49
|
ary->elems[index] = value;
|
@@ -63,7 +65,8 @@ void ary_delete(Array *ary, int index)
|
|
63
65
|
{
|
64
66
|
if (index >= ary->size)
|
65
67
|
return;
|
66
|
-
ary->free_elem
|
68
|
+
if (ary->free_elem && ary->elems[index])
|
69
|
+
ary->free_elem(ary->elems[index]);
|
67
70
|
ary->elems[index] = NULL;
|
68
71
|
if (index == ary->size - 1)
|
69
72
|
ary->size--;
|
data/ext/compound_io.c
CHANGED
@@ -1,4 +1,12 @@
|
|
1
|
-
#include "index.h"
|
1
|
+
#include "index.h"
|
2
|
+
static char * const ALREADY_CLOSED_MSG = "Already closed";
|
3
|
+
static char * const STREAM_CLOSED_MSG = "Stream closed";
|
4
|
+
static char * const MISSING_FILE_MSG = "No sub-file found";
|
5
|
+
static char * const ALREADY_MERGED_MSG = "Already merged";
|
6
|
+
static char * const REMAINDER_ERROR_MSG = "Non-zero remainder length after copying";
|
7
|
+
static char * const FILE_OFFSET_MSG = "Difference in the output file offsets"
|
8
|
+
" does not match the original file length";
|
9
|
+
static char * const NO_FILES_TO_MERGE_MSG = "No Files to merge into the compound file";
|
2
10
|
|
3
11
|
/****************************************************************************
|
4
12
|
*
|
@@ -24,15 +32,21 @@ int cmpd_exists(Store *store, char *filename)
|
|
24
32
|
return false;
|
25
33
|
}
|
26
34
|
|
35
|
+
/**
|
36
|
+
* @throws UNSUPPORTED_ERROR
|
37
|
+
*/
|
27
38
|
int cmpd_remove(Store *store, char *filename)
|
28
39
|
{
|
29
|
-
|
40
|
+
RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
|
30
41
|
return 0;
|
31
42
|
}
|
32
43
|
|
44
|
+
/**
|
45
|
+
* @throws UNSUPPORTED_ERROR
|
46
|
+
*/
|
33
47
|
int cmpd_rename(Store *store, char *from, char *to)
|
34
48
|
{
|
35
|
-
|
49
|
+
RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
|
36
50
|
return 0;
|
37
51
|
}
|
38
52
|
|
@@ -41,9 +55,12 @@ int cmpd_count(Store *store)
|
|
41
55
|
return store->dir.cmpd->entries->used;
|
42
56
|
}
|
43
57
|
|
58
|
+
/**
|
59
|
+
* @throws UNSUPPORTED_ERROR
|
60
|
+
*/
|
44
61
|
void cmpd_clear(Store *store)
|
45
62
|
{
|
46
|
-
|
63
|
+
RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
|
47
64
|
}
|
48
65
|
|
49
66
|
void cmpd_close(Store *store)
|
@@ -51,7 +68,7 @@ void cmpd_close(Store *store)
|
|
51
68
|
mutex_lock(&store->mutex);
|
52
69
|
CompoundStore *cmpd = store->dir.cmpd;
|
53
70
|
if (cmpd->stream == NULL)
|
54
|
-
|
71
|
+
RAISE(IO_ERROR, ALREADY_CLOSED_MSG);
|
55
72
|
|
56
73
|
h_destroy(cmpd->entries);
|
57
74
|
|
@@ -92,12 +109,15 @@ int cmpdi_length_internal(InStream *is)
|
|
92
109
|
return (is->d.cis->length);
|
93
110
|
}
|
94
111
|
|
112
|
+
/*
|
113
|
+
* raises: EOF_ERROR
|
114
|
+
*/
|
95
115
|
void cmpdi_read_internal(InStream *is, uchar *b, int offset, int len)
|
96
116
|
{
|
97
117
|
CompoundInStream *cis = is->d.cis;
|
98
118
|
int start = is_pos(is);
|
99
119
|
if ((start + len) > cis->length)
|
100
|
-
|
120
|
+
RAISE(EOF_ERROR, EOF_ERROR_MSG);
|
101
121
|
is_seek(cis->sub, cis->offset + start);
|
102
122
|
is_read_bytes(cis->sub, b, offset, len);
|
103
123
|
}
|
@@ -129,13 +149,13 @@ InStream *cmpd_open_input(Store *store, const char *filename)
|
|
129
149
|
mutex_lock(&store->mutex);
|
130
150
|
if (cmpd->stream == NULL) {
|
131
151
|
mutex_unlock(&store->mutex);
|
132
|
-
|
152
|
+
RAISE(IO_ERROR, STREAM_CLOSED_MSG);
|
133
153
|
}
|
134
154
|
|
135
155
|
FileEntry *entry = (FileEntry *)h_get(cmpd->entries, filename);
|
136
156
|
if (entry == NULL) {
|
137
157
|
mutex_unlock(&store->mutex);
|
138
|
-
|
158
|
+
RAISE(IO_ERROR, MISSING_FILE_MSG);
|
139
159
|
}
|
140
160
|
|
141
161
|
is = cmpd_create_input(cmpd->stream, entry->offset, entry->length);
|
@@ -146,53 +166,64 @@ InStream *cmpd_open_input(Store *store, const char *filename)
|
|
146
166
|
|
147
167
|
OutStream *cmpd_create_output(Store *store, const char *filename)
|
148
168
|
{
|
149
|
-
|
169
|
+
RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
|
150
170
|
return NULL;
|
151
171
|
}
|
152
172
|
|
153
173
|
Lock *cmpd_open_lock(Store *store, char *lockname)
|
154
174
|
{
|
155
|
-
|
175
|
+
RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
|
156
176
|
return NULL;
|
157
177
|
}
|
158
178
|
|
159
179
|
void cmpd_close_lock(Lock *lock)
|
160
180
|
{
|
161
|
-
|
181
|
+
RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
|
162
182
|
}
|
163
183
|
|
164
184
|
Store *open_cmpd_store(Store *store, const char *name)
|
165
185
|
{
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
+
Store * volatile new_store = NULL;
|
187
|
+
CompoundStore * volatile cmpd = NULL;
|
188
|
+
InStream * volatile is = NULL;
|
189
|
+
|
190
|
+
TRY
|
191
|
+
new_store = store_create();
|
192
|
+
cmpd = ALLOC(CompoundStore);
|
193
|
+
|
194
|
+
cmpd->store = store;
|
195
|
+
cmpd->name = name;
|
196
|
+
cmpd->entries = h_new_str(&efree, &efree);
|
197
|
+
is = cmpd->stream = store->open_input(store, cmpd->name);
|
198
|
+
|
199
|
+
// read the directory and init files
|
200
|
+
int count = is_read_vint(is);
|
201
|
+
FileEntry *entry = NULL;
|
202
|
+
int i, offset;
|
203
|
+
char *fname;
|
204
|
+
for (i = 0; i < count; i++) {
|
205
|
+
offset = is_read_long(is);
|
206
|
+
fname = is_read_string(is);
|
207
|
+
|
208
|
+
if (entry != NULL) {
|
209
|
+
// set length of the previous entry
|
210
|
+
entry->length = offset - entry->offset;
|
211
|
+
}
|
212
|
+
|
213
|
+
entry = ALLOC(FileEntry);
|
214
|
+
entry->offset = offset;
|
215
|
+
h_set(cmpd->entries, fname, entry);
|
186
216
|
}
|
187
217
|
|
188
|
-
|
189
|
-
entry
|
190
|
-
|
191
|
-
|
218
|
+
// set the length of the final entry
|
219
|
+
if (entry != NULL)
|
220
|
+
entry->length = is_length(is) - entry->offset;
|
221
|
+
XCATCHALL
|
222
|
+
free(new_store);
|
223
|
+
free(cmpd);
|
224
|
+
if (is) is_close(is);
|
225
|
+
XENDTRY
|
192
226
|
|
193
|
-
// set the length of the final entry
|
194
|
-
if (entry != NULL)
|
195
|
-
entry->length = is_length(is) - entry->offset;
|
196
227
|
|
197
228
|
new_store->dir.cmpd = cmpd;
|
198
229
|
new_store->touch = &cmpd_touch;
|
@@ -248,9 +279,9 @@ CompoundWriter *open_cw(Store *store, char *name)
|
|
248
279
|
|
249
280
|
void cw_add_file(CompoundWriter *cw, char *id)
|
250
281
|
{
|
251
|
-
if (cw->merged)
|
282
|
+
if (cw->merged) RAISE(STATE_ERROR, ALREADY_MERGED_MSG);
|
252
283
|
if (hs_add(cw->ids, id) != HASH_KEY_DOES_NOT_EXIST)
|
253
|
-
|
284
|
+
RAISE(STATE_ERROR, ALREADY_MERGED_MSG);
|
254
285
|
|
255
286
|
hs_add(cw->ids, id);
|
256
287
|
ary_append(cw->file_entries, wfe_create(id));
|
@@ -258,78 +289,83 @@ void cw_add_file(CompoundWriter *cw, char *id)
|
|
258
289
|
|
259
290
|
void cw_copy_file(CompoundWriter *cw, WFileEntry *src, OutStream *os)
|
260
291
|
{
|
261
|
-
|
262
292
|
int start_ptr = os_pos(os);
|
293
|
+
int remainder, length, len;
|
263
294
|
|
264
295
|
InStream *is = cw->store->open_input(cw->store, src->name);
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
296
|
+
|
297
|
+
TRY
|
298
|
+
remainder = length = is_length(is);
|
299
|
+
|
300
|
+
uchar buffer[BUFFER_SIZE];
|
301
|
+
while (remainder > 0) {
|
302
|
+
len = MIN(remainder, BUFFER_SIZE);
|
303
|
+
is_read_bytes(is, buffer, 0, len);
|
304
|
+
os_write_bytes(os, buffer, len);
|
305
|
+
remainder -= len;
|
306
|
+
}
|
275
307
|
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
"(id:%s, length: %ld, buffer size: %ld\n", remainder,
|
280
|
-
src->name, length, BUFFER_SIZE);
|
308
|
+
// Verify that remainder is 0
|
309
|
+
if (remainder != 0)
|
310
|
+
RAISE(IO_ERROR, REMAINDER_ERROR_MSG);
|
281
311
|
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
" does not match the original file length ", diff, length);
|
312
|
+
// Verify that the output length diff is equal to original file
|
313
|
+
int end_ptr = os_pos(os);
|
314
|
+
int diff = end_ptr - start_ptr;
|
315
|
+
if (diff != length)
|
316
|
+
RAISE(IO_ERROR, FILE_OFFSET_MSG);
|
288
317
|
|
289
|
-
|
318
|
+
XFINALLY
|
319
|
+
is_close(is);
|
320
|
+
XENDTRY
|
290
321
|
}
|
291
322
|
|
292
323
|
void cw_close(CompoundWriter *cw)
|
293
324
|
{
|
294
|
-
if (cw->merged)
|
325
|
+
if (cw->merged) RAISE(STATE_ERROR, ALREADY_MERGED_MSG);
|
295
326
|
if (cw->ids->size <= 0)
|
296
|
-
|
327
|
+
RAISE(STATE_ERROR, NO_FILES_TO_MERGE_MSG);
|
297
328
|
|
298
329
|
cw->merged = true;
|
299
330
|
|
300
|
-
OutStream *os =
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
331
|
+
OutStream * volatile os = NULL;
|
332
|
+
TRY
|
333
|
+
os = cw->store->create_output(cw->store, cw->name);
|
334
|
+
os_write_vint(os, cw->file_entries->size);
|
335
|
+
|
336
|
+
/* Write the directory with all offsets at 0.
|
337
|
+
* Remember the positions of directory entries so that we can adjust the
|
338
|
+
* offsets later */
|
339
|
+
int i;
|
340
|
+
WFileEntry *wfe;
|
341
|
+
for (i = 0; i < cw->file_entries->size; i++) {
|
342
|
+
wfe = (WFileEntry *)cw->file_entries->elems[i];
|
343
|
+
wfe->dir_offset = os_pos(os);
|
344
|
+
os_write_long(os, 0); // for now
|
345
|
+
os_write_string(os, wfe->name);
|
346
|
+
}
|
314
347
|
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
348
|
+
/* Open the files and copy their data into the stream. Remember the
|
349
|
+
* locations of each file's data section. */
|
350
|
+
for (i = 0; i < cw->file_entries->size; i++) {
|
351
|
+
wfe = (WFileEntry *)cw->file_entries->elems[i];
|
352
|
+
wfe->data_offset = os_pos(os);
|
353
|
+
cw_copy_file(cw, wfe, os);
|
354
|
+
}
|
322
355
|
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
356
|
+
/* Write the data offsets into the directory of the compound stream */
|
357
|
+
for (i = 0; i < cw->file_entries->size; i++) {
|
358
|
+
wfe = (WFileEntry *)cw->file_entries->elems[i];
|
359
|
+
os_seek(os, wfe->dir_offset);
|
360
|
+
os_write_long(os, wfe->data_offset);
|
361
|
+
}
|
329
362
|
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
363
|
+
XFINALLY
|
364
|
+
if (os) os_close(os);
|
365
|
+
hs_destroy(cw->ids);
|
366
|
+
ary_destroy(cw->file_entries);
|
367
|
+
free(cw);
|
368
|
+
break;
|
369
|
+
XENDTRY
|
334
370
|
}
|
335
371
|
|