ferret 0.9.0 → 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (187) hide show
  1. data/Rakefile +23 -5
  2. data/TODO +2 -1
  3. data/ext/analysis.c +838 -177
  4. data/ext/analysis.h +55 -7
  5. data/ext/api.c +69 -0
  6. data/ext/api.h +27 -0
  7. data/ext/array.c +8 -5
  8. data/ext/compound_io.c +132 -96
  9. data/ext/document.c +58 -28
  10. data/ext/except.c +59 -0
  11. data/ext/except.h +88 -0
  12. data/ext/ferret.c +47 -3
  13. data/ext/ferret.h +3 -0
  14. data/ext/field.c +15 -9
  15. data/ext/filter.c +1 -1
  16. data/ext/fs_store.c +215 -34
  17. data/ext/global.c +72 -3
  18. data/ext/global.h +4 -3
  19. data/ext/hash.c +44 -3
  20. data/ext/hash.h +9 -0
  21. data/ext/header.h +58 -0
  22. data/ext/inc/except.h +88 -0
  23. data/ext/inc/lang.h +23 -13
  24. data/ext/ind.c +16 -10
  25. data/ext/index.h +2 -22
  26. data/ext/index_io.c +3 -11
  27. data/ext/index_rw.c +245 -193
  28. data/ext/lang.h +23 -13
  29. data/ext/libstemmer.c +92 -0
  30. data/ext/libstemmer.h +79 -0
  31. data/ext/modules.h +162 -0
  32. data/ext/q_boolean.c +34 -21
  33. data/ext/q_const_score.c +6 -12
  34. data/ext/q_filtered_query.c +206 -0
  35. data/ext/q_fuzzy.c +18 -15
  36. data/ext/q_match_all.c +3 -7
  37. data/ext/q_multi_phrase.c +10 -14
  38. data/ext/q_parser.c +29 -2
  39. data/ext/q_phrase.c +14 -21
  40. data/ext/q_prefix.c +15 -12
  41. data/ext/q_range.c +30 -28
  42. data/ext/q_span.c +13 -21
  43. data/ext/q_term.c +17 -26
  44. data/ext/r_analysis.c +693 -21
  45. data/ext/r_doc.c +11 -12
  46. data/ext/r_index_io.c +4 -1
  47. data/ext/r_qparser.c +21 -2
  48. data/ext/r_search.c +285 -18
  49. data/ext/ram_store.c +5 -2
  50. data/ext/search.c +11 -17
  51. data/ext/search.h +21 -45
  52. data/ext/similarity.h +67 -0
  53. data/ext/sort.c +30 -25
  54. data/ext/stem_ISO_8859_1_danish.c +338 -0
  55. data/ext/stem_ISO_8859_1_danish.h +16 -0
  56. data/ext/stem_ISO_8859_1_dutch.c +635 -0
  57. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  58. data/ext/stem_ISO_8859_1_english.c +1156 -0
  59. data/ext/stem_ISO_8859_1_english.h +16 -0
  60. data/ext/stem_ISO_8859_1_finnish.c +792 -0
  61. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  62. data/ext/stem_ISO_8859_1_french.c +1276 -0
  63. data/ext/stem_ISO_8859_1_french.h +16 -0
  64. data/ext/stem_ISO_8859_1_german.c +512 -0
  65. data/ext/stem_ISO_8859_1_german.h +16 -0
  66. data/ext/stem_ISO_8859_1_italian.c +1091 -0
  67. data/ext/stem_ISO_8859_1_italian.h +16 -0
  68. data/ext/stem_ISO_8859_1_norwegian.c +296 -0
  69. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  70. data/ext/stem_ISO_8859_1_porter.c +776 -0
  71. data/ext/stem_ISO_8859_1_porter.h +16 -0
  72. data/ext/stem_ISO_8859_1_portuguese.c +1035 -0
  73. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  74. data/ext/stem_ISO_8859_1_spanish.c +1119 -0
  75. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  76. data/ext/stem_ISO_8859_1_swedish.c +307 -0
  77. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  78. data/ext/stem_KOI8_R_russian.c +701 -0
  79. data/ext/stem_KOI8_R_russian.h +16 -0
  80. data/ext/stem_UTF_8_danish.c +344 -0
  81. data/ext/stem_UTF_8_danish.h +16 -0
  82. data/ext/stem_UTF_8_dutch.c +653 -0
  83. data/ext/stem_UTF_8_dutch.h +16 -0
  84. data/ext/stem_UTF_8_english.c +1176 -0
  85. data/ext/stem_UTF_8_english.h +16 -0
  86. data/ext/stem_UTF_8_finnish.c +808 -0
  87. data/ext/stem_UTF_8_finnish.h +16 -0
  88. data/ext/stem_UTF_8_french.c +1296 -0
  89. data/ext/stem_UTF_8_french.h +16 -0
  90. data/ext/stem_UTF_8_german.c +526 -0
  91. data/ext/stem_UTF_8_german.h +16 -0
  92. data/ext/stem_UTF_8_italian.c +1113 -0
  93. data/ext/stem_UTF_8_italian.h +16 -0
  94. data/ext/stem_UTF_8_norwegian.c +302 -0
  95. data/ext/stem_UTF_8_norwegian.h +16 -0
  96. data/ext/stem_UTF_8_porter.c +794 -0
  97. data/ext/stem_UTF_8_porter.h +16 -0
  98. data/ext/stem_UTF_8_portuguese.c +1055 -0
  99. data/ext/stem_UTF_8_portuguese.h +16 -0
  100. data/ext/stem_UTF_8_russian.c +709 -0
  101. data/ext/stem_UTF_8_russian.h +16 -0
  102. data/ext/stem_UTF_8_spanish.c +1137 -0
  103. data/ext/stem_UTF_8_spanish.h +16 -0
  104. data/ext/stem_UTF_8_swedish.c +313 -0
  105. data/ext/stem_UTF_8_swedish.h +16 -0
  106. data/ext/stopwords.c +325 -0
  107. data/ext/store.c +34 -2
  108. data/ext/tags +2953 -0
  109. data/ext/term.c +21 -15
  110. data/ext/termdocs.c +5 -3
  111. data/ext/utilities.c +446 -0
  112. data/ext/vector.c +27 -13
  113. data/lib/ferret/document/document.rb +1 -1
  114. data/lib/ferret/index/index.rb +44 -6
  115. data/lib/ferret/query_parser/query_parser.tab.rb +7 -3
  116. data/lib/rferret.rb +2 -1
  117. data/test/test_helper.rb +2 -2
  118. data/test/unit/analysis/ctc_analyzer.rb +401 -0
  119. data/test/unit/analysis/ctc_tokenstream.rb +423 -0
  120. data/test/unit/analysis/{tc_letter_tokenizer.rb → rtc_letter_tokenizer.rb} +0 -0
  121. data/test/unit/analysis/{tc_lower_case_filter.rb → rtc_lower_case_filter.rb} +0 -0
  122. data/test/unit/analysis/{tc_lower_case_tokenizer.rb → rtc_lower_case_tokenizer.rb} +0 -0
  123. data/test/unit/analysis/{tc_per_field_analyzer_wrapper.rb → rtc_per_field_analyzer_wrapper.rb} +0 -0
  124. data/test/unit/analysis/{tc_porter_stem_filter.rb → rtc_porter_stem_filter.rb} +0 -0
  125. data/test/unit/analysis/{tc_standard_analyzer.rb → rtc_standard_analyzer.rb} +0 -0
  126. data/test/unit/analysis/{tc_standard_tokenizer.rb → rtc_standard_tokenizer.rb} +0 -0
  127. data/test/unit/analysis/{tc_stop_analyzer.rb → rtc_stop_analyzer.rb} +0 -0
  128. data/test/unit/analysis/{tc_stop_filter.rb → rtc_stop_filter.rb} +0 -0
  129. data/test/unit/analysis/{tc_white_space_analyzer.rb → rtc_white_space_analyzer.rb} +0 -0
  130. data/test/unit/analysis/{tc_white_space_tokenizer.rb → rtc_white_space_tokenizer.rb} +0 -0
  131. data/test/unit/analysis/{tc_word_list_loader.rb → rtc_word_list_loader.rb} +0 -0
  132. data/test/unit/analysis/tc_analyzer.rb +1 -2
  133. data/test/unit/analysis/{c_token.rb → tc_token.rb} +0 -0
  134. data/test/unit/document/rtc_field.rb +28 -0
  135. data/test/unit/document/{c_document.rb → tc_document.rb} +0 -0
  136. data/test/unit/document/tc_field.rb +82 -12
  137. data/test/unit/index/{tc_compound_file_io.rb → rtc_compound_file_io.rb} +0 -0
  138. data/test/unit/index/{tc_field_infos.rb → rtc_field_infos.rb} +0 -0
  139. data/test/unit/index/{tc_fields_io.rb → rtc_fields_io.rb} +0 -0
  140. data/test/unit/index/{tc_multiple_term_doc_pos_enum.rb → rtc_multiple_term_doc_pos_enum.rb} +0 -0
  141. data/test/unit/index/{tc_segment_infos.rb → rtc_segment_infos.rb} +0 -0
  142. data/test/unit/index/{tc_segment_term_docs.rb → rtc_segment_term_docs.rb} +0 -0
  143. data/test/unit/index/{tc_segment_term_enum.rb → rtc_segment_term_enum.rb} +0 -0
  144. data/test/unit/index/{tc_segment_term_vector.rb → rtc_segment_term_vector.rb} +0 -0
  145. data/test/unit/index/{tc_term_buffer.rb → rtc_term_buffer.rb} +0 -0
  146. data/test/unit/index/{tc_term_info.rb → rtc_term_info.rb} +0 -0
  147. data/test/unit/index/{tc_term_infos_io.rb → rtc_term_infos_io.rb} +0 -0
  148. data/test/unit/index/{tc_term_vectors_io.rb → rtc_term_vectors_io.rb} +0 -0
  149. data/test/unit/index/{c_index.rb → tc_index.rb} +26 -6
  150. data/test/unit/index/{c_index_reader.rb → tc_index_reader.rb} +0 -0
  151. data/test/unit/index/{c_index_writer.rb → tc_index_writer.rb} +0 -0
  152. data/test/unit/index/{c_term.rb → tc_term.rb} +0 -0
  153. data/test/unit/index/{c_term_voi.rb → tc_term_voi.rb} +0 -0
  154. data/test/unit/query_parser/{c_query_parser.rb → rtc_query_parser.rb} +14 -14
  155. data/test/unit/query_parser/tc_query_parser.rb +24 -16
  156. data/test/unit/search/{tc_similarity.rb → rtc_similarity.rb} +0 -0
  157. data/test/unit/search/rtc_sort_field.rb +14 -0
  158. data/test/unit/search/{c_filter.rb → tc_filter.rb} +11 -11
  159. data/test/unit/search/{c_fuzzy_query.rb → tc_fuzzy_query.rb} +0 -0
  160. data/test/unit/search/{c_index_searcher.rb → tc_index_searcher.rb} +0 -0
  161. data/test/unit/search/{c_search_and_sort.rb → tc_search_and_sort.rb} +0 -0
  162. data/test/unit/search/{c_sort.rb → tc_sort.rb} +0 -0
  163. data/test/unit/search/tc_sort_field.rb +20 -7
  164. data/test/unit/search/{c_spans.rb → tc_spans.rb} +0 -0
  165. data/test/unit/store/rtc_fs_store.rb +62 -0
  166. data/test/unit/store/rtc_ram_store.rb +15 -0
  167. data/test/unit/store/rtm_store.rb +150 -0
  168. data/test/unit/store/rtm_store_lock.rb +2 -0
  169. data/test/unit/store/tc_fs_store.rb +54 -40
  170. data/test/unit/store/tc_ram_store.rb +20 -0
  171. data/test/unit/store/tm_store.rb +30 -146
  172. data/test/unit/store/tm_store_lock.rb +66 -0
  173. data/test/unit/utils/{tc_bit_vector.rb → rtc_bit_vector.rb} +0 -0
  174. data/test/unit/utils/{tc_date_tools.rb → rtc_date_tools.rb} +0 -0
  175. data/test/unit/utils/{tc_number_tools.rb → rtc_number_tools.rb} +0 -0
  176. data/test/unit/utils/{tc_parameter.rb → rtc_parameter.rb} +0 -0
  177. data/test/unit/utils/{tc_priority_queue.rb → rtc_priority_queue.rb} +0 -0
  178. data/test/unit/utils/{tc_string_helper.rb → rtc_string_helper.rb} +0 -0
  179. data/test/unit/utils/{tc_thread.rb → rtc_thread.rb} +0 -0
  180. data/test/unit/utils/{tc_weak_key_hash.rb → rtc_weak_key_hash.rb} +0 -0
  181. metadata +360 -289
  182. data/test/unit/document/c_field.rb +0 -98
  183. data/test/unit/search/c_sort_field.rb +0 -27
  184. data/test/unit/store/c_fs_store.rb +0 -76
  185. data/test/unit/store/c_ram_store.rb +0 -35
  186. data/test/unit/store/m_store.rb +0 -34
  187. data/test/unit/store/m_store_lock.rb +0 -68
@@ -30,27 +30,58 @@ int tk_cmp(Token *tk1, Token *tk2);
30
30
  *
31
31
  ****************************************************************************/
32
32
 
33
+
33
34
  typedef struct TokenStream TokenStream;
34
35
  struct TokenStream {
35
36
  void *data;
36
37
  char *text;
37
- int pos;
38
+ char *t; /* ptr used to scan text */
38
39
  Token *token;
39
40
  Token *(*next)(TokenStream *ts);
40
41
  void (*reset)(TokenStream *ts, char *text);
42
+ void (*clone_i)(TokenStream *orig_ts, TokenStream *new_ts);
41
43
  void (*destroy)(void *p);
42
- TokenStream *sub_ts; // used by filters
44
+ TokenStream *sub_ts; /* used by filters */
45
+ bool destroy_sub : 1;
43
46
  };
44
47
 
45
48
  #define ts_next(mts) mts->next(mts)
46
49
  #define ts_destroy(mts) mts->destroy(mts)
47
50
 
48
51
  TokenStream *whitespace_tokenizer_create();
52
+ TokenStream *mb_whitespace_tokenizer_create(bool lowercase);
53
+
49
54
  TokenStream *letter_tokenizer_create();
55
+ TokenStream *mb_letter_tokenizer_create(bool lowercase);
56
+
50
57
  TokenStream *standard_tokenizer_create();
58
+ TokenStream *mb_standard_tokenizer_create();
59
+
51
60
  TokenStream *lowercase_filter_create(TokenStream *ts);
52
- TokenStream *stop_filter_create_with_words(TokenStream *ts, char **words, int len);
61
+ TokenStream *mb_lowercase_filter_create(TokenStream *ts);
62
+
63
+ extern const char *ENGLISH_STOP_WORDS[];
64
+ extern const char *FULL_ENGLISH_STOP_WORDS[];
65
+ extern const char *EXTENDED_ENGLISH_STOP_WORDS[];
66
+ extern const char *FULL_FRENCH_STOP_WORDS[];
67
+ extern const char *FULL_SPANISH_STOP_WORDS[];
68
+ extern const char *FULL_PORTUGUESE_STOP_WORDS[];
69
+ extern const char *FULL_ITALIAN_STOP_WORDS[];
70
+ extern const char *FULL_GERMAN_STOP_WORDS[];
71
+ extern const char *FULL_DUTCH_STOP_WORDS[];
72
+ extern const char *FULL_SWEDISH_STOP_WORDS[];
73
+ extern const char *FULL_NORWEGIAN_STOP_WORDS[];
74
+ extern const char *FULL_DANISH_STOP_WORDS[];
75
+ extern const char *FULL_RUSSIAN_STOP_WORDS[];
76
+ extern const char *FULL_FINNISH_STOP_WORDS[];
77
+
78
+ TokenStream *stop_filter_create_with_words_len(TokenStream *ts,
79
+ const char **words, int len);
80
+ TokenStream *stop_filter_create_with_words(TokenStream *ts, const char **words);
53
81
  TokenStream *stop_filter_create(TokenStream *ts);
82
+ TokenStream *stem_filter_create(TokenStream *ts, const char * algorithm,
83
+ const char * charenc);
84
+ TokenStream *ts_clone(TokenStream *orig_ts);
54
85
 
55
86
  /****************************************************************************
56
87
  *
@@ -67,10 +98,27 @@ typedef struct Analyzer {
67
98
 
68
99
  #define a_destroy(ma) ma->destroy(ma)
69
100
  #define a_get_ts(ma, field, text) ma->get_ts(ma, field, text)
101
+ #define a_get_new_ts(ma, field, text) ts_clone(ma->get_ts(ma, field, text))
102
+
103
+ Analyzer *whitespace_analyzer_create(bool lowercase);
104
+ Analyzer *mb_whitespace_analyzer_create(bool lowercase);
105
+
106
+ Analyzer *letter_analyzer_create(bool lowercase);
107
+ Analyzer *mb_letter_analyzer_create(bool lowercase);
108
+
109
+ Analyzer *standard_analyzer_create(bool lowercase);
110
+ Analyzer *mb_standard_analyzer_create(bool lowercase);
111
+
112
+ Analyzer *standard_analyzer_create_with_words(
113
+ const char **words, bool lowercase);
114
+ Analyzer *standard_analyzer_create_with_words_len(
115
+ const char **words, int len, bool lowercase);
116
+ Analyzer *mb_standard_analyzer_create_with_words(
117
+ const char **words, bool lowercase);
118
+ Analyzer *mb_standard_analyzer_create_with_words_len(
119
+ const char **words, int len, bool lowercase);
70
120
 
71
- Analyzer *whitespace_analyzer_create();
72
- Analyzer *letter_analyzer_create();
73
- Analyzer *standard_analyzer_create();
74
- Analyzer *standard_analyzer_create_with_words(char **words, int len);
121
+ Analyzer *per_field_analyzer_create(Analyzer *def, bool destroy_subs);
122
+ void pfa_add_field(Analyzer *self, char *field, Analyzer *analyzer);
75
123
 
76
124
  #endif
@@ -0,0 +1,69 @@
1
+
2
+ #include <stdlib.h> /* for calloc, free */
3
+ #include "header.h"
4
+
5
+ extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
6
+ {
7
+ struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
8
+ if (z == NULL) return NULL;
9
+ z->p = create_s();
10
+ if (z->p == NULL) goto error;
11
+ if (S_size)
12
+ {
13
+ int i;
14
+ z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
15
+ if (z->S == NULL) goto error;
16
+
17
+ for (i = 0; i < S_size; i++)
18
+ {
19
+ z->S[i] = create_s();
20
+ if (z->S[i] == NULL) goto error;
21
+ }
22
+ z->S_size = S_size;
23
+ }
24
+
25
+ if (I_size)
26
+ {
27
+ z->I = (int *) calloc(I_size, sizeof(int));
28
+ if (z->I == NULL) goto error;
29
+ z->I_size = I_size;
30
+ }
31
+
32
+ if (B_size)
33
+ {
34
+ z->B = (symbol *) calloc(B_size, sizeof(symbol));
35
+ if (z->B == NULL) goto error;
36
+ z->B_size = B_size;
37
+ }
38
+
39
+ return z;
40
+ error:
41
+ SN_close_env(z);
42
+ return NULL;
43
+ }
44
+
45
+ extern void SN_close_env(struct SN_env * z)
46
+ {
47
+ if (z == NULL) return;
48
+ if (z->S_size)
49
+ {
50
+ int i;
51
+ for (i = 0; i < z->S_size; i++)
52
+ {
53
+ lose_s(z->S[i]);
54
+ }
55
+ free(z->S);
56
+ }
57
+ if (z->I_size) free(z->I);
58
+ if (z->B_size) free(z->B);
59
+ if (z->p) lose_s(z->p);
60
+ free(z);
61
+ }
62
+
63
+ extern int SN_set_current(struct SN_env * z, int size, const symbol * s)
64
+ {
65
+ int err = replace_s(z, 0, z->l, size, s, NULL);
66
+ z->c = 0;
67
+ return err;
68
+ }
69
+
@@ -0,0 +1,27 @@
1
+
2
+ typedef unsigned char symbol;
3
+
4
+ /* Or replace 'char' above with 'short' for 16 bit characters.
5
+
6
+ More precisely, replace 'char' with whatever type guarantees the
7
+ character width you need. Note however that sizeof(symbol) should divide
8
+ HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
9
+ there is an alignment problem. In the unlikely event of a problem here,
10
+ consult Martin Porter.
11
+
12
+ */
13
+
14
+ struct SN_env {
15
+ symbol * p;
16
+ int c; int a; int l; int lb; int bra; int ket;
17
+ int S_size; int I_size; int B_size;
18
+ symbol * * S;
19
+ int * I;
20
+ symbol * B;
21
+ };
22
+
23
+ extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
24
+ extern void SN_close_env(struct SN_env * z);
25
+
26
+ extern int SN_set_current(struct SN_env * z, int size, const symbol * s);
27
+
@@ -22,9 +22,11 @@ void ary_destroy(void *p)
22
22
  {
23
23
  Array *ary = (Array *)p;
24
24
  int i;
25
- for (i = 0; i < ary->size; i++) {
26
- if (ary->free_elem != NULL && ary->elems[i] != NULL)
27
- ary->free_elem(ary->elems[i]);
25
+ if (ary->free_elem) {
26
+ for (i = 0; i < ary->size; i++) {
27
+ if (ary->elems[i])
28
+ ary->free_elem(ary->elems[i]);
29
+ }
28
30
  }
29
31
  free(ary->elems);
30
32
  free(ary);
@@ -41,7 +43,7 @@ void ary_set(Array *ary, int index, void *value)
41
43
  if (index >= ary->size)
42
44
  ary->size = index + 1;
43
45
 
44
- if (ary->free_elem != NULL && ary->elems[index] != NULL)
46
+ if (ary->free_elem && ary->elems[index])
45
47
  ary->free_elem(ary->elems[index]);
46
48
 
47
49
  ary->elems[index] = value;
@@ -63,7 +65,8 @@ void ary_delete(Array *ary, int index)
63
65
  {
64
66
  if (index >= ary->size)
65
67
  return;
66
- ary->free_elem(ary->elems[index]);
68
+ if (ary->free_elem && ary->elems[index])
69
+ ary->free_elem(ary->elems[index]);
67
70
  ary->elems[index] = NULL;
68
71
  if (index == ary->size - 1)
69
72
  ary->size--;
@@ -1,4 +1,12 @@
1
- #include "index.h"
1
+ #include "index.h"
2
+ static char * const ALREADY_CLOSED_MSG = "Already closed";
3
+ static char * const STREAM_CLOSED_MSG = "Stream closed";
4
+ static char * const MISSING_FILE_MSG = "No sub-file found";
5
+ static char * const ALREADY_MERGED_MSG = "Already merged";
6
+ static char * const REMAINDER_ERROR_MSG = "Non-zero remainder length after copying";
7
+ static char * const FILE_OFFSET_MSG = "Difference in the output file offsets"
8
+ " does not match the original file length";
9
+ static char * const NO_FILES_TO_MERGE_MSG = "No Files to merge into the compound file";
2
10
 
3
11
  /****************************************************************************
4
12
  *
@@ -24,15 +32,21 @@ int cmpd_exists(Store *store, char *filename)
24
32
  return false;
25
33
  }
26
34
 
35
+ /**
36
+ * @throws UNSUPPORTED_ERROR
37
+ */
27
38
  int cmpd_remove(Store *store, char *filename)
28
39
  {
29
- eprintf(UNSUPPORTED_ERROR, "Unsupported operation");
40
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
30
41
  return 0;
31
42
  }
32
43
 
44
+ /**
45
+ * @throws UNSUPPORTED_ERROR
46
+ */
33
47
  int cmpd_rename(Store *store, char *from, char *to)
34
48
  {
35
- eprintf(UNSUPPORTED_ERROR, "Unsupported operation");
49
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
36
50
  return 0;
37
51
  }
38
52
 
@@ -41,9 +55,12 @@ int cmpd_count(Store *store)
41
55
  return store->dir.cmpd->entries->used;
42
56
  }
43
57
 
58
+ /**
59
+ * @throws UNSUPPORTED_ERROR
60
+ */
44
61
  void cmpd_clear(Store *store)
45
62
  {
46
- eprintf(UNSUPPORTED_ERROR, "Unsupported operation");
63
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
47
64
  }
48
65
 
49
66
  void cmpd_close(Store *store)
@@ -51,7 +68,7 @@ void cmpd_close(Store *store)
51
68
  mutex_lock(&store->mutex);
52
69
  CompoundStore *cmpd = store->dir.cmpd;
53
70
  if (cmpd->stream == NULL)
54
- eprintf(IO_ERROR, "Already closed");
71
+ RAISE(IO_ERROR, ALREADY_CLOSED_MSG);
55
72
 
56
73
  h_destroy(cmpd->entries);
57
74
 
@@ -92,12 +109,15 @@ int cmpdi_length_internal(InStream *is)
92
109
  return (is->d.cis->length);
93
110
  }
94
111
 
112
+ /*
113
+ * raises: EOF_ERROR
114
+ */
95
115
  void cmpdi_read_internal(InStream *is, uchar *b, int offset, int len)
96
116
  {
97
117
  CompoundInStream *cis = is->d.cis;
98
118
  int start = is_pos(is);
99
119
  if ((start + len) > cis->length)
100
- eprintf(EOF_ERROR, "read past EOF");
120
+ RAISE(EOF_ERROR, EOF_ERROR_MSG);
101
121
  is_seek(cis->sub, cis->offset + start);
102
122
  is_read_bytes(cis->sub, b, offset, len);
103
123
  }
@@ -129,13 +149,13 @@ InStream *cmpd_open_input(Store *store, const char *filename)
129
149
  mutex_lock(&store->mutex);
130
150
  if (cmpd->stream == NULL) {
131
151
  mutex_unlock(&store->mutex);
132
- eprintf(IO_ERROR, "Stream closed");
152
+ RAISE(IO_ERROR, STREAM_CLOSED_MSG);
133
153
  }
134
154
 
135
155
  FileEntry *entry = (FileEntry *)h_get(cmpd->entries, filename);
136
156
  if (entry == NULL) {
137
157
  mutex_unlock(&store->mutex);
138
- eprintf(IO_ERROR, "No sub-file with id <%s> found", filename);
158
+ RAISE(IO_ERROR, MISSING_FILE_MSG);
139
159
  }
140
160
 
141
161
  is = cmpd_create_input(cmpd->stream, entry->offset, entry->length);
@@ -146,53 +166,64 @@ InStream *cmpd_open_input(Store *store, const char *filename)
146
166
 
147
167
  OutStream *cmpd_create_output(Store *store, const char *filename)
148
168
  {
149
- eprintf(UNSUPPORTED_ERROR, "Unsupported operation");
169
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
150
170
  return NULL;
151
171
  }
152
172
 
153
173
  Lock *cmpd_open_lock(Store *store, char *lockname)
154
174
  {
155
- eprintf(UNSUPPORTED_ERROR, "Unsupported operation");
175
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
156
176
  return NULL;
157
177
  }
158
178
 
159
179
  void cmpd_close_lock(Lock *lock)
160
180
  {
161
- eprintf(UNSUPPORTED_ERROR, "Unsupported operation");
181
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
162
182
  }
163
183
 
164
184
  Store *open_cmpd_store(Store *store, const char *name)
165
185
  {
166
- CompoundStore *cmpd = ALLOC(CompoundStore);
167
- Store *new_store = store_create();
168
-
169
- cmpd->store = store;
170
- cmpd->name = name;
171
- cmpd->entries = h_new_str(&efree, &efree);
172
- InStream *is = cmpd->stream = store->open_input(store, cmpd->name);
173
-
174
- // read the directory and init files
175
- int count = is_read_vint(is);
176
- FileEntry *entry = NULL;
177
- int i, offset;
178
- char *fname;
179
- for (i = 0; i < count; i++) {
180
- offset = is_read_long(is);
181
- fname = is_read_string(is);
182
-
183
- if (entry != NULL) {
184
- // set length of the previous entry
185
- entry->length = offset - entry->offset;
186
+ Store * volatile new_store = NULL;
187
+ CompoundStore * volatile cmpd = NULL;
188
+ InStream * volatile is = NULL;
189
+
190
+ TRY
191
+ new_store = store_create();
192
+ cmpd = ALLOC(CompoundStore);
193
+
194
+ cmpd->store = store;
195
+ cmpd->name = name;
196
+ cmpd->entries = h_new_str(&efree, &efree);
197
+ is = cmpd->stream = store->open_input(store, cmpd->name);
198
+
199
+ // read the directory and init files
200
+ int count = is_read_vint(is);
201
+ FileEntry *entry = NULL;
202
+ int i, offset;
203
+ char *fname;
204
+ for (i = 0; i < count; i++) {
205
+ offset = is_read_long(is);
206
+ fname = is_read_string(is);
207
+
208
+ if (entry != NULL) {
209
+ // set length of the previous entry
210
+ entry->length = offset - entry->offset;
211
+ }
212
+
213
+ entry = ALLOC(FileEntry);
214
+ entry->offset = offset;
215
+ h_set(cmpd->entries, fname, entry);
186
216
  }
187
217
 
188
- entry = ALLOC(FileEntry);
189
- entry->offset = offset;
190
- h_set(cmpd->entries, fname, entry);
191
- }
218
+ // set the length of the final entry
219
+ if (entry != NULL)
220
+ entry->length = is_length(is) - entry->offset;
221
+ XCATCHALL
222
+ free(new_store);
223
+ free(cmpd);
224
+ if (is) is_close(is);
225
+ XENDTRY
192
226
 
193
- // set the length of the final entry
194
- if (entry != NULL)
195
- entry->length = is_length(is) - entry->offset;
196
227
 
197
228
  new_store->dir.cmpd = cmpd;
198
229
  new_store->touch = &cmpd_touch;
@@ -248,9 +279,9 @@ CompoundWriter *open_cw(Store *store, char *name)
248
279
 
249
280
  void cw_add_file(CompoundWriter *cw, char *id)
250
281
  {
251
- if (cw->merged) eprintf(STATE_ERROR, "Already merged");
282
+ if (cw->merged) RAISE(STATE_ERROR, ALREADY_MERGED_MSG);
252
283
  if (hs_add(cw->ids, id) != HASH_KEY_DOES_NOT_EXIST)
253
- eprintf(STATE_ERROR, "Already merged");
284
+ RAISE(STATE_ERROR, ALREADY_MERGED_MSG);
254
285
 
255
286
  hs_add(cw->ids, id);
256
287
  ary_append(cw->file_entries, wfe_create(id));
@@ -258,78 +289,83 @@ void cw_add_file(CompoundWriter *cw, char *id)
258
289
 
259
290
  void cw_copy_file(CompoundWriter *cw, WFileEntry *src, OutStream *os)
260
291
  {
261
-
262
292
  int start_ptr = os_pos(os);
293
+ int remainder, length, len;
263
294
 
264
295
  InStream *is = cw->store->open_input(cw->store, src->name);
265
- int remainder, length, len;
266
- remainder = length = is_length(is);
267
-
268
- uchar buffer[BUFFER_SIZE];
269
- while (remainder > 0) {
270
- len = MIN(remainder, BUFFER_SIZE);
271
- is_read_bytes(is, buffer, 0, len);
272
- os_write_bytes(os, buffer, len);
273
- remainder -= len;
274
- }
296
+
297
+ TRY
298
+ remainder = length = is_length(is);
299
+
300
+ uchar buffer[BUFFER_SIZE];
301
+ while (remainder > 0) {
302
+ len = MIN(remainder, BUFFER_SIZE);
303
+ is_read_bytes(is, buffer, 0, len);
304
+ os_write_bytes(os, buffer, len);
305
+ remainder -= len;
306
+ }
275
307
 
276
- // Verify that remainder is 0
277
- if (remainder != 0)
278
- eprintf(IO_ERROR, "Non-zero remainder length after copying: %ld "
279
- "(id:%s, length: %ld, buffer size: %ld\n", remainder,
280
- src->name, length, BUFFER_SIZE);
308
+ // Verify that remainder is 0
309
+ if (remainder != 0)
310
+ RAISE(IO_ERROR, REMAINDER_ERROR_MSG);
281
311
 
282
- // Verify that the output length diff is equal to original file
283
- int end_ptr = os_pos(os);
284
- int diff = end_ptr - start_ptr;
285
- if (diff != length)
286
- eprintf(IO_ERROR, "Difference in the output file offsets %ld "
287
- " does not match the original file length ", diff, length);
312
+ // Verify that the output length diff is equal to original file
313
+ int end_ptr = os_pos(os);
314
+ int diff = end_ptr - start_ptr;
315
+ if (diff != length)
316
+ RAISE(IO_ERROR, FILE_OFFSET_MSG);
288
317
 
289
- is_close(is);
318
+ XFINALLY
319
+ is_close(is);
320
+ XENDTRY
290
321
  }
291
322
 
292
323
  void cw_close(CompoundWriter *cw)
293
324
  {
294
- if (cw->merged) eprintf(STATE_ERROR, "Already merged");
325
+ if (cw->merged) RAISE(STATE_ERROR, ALREADY_MERGED_MSG);
295
326
  if (cw->ids->size <= 0)
296
- eprintf(STATE_ERROR, "No Files to merge into the compound file");
327
+ RAISE(STATE_ERROR, NO_FILES_TO_MERGE_MSG);
297
328
 
298
329
  cw->merged = true;
299
330
 
300
- OutStream *os = cw->store->create_output(cw->store, cw->name);
301
- os_write_vint(os, cw->file_entries->size);
302
-
303
- /* Write the directory with all offsets at 0.
304
- * Remember the positions of directory entries so that we can adjust the
305
- * offsets later */
306
- int i;
307
- WFileEntry *wfe;
308
- for (i = 0; i < cw->file_entries->size; i++) {
309
- wfe = (WFileEntry *)cw->file_entries->elems[i];
310
- wfe->dir_offset = os_pos(os);
311
- os_write_long(os, 0); // for now
312
- os_write_string(os, wfe->name);
313
- }
331
+ OutStream * volatile os = NULL;
332
+ TRY
333
+ os = cw->store->create_output(cw->store, cw->name);
334
+ os_write_vint(os, cw->file_entries->size);
335
+
336
+ /* Write the directory with all offsets at 0.
337
+ * Remember the positions of directory entries so that we can adjust the
338
+ * offsets later */
339
+ int i;
340
+ WFileEntry *wfe;
341
+ for (i = 0; i < cw->file_entries->size; i++) {
342
+ wfe = (WFileEntry *)cw->file_entries->elems[i];
343
+ wfe->dir_offset = os_pos(os);
344
+ os_write_long(os, 0); // for now
345
+ os_write_string(os, wfe->name);
346
+ }
314
347
 
315
- /* Open the files and copy their data into the stream. Remember the
316
- * locations of each file's data section. */
317
- for (i = 0; i < cw->file_entries->size; i++) {
318
- wfe = (WFileEntry *)cw->file_entries->elems[i];
319
- wfe->data_offset = os_pos(os);
320
- cw_copy_file(cw, wfe, os);
321
- }
348
+ /* Open the files and copy their data into the stream. Remember the
349
+ * locations of each file's data section. */
350
+ for (i = 0; i < cw->file_entries->size; i++) {
351
+ wfe = (WFileEntry *)cw->file_entries->elems[i];
352
+ wfe->data_offset = os_pos(os);
353
+ cw_copy_file(cw, wfe, os);
354
+ }
322
355
 
323
- /* Write the data offsets into the directory of the compound stream */
324
- for (i = 0; i < cw->file_entries->size; i++) {
325
- wfe = (WFileEntry *)cw->file_entries->elems[i];
326
- os_seek(os, wfe->dir_offset);
327
- os_write_long(os, wfe->data_offset);
328
- }
356
+ /* Write the data offsets into the directory of the compound stream */
357
+ for (i = 0; i < cw->file_entries->size; i++) {
358
+ wfe = (WFileEntry *)cw->file_entries->elems[i];
359
+ os_seek(os, wfe->dir_offset);
360
+ os_write_long(os, wfe->data_offset);
361
+ }
329
362
 
330
- os_close(os);
331
- hs_destroy(cw->ids);
332
- ary_destroy(cw->file_entries);
333
- free(cw);
363
+ XFINALLY
364
+ if (os) os_close(os);
365
+ hs_destroy(cw->ids);
366
+ ary_destroy(cw->file_entries);
367
+ free(cw);
368
+ break;
369
+ XENDTRY
334
370
  }
335
371