ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/ext/store.c CHANGED
@@ -1,86 +1,584 @@
1
1
  #include "store.h"
2
+ #include <string.h>
2
3
 
3
- static char * const COULD_NOT_OBTAIN_LOCK = "Could not obtain lock";
4
+ #define VINT_MAX_LEN 10
5
+ #define VINT_END BUFFER_SIZE - VINT_MAX_LEN
4
6
 
5
- /**
6
- * Call the function +func+ with the +lock+ locked. The argument +arg+ will be
7
- * passed to +func+. If you need to pass more than one argument you should use
8
- * a struct. When the function is finished, release the lock.
9
- *
10
- * @param lock lock to be locked while func is called
11
- * @param func function to call with the lock locked
12
- * @param arg argument to pass to the function
13
- * @throws IO_ERROR if the lock is already locked
14
- * @see with_lock_name
7
+ /*
8
+ * TODO: add try finally
15
9
  */
16
10
  void with_lock(Lock *lock, void (*func)(void *arg), void *arg)
17
11
  {
18
- if (!lock->obtain(lock)) {
19
- RAISE(IO_ERROR, COULD_NOT_OBTAIN_LOCK);
20
- }
21
- func(arg);
22
- lock->release(lock);
12
+ if (!lock->obtain(lock)) {
13
+ RAISE(IO_ERROR, "couldn't obtain lock \"%s\"", lock->name);
14
+ }
15
+ func(arg);
16
+ lock->release(lock);
23
17
  }
24
18
 
25
- /**
26
- * Create a lock in the +store+ with the name +lock_name+. Call the function
27
- * +func+ with the lock locked. The argument +arg+ will be passed to +func+.
28
- * If you need to pass more than one argument you should use a struct. When
29
- * the function is finished, release and destroy the lock.
30
- *
31
- * @param store store to open the lock in
32
- * @param lock_name name of the lock to open
33
- * @param func function to call with the lock locked
34
- * @param arg argument to pass to the function
35
- * @throws IO_ERROR if the lock is already locked
36
- * @see with_lock
19
+ /*
20
+ * TODO: add try finally
37
21
  */
38
22
  void with_lock_name(Store *store, char *lock_name,
39
- void (*func)(void *arg), void *arg)
23
+ void (*func)(void *arg), void *arg)
40
24
  {
41
- Lock *lock = store->open_lock(store, lock_name);
42
- if (!lock->obtain(lock)) {
43
- RAISE(IO_ERROR, COULD_NOT_OBTAIN_LOCK);
44
- }
45
- func(arg);
46
- lock->release(lock);
47
- store->close_lock(lock);
25
+ Lock *lock = store->open_lock(store, lock_name);
26
+ if (!lock->obtain(lock)) {
27
+ RAISE(LOCK_ERROR, "couldn't obtain lock \"%s\"", lock->name);
28
+ }
29
+ func(arg);
30
+ lock->release(lock);
31
+ store->close_lock(lock);
48
32
  }
49
33
 
50
- /**
51
- * Remove a reference to the store. If the reference count gets to zero free
52
- * all resources used by the store.
53
- *
54
- * @param store the store to be dereferenced
55
- */
56
34
  void store_deref(Store *store)
57
35
  {
58
- mutex_lock(&store->mutex);
59
- if (--store->ref_cnt == 0) {
60
- store->close_i(store);
61
- } else {
62
- mutex_unlock(&store->mutex);
63
- }
36
+ mutex_lock(&store->mutex_i);
37
+ if (--store->ref_cnt == 0) {
38
+ store->close_i(store);
39
+ }
40
+ else {
41
+ mutex_unlock(&store->mutex_i);
42
+ }
64
43
  }
65
44
 
66
45
  /**
67
46
  * Create a store struct initializing the mutex.
68
47
  */
69
- Store *store_create()
48
+ Store *store_new()
70
49
  {
71
- Store *store = ALLOC(Store);
72
- store->ref_cnt = 1;
73
- mutex_init(&store->mutex, NULL);
74
- mutex_init(&store->ext_mutex, NULL);
75
- return store;
50
+ Store *store = ALLOC(Store);
51
+ store->ref_cnt = 1;
52
+ mutex_init(&store->mutex_i, NULL);
53
+ mutex_init(&store->mutex, NULL);
54
+ return store;
76
55
  }
77
56
 
78
57
  /**
79
- * Destroy the store, destroying the mutex.
58
+ * Destroy the store freeing allocated resources
59
+ *
60
+ * @param store the store struct to free
80
61
  */
81
62
  void store_destroy(Store *store)
82
63
  {
83
- mutex_destroy(&store->mutex);
84
- mutex_destroy(&store->ext_mutex);
85
- free(store);
64
+ mutex_destroy(&store->mutex_i);
65
+ mutex_destroy(&store->mutex);
66
+ free(store);
67
+ }
68
+
69
+ /**
70
+ * Create a newly allocated and initialized OutStream object
71
+ *
72
+ * @return a newly allocated and initialized OutStream object
73
+ */
74
+ OutStream *os_new()
75
+ {
76
+ OutStream *os = ALLOC(OutStream);
77
+ os->buf.start = 0;
78
+ os->buf.pos = 0;
79
+ os->buf.len = 0;
80
+ return os;
81
+ }
82
+
83
+ /**
84
+ * Flush the contents of the OutStream's buffers
85
+ *
86
+ * @param the OutStream to flush
87
+ */
88
+ inline void os_flush(OutStream *os)
89
+ {
90
+ os->m->flush_i(os, os->buf.buf, os->buf.pos);
91
+ os->buf.start += os->buf.pos;
92
+ os->buf.pos = 0;
93
+ }
94
+
95
+ void os_close(OutStream *os)
96
+ {
97
+ os_flush(os);
98
+ os->m->close_i(os);
99
+ free(os);
100
+ }
101
+
102
+ off_t os_pos(OutStream *os)
103
+ {
104
+ return os->buf.start + os->buf.pos;
105
+ }
106
+
107
+ void os_seek(OutStream *os, off_t new_pos)
108
+ {
109
+ os_flush(os);
110
+ os->buf.start = new_pos;
111
+ os->m->seek_i(os, new_pos);
112
+ }
113
+
114
+ /**
115
+ * Unsafe alternative to os_write_byte. Only use this method if you know there
116
+ * is no chance of buffer overflow.
117
+ */
118
+ #define write_byte(os, b) os->buf.buf[os->buf.pos++] = (uchar)b
119
+
120
+ /**
121
+ * Write a single byte +b+ to the OutStream +os+
122
+ *
123
+ * @param os the OutStream to write to
124
+ * @param b the byte to write
125
+ * @raise IO_ERROR if there is an IO error writing to the filesystem
126
+ */
127
+ inline void os_write_byte(OutStream *os, uchar b)
128
+ {
129
+ if (os->buf.pos >= BUFFER_SIZE) {
130
+ os_flush(os);
131
+ }
132
+ write_byte(os, b);
133
+ }
134
+
135
+ void os_write_bytes(OutStream *os, uchar *buf, int len)
136
+ {
137
+ if (os->buf.pos > 0) { /* flush buffer */
138
+ os_flush(os);
139
+ }
140
+
141
+ if (len < BUFFER_SIZE) {
142
+ os->m->flush_i(os, buf, len);
143
+ os->buf.start += len;
144
+ }
145
+ else {
146
+ int pos = 0;
147
+ int size;
148
+ while (pos < len) {
149
+ if (len - pos < BUFFER_SIZE) {
150
+ size = len - pos;
151
+ }
152
+ else {
153
+ size = BUFFER_SIZE;
154
+ }
155
+ os->m->flush_i(os, buf + pos, size);
156
+ pos += size;
157
+ os->buf.start += size;
158
+ }
159
+ }
160
+ }
161
+
162
+ /**
163
+ * Create a newly allocated and initialized InStream
164
+ *
165
+ * @return a newly allocated and initialized InStream
166
+ */
167
+ InStream *is_new()
168
+ {
169
+ InStream *is = ALLOC(InStream);
170
+ is->buf.start = 0;
171
+ is->buf.pos = 0;
172
+ is->buf.len = 0;
173
+ is->ref_cnt_ptr = ALLOC_AND_ZERO(int);
174
+ return is;
175
+ }
176
+
177
+ /**
178
+ * Refill the InStream's buffer from the store source (filesystem or memory).
179
+ *
180
+ * @param is the InStream to refill
181
+ * @raise IO_ERROR if there is an error reading from the filesystem
182
+ * @raise EOF_ERROR if there is an attempt to read past the end of the file
183
+ */
184
+ void is_refill(InStream *is)
185
+ {
186
+ off_t start = is->buf.start + is->buf.pos;
187
+ off_t last = start + BUFFER_SIZE;
188
+ off_t flen = is->m->length_i(is);
189
+
190
+ if (last > flen) { /* don't read past EOF */
191
+ last = flen;
192
+ }
193
+
194
+ is->buf.len = last - start;
195
+ if (is->buf.len <= 0) {
196
+ RAISE(EOF_ERROR, "current pos = %"F_OFF_T_PFX"d, "
197
+ "file length = %"F_OFF_T_PFX"d", start, flen);
198
+ }
199
+
200
+ is->m->read_i(is, is->buf.buf, is->buf.len);
201
+
202
+ is->buf.start = start;
203
+ is->buf.pos = 0;
204
+ }
205
+
206
+ /**
207
+ * Unsafe alternative to is_read_byte. Only use this method when you know
208
+ * there is no chance that you will read past the end of the InStream's
209
+ * buffer.
210
+ */
211
+ #define read_byte(is) is->buf.buf[is->buf.pos++]
212
+
213
+ /**
214
+ * Read a single byte (unsigned char) from the InStream +is+.
215
+ *
216
+ * @param is the InStream to read from
217
+ * @return a single unsigned char read from the InStream +is+
218
+ * @raise IO_ERROR if there is an error reading from the filesystem
219
+ * @raise EOF_ERROR if there is an attempt to read past the end of the file
220
+ */
221
+ inline uchar is_read_byte(InStream *is)
222
+ {
223
+ if (is->buf.pos >= is->buf.len) {
224
+ is_refill(is);
225
+ }
226
+
227
+ return read_byte(is);
228
+ }
229
+
230
+ off_t is_pos(InStream *is)
231
+ {
232
+ return is->buf.start + is->buf.pos;
233
+ }
234
+
235
+ uchar *is_read_bytes(InStream *is, uchar *buf, int len)
236
+ {
237
+ int i;
238
+ off_t start;
239
+
240
+ if ((is->buf.pos + len) < is->buf.len) {
241
+ for (i = 0; i < len; i++) {
242
+ buf[i] = read_byte(is);
243
+ }
244
+ }
245
+ else { /* read all-at-once */
246
+ start = is_pos(is);
247
+ is->m->seek_i(is, start);
248
+ is->m->read_i(is, buf, len);
249
+
250
+ is->buf.start = start + len; /* adjust stream variables */
251
+ is->buf.pos = 0;
252
+ is->buf.len = 0; /* trigger refill on read */
253
+ }
254
+ return buf;
255
+ }
256
+
257
+ void is_seek(InStream *is, off_t pos)
258
+ {
259
+ if (pos >= is->buf.start && pos < (is->buf.start + is->buf.len)) {
260
+ is->buf.pos = pos - is->buf.start; /* seek within buffer */
261
+ }
262
+ else {
263
+ is->buf.start = pos;
264
+ is->buf.pos = 0;
265
+ is->buf.len = 0; /* trigger refill() on read() */
266
+ is->m->seek_i(is, pos);
267
+ }
268
+ }
269
+
270
+ void is_close(InStream *is)
271
+ {
272
+ if (--(*(is->ref_cnt_ptr)) < 0) {
273
+ is->m->close_i(is);
274
+ free(is->ref_cnt_ptr);
275
+ }
276
+ free(is);
277
+ }
278
+
279
+ InStream *is_clone(InStream *is)
280
+ {
281
+ InStream *new_index_i = ALLOC(InStream);
282
+ memcpy(new_index_i, is, sizeof(InStream));
283
+ (*(new_index_i->ref_cnt_ptr))++;
284
+ return new_index_i;
285
+ }
286
+
287
+ f_i32 is_read_i32(InStream *is)
288
+ {
289
+ return ((f_i32)is_read_byte(is) << 24) |
290
+ ((f_i32)is_read_byte(is) << 16) |
291
+ ((f_i32)is_read_byte(is) << 8) |
292
+ ((f_i32)is_read_byte(is));
86
293
  }
294
+
295
+ f_i64 is_read_i64(InStream *is)
296
+ {
297
+ return ((f_i64)is_read_byte(is) << 56) |
298
+ ((f_i64)is_read_byte(is) << 48) |
299
+ ((f_i64)is_read_byte(is) << 40) |
300
+ ((f_i64)is_read_byte(is) << 32) |
301
+ ((f_i64)is_read_byte(is) << 24) |
302
+ ((f_i64)is_read_byte(is) << 16) |
303
+ ((f_i64)is_read_byte(is) << 8) |
304
+ ((f_i64)is_read_byte(is));
305
+ }
306
+
307
+ f_u32 is_read_u32(InStream *is)
308
+ {
309
+ return ((f_u32)is_read_byte(is) << 24) |
310
+ ((f_u32)is_read_byte(is) << 16) |
311
+ ((f_u32)is_read_byte(is) << 8) |
312
+ ((f_u32)is_read_byte(is));
313
+ }
314
+
315
+ f_u64 is_read_u64(InStream *is)
316
+ {
317
+ return ((f_u64)is_read_byte(is) << 56) |
318
+ ((f_u64)is_read_byte(is) << 48) |
319
+ ((f_u64)is_read_byte(is) << 40) |
320
+ ((f_u64)is_read_byte(is) << 32) |
321
+ ((f_u64)is_read_byte(is) << 24) |
322
+ ((f_u64)is_read_byte(is) << 16) |
323
+ ((f_u64)is_read_byte(is) << 8) |
324
+ ((f_u64)is_read_byte(is));
325
+ }
326
+
327
+ /* optimized to use unchecked read_byte if there is definitely space */
328
+ inline unsigned int is_read_vint(InStream *is)
329
+ {
330
+ register unsigned int res, b;
331
+ register int shift = 7;
332
+
333
+ if (is->buf.pos > (is->buf.len - VINT_MAX_LEN)) {
334
+ b = is_read_byte(is);
335
+ res = b & 0x7F; /* 0x7F = 0b01111111 */
336
+
337
+ while ((b & 0x80) != 0) { /* 0x80 = 0b10000000 */
338
+ b = is_read_byte(is);
339
+ res |= (b & 0x7F) << shift;
340
+ shift += 7;
341
+ }
342
+ }
343
+ else { /* unchecked optimization */
344
+ b = read_byte(is);
345
+ res = b & 0x7F; /* 0x7F = 0b01111111 */
346
+
347
+ while ((b & 0x80) != 0) { /* 0x80 = 0b10000000 */
348
+ b = read_byte(is);
349
+ res |= (b & 0x7F) << shift;
350
+ shift += 7;
351
+ }
352
+ }
353
+
354
+ return res;
355
+ }
356
+
357
+ /* optimized to use unchecked read_byte if there is definitely space */
358
+ inline off_t is_read_voff_t(InStream *is)
359
+ {
360
+ register off_t res, b;
361
+ register int shift = 7;
362
+
363
+ if (is->buf.pos > (is->buf.len - VINT_MAX_LEN)) {
364
+ b = is_read_byte(is);
365
+ res = b & 0x7F; /* 0x7F = 0b01111111 */
366
+
367
+ while ((b & 0x80) != 0) { /* 0x80 = 0b10000000 */
368
+ b = is_read_byte(is);
369
+ res |= (b & 0x7F) << shift;
370
+ shift += 7;
371
+ }
372
+ }
373
+ else { /* unchecked optimization */
374
+ b = read_byte(is);
375
+ res = b & 0x7F; /* 0x7F = 0b01111111 */
376
+
377
+ while ((b & 0x80) != 0) { /* 0x80 = 0b10000000 */
378
+ b = read_byte(is);
379
+ res |= (b & 0x7F) << shift;
380
+ shift += 7;
381
+ }
382
+ }
383
+
384
+ return res;
385
+ }
386
+
387
+ inline void is_skip_vints(InStream *is, register int cnt)
388
+ {
389
+ for (; cnt > 0; cnt--) {
390
+ while ((is_read_byte(is) & 0x80) != 0) {
391
+ }
392
+ }
393
+ }
394
+
395
+ inline void is_read_chars(InStream *is, char *buffer,
396
+ int off, int len)
397
+ {
398
+ int end, i;
399
+
400
+ end = off + len;
401
+
402
+ for (i = off; i < end; i++) {
403
+ buffer[i] = is_read_byte(is);
404
+ }
405
+ }
406
+
407
+ char *is_read_string(InStream *is)
408
+ {
409
+ register int length = (int) is_read_vint(is);
410
+ char *str = ALLOC_N(char, length + 1);
411
+ str[length] = '\0';
412
+
413
+ if (is->buf.pos > (is->buf.len - length)) {
414
+ register int i;
415
+ for (i = 0; i < length; i++) {
416
+ str[i] = is_read_byte(is);
417
+ }
418
+ }
419
+ else { /* unchecked optimization */
420
+ memcpy(str, is->buf.buf + is->buf.pos, length);
421
+ is->buf.pos += length;
422
+ }
423
+
424
+ return str;
425
+ }
426
+
427
+ void os_write_i32(OutStream *os, f_i32 num)
428
+ {
429
+ os_write_byte(os, (uchar)((num >> 24) & 0xFF));
430
+ os_write_byte(os, (uchar)((num >> 16) & 0xFF));
431
+ os_write_byte(os, (uchar)((num >> 8) & 0xFF));
432
+ os_write_byte(os, (uchar)(num & 0xFF));
433
+ }
434
+
435
+ void os_write_i64(OutStream *os, f_i64 num)
436
+ {
437
+ os_write_byte(os, (uchar)((num >> 56) & 0xFF));
438
+ os_write_byte(os, (uchar)((num >> 48) & 0xFF));
439
+ os_write_byte(os, (uchar)((num >> 40) & 0xFF));
440
+ os_write_byte(os, (uchar)((num >> 32) & 0xFF));
441
+ os_write_byte(os, (uchar)((num >> 24) & 0xFF));
442
+ os_write_byte(os, (uchar)((num >> 16) & 0xFF));
443
+ os_write_byte(os, (uchar)((num >> 8) & 0xFF));
444
+ os_write_byte(os, (uchar)(num & 0xFF));
445
+ }
446
+
447
+ void os_write_u32(OutStream *os, f_u32 num)
448
+ {
449
+ os_write_byte(os, (uchar)((num >> 24) & 0xFF));
450
+ os_write_byte(os, (uchar)((num >> 16) & 0xFF));
451
+ os_write_byte(os, (uchar)((num >> 8) & 0xFF));
452
+ os_write_byte(os, (uchar)(num & 0xFF));
453
+ }
454
+
455
+ void os_write_u64(OutStream *os, f_u64 num)
456
+ {
457
+ os_write_byte(os, (uchar)((num >> 56) & 0xFF));
458
+ os_write_byte(os, (uchar)((num >> 48) & 0xFF));
459
+ os_write_byte(os, (uchar)((num >> 40) & 0xFF));
460
+ os_write_byte(os, (uchar)((num >> 32) & 0xFF));
461
+ os_write_byte(os, (uchar)((num >> 24) & 0xFF));
462
+ os_write_byte(os, (uchar)((num >> 16) & 0xFF));
463
+ os_write_byte(os, (uchar)((num >> 8) & 0xFF));
464
+ os_write_byte(os, (uchar)(num & 0xFF));
465
+ }
466
+
467
+ /* optimized to use an unchecked write if there is space */
468
+ inline void os_write_vint(OutStream *os, register unsigned int num)
469
+ {
470
+ if (os->buf.pos > VINT_END) {
471
+ while (num > 127) {
472
+ os_write_byte(os, (uchar)((num & 0x7f) | 0x80));
473
+ num >>= 7;
474
+ }
475
+ os_write_byte(os, (uchar)(num));
476
+ }
477
+ else {
478
+ while (num > 127) {
479
+ write_byte(os, (uchar)((num & 0x7f) | 0x80));
480
+ num >>= 7;
481
+ }
482
+ write_byte(os, (uchar)(num));
483
+ }
484
+ }
485
+
486
+ /* optimized to use an unchecked write if there is space */
487
+ inline void os_write_voff_t(OutStream *os, register off_t num)
488
+ {
489
+ if (os->buf.pos > VINT_END) {
490
+ while (num > 127) {
491
+ os_write_byte(os, (uchar)((num & 0x7f) | 0x80));
492
+ num >>= 7;
493
+ }
494
+ os_write_byte(os, (uchar)num);
495
+ }
496
+ else {
497
+ while (num > 127) {
498
+ write_byte(os, (uchar)((num & 0x7f) | 0x80));
499
+ num >>= 7;
500
+ }
501
+ write_byte(os, (uchar)num);
502
+ }
503
+ }
504
+
505
+ void os_write_string(OutStream *os, char *str)
506
+ {
507
+ int len = (int)strlen(str);
508
+ os_write_vint(os, len);
509
+
510
+ os_write_bytes(os, (uchar *)str, len);
511
+ }
512
+
513
+ /**
514
+ * Determine if the filename is the name of a lock file. Return 1 if it is, 0
515
+ * otherwise.
516
+ *
517
+ * @param filename the name of the file to check
518
+ * @return 1 (true) if the file is a lock file, 0 (false) otherwise
519
+ */
520
+ int file_is_lock(char *filename)
521
+ {
522
+ int start = (int) strlen(filename) - 4;
523
+ return ((start > 0) && (strcmp(LOCK_EXT, &filename[start]) == 0));
524
+ }
525
+
526
+ void is2os_copy_bytes(InStream *is, OutStream *os, int cnt)
527
+ {
528
+ int len;
529
+ uchar buf[BUFFER_SIZE];
530
+
531
+ for (; cnt > 0; cnt -= BUFFER_SIZE) {
532
+ len = ((cnt > BUFFER_SIZE) ? BUFFER_SIZE : cnt);
533
+ is_read_bytes(is, buf, len);
534
+ os_write_bytes(os, buf, len);
535
+ }
536
+ }
537
+
538
+ void is2os_copy_vints(InStream *is, OutStream *os, int cnt)
539
+ {
540
+ uchar b;
541
+ for (; cnt > 0; cnt--) {
542
+ while (((b = is_read_byte(is)) & 0x80) != 0) {
543
+ os_write_byte(os, b);
544
+ }
545
+ os_write_byte(os, b);
546
+ }
547
+ }
548
+
549
+ /**
550
+ * Test argument used to test the store->each function
551
+ */
552
+ struct FileNameConcatArg
553
+ {
554
+ char *p;
555
+ char *end;
556
+ };
557
+
558
+ /**
559
+ * Test function used to test store->each function
560
+ */
561
+ static void concat_filenames(char *fname, void *arg)
562
+ {
563
+ struct FileNameConcatArg *fnca = (struct FileNameConcatArg *)arg;
564
+ if (fnca->p + strlen(fname) + 2 < fnca->end) {
565
+ strcpy(fnca->p, fname);
566
+ fnca->p += strlen(fname);
567
+ *(fnca->p++) = ',';
568
+ *(fnca->p++) = ' ';
569
+ }
570
+ }
571
+
572
+ char *store_to_s(Store *store, char *buf, int buf_size)
573
+ {
574
+ struct FileNameConcatArg fnca;
575
+
576
+ fnca.p = buf;
577
+ fnca.end = buf + buf_size;
578
+ store->each(store, &concat_filenames, &fnca);
579
+ if (fnca.p > buf + 2) {
580
+ fnca.p[-2] = '\0';
581
+ }
582
+ return buf;
583
+ }
584
+