ferret 0.9.6 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/ext/store.c CHANGED
@@ -1,86 +1,584 @@
1
1
  #include "store.h"
2
+ #include <string.h>
2
3
 
3
- static char * const COULD_NOT_OBTAIN_LOCK = "Could not obtain lock";
4
+ #define VINT_MAX_LEN 10
5
+ #define VINT_END BUFFER_SIZE - VINT_MAX_LEN
4
6
 
5
- /**
6
- * Call the function +func+ with the +lock+ locked. The argument +arg+ will be
7
- * passed to +func+. If you need to pass more than one argument you should use
8
- * a struct. When the function is finished, release the lock.
9
- *
10
- * @param lock lock to be locked while func is called
11
- * @param func function to call with the lock locked
12
- * @param arg argument to pass to the function
13
- * @throws IO_ERROR if the lock is already locked
14
- * @see with_lock_name
7
+ /*
8
+ * TODO: add try finally
15
9
  */
16
10
  void with_lock(Lock *lock, void (*func)(void *arg), void *arg)
17
11
  {
18
- if (!lock->obtain(lock)) {
19
- RAISE(IO_ERROR, COULD_NOT_OBTAIN_LOCK);
20
- }
21
- func(arg);
22
- lock->release(lock);
12
+ if (!lock->obtain(lock)) {
13
+ RAISE(IO_ERROR, "couldn't obtain lock \"%s\"", lock->name);
14
+ }
15
+ func(arg);
16
+ lock->release(lock);
23
17
  }
24
18
 
25
- /**
26
- * Create a lock in the +store+ with the name +lock_name+. Call the function
27
- * +func+ with the lock locked. The argument +arg+ will be passed to +func+.
28
- * If you need to pass more than one argument you should use a struct. When
29
- * the function is finished, release and destroy the lock.
30
- *
31
- * @param store store to open the lock in
32
- * @param lock_name name of the lock to open
33
- * @param func function to call with the lock locked
34
- * @param arg argument to pass to the function
35
- * @throws IO_ERROR if the lock is already locked
36
- * @see with_lock
19
+ /*
20
+ * TODO: add try finally
37
21
  */
38
22
  void with_lock_name(Store *store, char *lock_name,
39
- void (*func)(void *arg), void *arg)
23
+ void (*func)(void *arg), void *arg)
40
24
  {
41
- Lock *lock = store->open_lock(store, lock_name);
42
- if (!lock->obtain(lock)) {
43
- RAISE(IO_ERROR, COULD_NOT_OBTAIN_LOCK);
44
- }
45
- func(arg);
46
- lock->release(lock);
47
- store->close_lock(lock);
25
+ Lock *lock = store->open_lock(store, lock_name);
26
+ if (!lock->obtain(lock)) {
27
+ RAISE(LOCK_ERROR, "couldn't obtain lock \"%s\"", lock->name);
28
+ }
29
+ func(arg);
30
+ lock->release(lock);
31
+ store->close_lock(lock);
48
32
  }
49
33
 
50
- /**
51
- * Remove a reference to the store. If the reference count gets to zero free
52
- * all resources used by the store.
53
- *
54
- * @param store the store to be dereferenced
55
- */
56
34
  void store_deref(Store *store)
57
35
  {
58
- mutex_lock(&store->mutex);
59
- if (--store->ref_cnt == 0) {
60
- store->close_i(store);
61
- } else {
62
- mutex_unlock(&store->mutex);
63
- }
36
+ mutex_lock(&store->mutex_i);
37
+ if (--store->ref_cnt == 0) {
38
+ store->close_i(store);
39
+ }
40
+ else {
41
+ mutex_unlock(&store->mutex_i);
42
+ }
64
43
  }
65
44
 
66
45
  /**
67
46
  * Create a store struct initializing the mutex.
68
47
  */
69
- Store *store_create()
48
+ Store *store_new()
70
49
  {
71
- Store *store = ALLOC(Store);
72
- store->ref_cnt = 1;
73
- mutex_init(&store->mutex, NULL);
74
- mutex_init(&store->ext_mutex, NULL);
75
- return store;
50
+ Store *store = ALLOC(Store);
51
+ store->ref_cnt = 1;
52
+ mutex_init(&store->mutex_i, NULL);
53
+ mutex_init(&store->mutex, NULL);
54
+ return store;
76
55
  }
77
56
 
78
57
  /**
79
- * Destroy the store, destroying the mutex.
58
+ * Destroy the store freeing allocated resources
59
+ *
60
+ * @param store the store struct to free
80
61
  */
81
62
  void store_destroy(Store *store)
82
63
  {
83
- mutex_destroy(&store->mutex);
84
- mutex_destroy(&store->ext_mutex);
85
- free(store);
64
+ mutex_destroy(&store->mutex_i);
65
+ mutex_destroy(&store->mutex);
66
+ free(store);
67
+ }
68
+
69
+ /**
70
+ * Create a newly allocated and initialized OutStream object
71
+ *
72
+ * @return a newly allocated and initialized OutStream object
73
+ */
74
+ OutStream *os_new()
75
+ {
76
+ OutStream *os = ALLOC(OutStream);
77
+ os->buf.start = 0;
78
+ os->buf.pos = 0;
79
+ os->buf.len = 0;
80
+ return os;
81
+ }
82
+
83
+ /**
84
+ * Flush the contents of the OutStream's buffers
85
+ *
86
+ * @param os the OutStream to flush
87
+ */
88
+ inline void os_flush(OutStream *os)
89
+ {
90
+ os->m->flush_i(os, os->buf.buf, os->buf.pos);
91
+ os->buf.start += os->buf.pos;
92
+ os->buf.pos = 0;
93
+ }
94
+
95
+ void os_close(OutStream *os)
96
+ {
97
+ os_flush(os);
98
+ os->m->close_i(os);
99
+ free(os);
100
+ }
101
+
102
+ off_t os_pos(OutStream *os)
103
+ {
104
+ return os->buf.start + os->buf.pos;
105
+ }
106
+
107
+ void os_seek(OutStream *os, off_t new_pos)
108
+ {
109
+ os_flush(os);
110
+ os->buf.start = new_pos;
111
+ os->m->seek_i(os, new_pos);
112
+ }
113
+
114
+ /**
115
+ * Unsafe alternative to os_write_byte. Only use this method if you know there
116
+ * is no chance of buffer overflow.
117
+ */
118
+ #define write_byte(os, b) os->buf.buf[os->buf.pos++] = (uchar)b
119
+
120
+ /**
121
+ * Write a single byte +b+ to the OutStream +os+
122
+ *
123
+ * @param os the OutStream to write to
124
+ * @param b the byte to write
125
+ * @raise IO_ERROR if there is an IO error writing to the filesystem
126
+ */
127
+ inline void os_write_byte(OutStream *os, uchar b)
128
+ {
129
+ if (os->buf.pos >= BUFFER_SIZE) {
130
+ os_flush(os);
131
+ }
132
+ write_byte(os, b);
133
+ }
134
+
135
+ void os_write_bytes(OutStream *os, uchar *buf, int len)
136
+ {
137
+ if (os->buf.pos > 0) { /* flush buffer */
138
+ os_flush(os);
139
+ }
140
+
141
+ if (len < BUFFER_SIZE) {
142
+ os->m->flush_i(os, buf, len);
143
+ os->buf.start += len;
144
+ }
145
+ else {
146
+ int pos = 0;
147
+ int size;
148
+ while (pos < len) {
149
+ if (len - pos < BUFFER_SIZE) {
150
+ size = len - pos;
151
+ }
152
+ else {
153
+ size = BUFFER_SIZE;
154
+ }
155
+ os->m->flush_i(os, buf + pos, size);
156
+ pos += size;
157
+ os->buf.start += size;
158
+ }
159
+ }
160
+ }
161
+
162
+ /**
163
+ * Create a newly allocated and initialized InStream
164
+ *
165
+ * @return a newly allocated and initialized InStream
166
+ */
167
+ InStream *is_new()
168
+ {
169
+ InStream *is = ALLOC(InStream);
170
+ is->buf.start = 0;
171
+ is->buf.pos = 0;
172
+ is->buf.len = 0;
173
+ is->ref_cnt_ptr = ALLOC_AND_ZERO(int);
174
+ return is;
175
+ }
176
+
177
+ /**
178
+ * Refill the InStream's buffer from the store source (filesystem or memory).
179
+ *
180
+ * @param is the InStream to refill
181
+ * @raise IO_ERROR if there is an error reading from the filesystem
182
+ * @raise EOF_ERROR if there is an attempt to read past the end of the file
183
+ */
184
+ void is_refill(InStream *is)
185
+ {
186
+ off_t start = is->buf.start + is->buf.pos;
187
+ off_t last = start + BUFFER_SIZE;
188
+ off_t flen = is->m->length_i(is);
189
+
190
+ if (last > flen) { /* don't read past EOF */
191
+ last = flen;
192
+ }
193
+
194
+ is->buf.len = last - start;
195
+ if (is->buf.len <= 0) {
196
+ RAISE(EOF_ERROR, "current pos = %"F_OFF_T_PFX"d, "
197
+ "file length = %"F_OFF_T_PFX"d", start, flen);
198
+ }
199
+
200
+ is->m->read_i(is, is->buf.buf, is->buf.len);
201
+
202
+ is->buf.start = start;
203
+ is->buf.pos = 0;
204
+ }
205
+
206
+ /**
207
+ * Unsafe alternative to is_read_byte. Only use this method when you know
208
+ * there is no chance that you will read past the end of the InStream's
209
+ * buffer.
210
+ */
211
+ #define read_byte(is) is->buf.buf[is->buf.pos++]
212
+
213
+ /**
214
+ * Read a single byte (unsigned char) from the InStream +is+.
215
+ *
216
+ * @param is the InStream to read from
217
+ * @return a single unsigned char read from the InStream +is+
218
+ * @raise IO_ERROR if there is an error reading from the filesystem
219
+ * @raise EOF_ERROR if there is an attempt to read past the end of the file
220
+ */
221
+ inline uchar is_read_byte(InStream *is)
222
+ {
223
+ if (is->buf.pos >= is->buf.len) {
224
+ is_refill(is);
225
+ }
226
+
227
+ return read_byte(is);
228
+ }
229
+
230
+ off_t is_pos(InStream *is)
231
+ {
232
+ return is->buf.start + is->buf.pos;
233
+ }
234
+
235
+ uchar *is_read_bytes(InStream *is, uchar *buf, int len)
236
+ {
237
+ int i;
238
+ off_t start;
239
+
240
+ if ((is->buf.pos + len) < is->buf.len) {
241
+ for (i = 0; i < len; i++) {
242
+ buf[i] = read_byte(is);
243
+ }
244
+ }
245
+ else { /* read all-at-once */
246
+ start = is_pos(is);
247
+ is->m->seek_i(is, start);
248
+ is->m->read_i(is, buf, len);
249
+
250
+ is->buf.start = start + len; /* adjust stream variables */
251
+ is->buf.pos = 0;
252
+ is->buf.len = 0; /* trigger refill on read */
253
+ }
254
+ return buf;
255
+ }
256
+
257
+ void is_seek(InStream *is, off_t pos)
258
+ {
259
+ if (pos >= is->buf.start && pos < (is->buf.start + is->buf.len)) {
260
+ is->buf.pos = pos - is->buf.start; /* seek within buffer */
261
+ }
262
+ else {
263
+ is->buf.start = pos;
264
+ is->buf.pos = 0;
265
+ is->buf.len = 0; /* trigger refill() on read() */
266
+ is->m->seek_i(is, pos);
267
+ }
268
+ }
269
+
270
+ void is_close(InStream *is)
271
+ {
272
+ if (--(*(is->ref_cnt_ptr)) < 0) {
273
+ is->m->close_i(is);
274
+ free(is->ref_cnt_ptr);
275
+ }
276
+ free(is);
277
+ }
278
+
279
+ InStream *is_clone(InStream *is)
280
+ {
281
+ InStream *new_index_i = ALLOC(InStream);
282
+ memcpy(new_index_i, is, sizeof(InStream));
283
+ (*(new_index_i->ref_cnt_ptr))++;
284
+ return new_index_i;
285
+ }
286
+
287
+ f_i32 is_read_i32(InStream *is)
288
+ {
289
+ return ((f_i32)is_read_byte(is) << 24) |
290
+ ((f_i32)is_read_byte(is) << 16) |
291
+ ((f_i32)is_read_byte(is) << 8) |
292
+ ((f_i32)is_read_byte(is));
86
293
  }
294
+
295
+ f_i64 is_read_i64(InStream *is)
296
+ {
297
+ return ((f_i64)is_read_byte(is) << 56) |
298
+ ((f_i64)is_read_byte(is) << 48) |
299
+ ((f_i64)is_read_byte(is) << 40) |
300
+ ((f_i64)is_read_byte(is) << 32) |
301
+ ((f_i64)is_read_byte(is) << 24) |
302
+ ((f_i64)is_read_byte(is) << 16) |
303
+ ((f_i64)is_read_byte(is) << 8) |
304
+ ((f_i64)is_read_byte(is));
305
+ }
306
+
307
+ f_u32 is_read_u32(InStream *is)
308
+ {
309
+ return ((f_u32)is_read_byte(is) << 24) |
310
+ ((f_u32)is_read_byte(is) << 16) |
311
+ ((f_u32)is_read_byte(is) << 8) |
312
+ ((f_u32)is_read_byte(is));
313
+ }
314
+
315
+ f_u64 is_read_u64(InStream *is)
316
+ {
317
+ return ((f_u64)is_read_byte(is) << 56) |
318
+ ((f_u64)is_read_byte(is) << 48) |
319
+ ((f_u64)is_read_byte(is) << 40) |
320
+ ((f_u64)is_read_byte(is) << 32) |
321
+ ((f_u64)is_read_byte(is) << 24) |
322
+ ((f_u64)is_read_byte(is) << 16) |
323
+ ((f_u64)is_read_byte(is) << 8) |
324
+ ((f_u64)is_read_byte(is));
325
+ }
326
+
327
+ /* optimized to use unchecked read_byte if there is definitely space */
328
+ inline unsigned int is_read_vint(InStream *is)
329
+ {
330
+ register unsigned int res, b;
331
+ register int shift = 7;
332
+
333
+ if (is->buf.pos > (is->buf.len - VINT_MAX_LEN)) {
334
+ b = is_read_byte(is);
335
+ res = b & 0x7F; /* 0x7F = 0b01111111 */
336
+
337
+ while ((b & 0x80) != 0) { /* 0x80 = 0b10000000 */
338
+ b = is_read_byte(is);
339
+ res |= (b & 0x7F) << shift;
340
+ shift += 7;
341
+ }
342
+ }
343
+ else { /* unchecked optimization */
344
+ b = read_byte(is);
345
+ res = b & 0x7F; /* 0x7F = 0b01111111 */
346
+
347
+ while ((b & 0x80) != 0) { /* 0x80 = 0b10000000 */
348
+ b = read_byte(is);
349
+ res |= (b & 0x7F) << shift;
350
+ shift += 7;
351
+ }
352
+ }
353
+
354
+ return res;
355
+ }
356
+
357
+ /* optimized to use unchecked read_byte if there is definitely space */
358
+ inline off_t is_read_voff_t(InStream *is)
359
+ {
360
+ register off_t res, b;
361
+ register int shift = 7;
362
+
363
+ if (is->buf.pos > (is->buf.len - VINT_MAX_LEN)) {
364
+ b = is_read_byte(is);
365
+ res = b & 0x7F; /* 0x7F = 0b01111111 */
366
+
367
+ while ((b & 0x80) != 0) { /* 0x80 = 0b10000000 */
368
+ b = is_read_byte(is);
369
+ res |= (b & 0x7F) << shift;
370
+ shift += 7;
371
+ }
372
+ }
373
+ else { /* unchecked optimization */
374
+ b = read_byte(is);
375
+ res = b & 0x7F; /* 0x7F = 0b01111111 */
376
+
377
+ while ((b & 0x80) != 0) { /* 0x80 = 0b10000000 */
378
+ b = read_byte(is);
379
+ res |= (b & 0x7F) << shift;
380
+ shift += 7;
381
+ }
382
+ }
383
+
384
+ return res;
385
+ }
386
+
387
+ inline void is_skip_vints(InStream *is, register int cnt)
388
+ {
389
+ for (; cnt > 0; cnt--) {
390
+ while ((is_read_byte(is) & 0x80) != 0) {
391
+ }
392
+ }
393
+ }
394
+
395
+ inline void is_read_chars(InStream *is, char *buffer,
396
+ int off, int len)
397
+ {
398
+ int end, i;
399
+
400
+ end = off + len;
401
+
402
+ for (i = off; i < end; i++) {
403
+ buffer[i] = is_read_byte(is);
404
+ }
405
+ }
406
+
407
+ char *is_read_string(InStream *is)
408
+ {
409
+ register int length = (int) is_read_vint(is);
410
+ char *str = ALLOC_N(char, length + 1);
411
+ str[length] = '\0';
412
+
413
+ if (is->buf.pos > (is->buf.len - length)) {
414
+ register int i;
415
+ for (i = 0; i < length; i++) {
416
+ str[i] = is_read_byte(is);
417
+ }
418
+ }
419
+ else { /* unchecked optimization */
420
+ memcpy(str, is->buf.buf + is->buf.pos, length);
421
+ is->buf.pos += length;
422
+ }
423
+
424
+ return str;
425
+ }
426
+
427
+ void os_write_i32(OutStream *os, f_i32 num)
428
+ {
429
+ os_write_byte(os, (uchar)((num >> 24) & 0xFF));
430
+ os_write_byte(os, (uchar)((num >> 16) & 0xFF));
431
+ os_write_byte(os, (uchar)((num >> 8) & 0xFF));
432
+ os_write_byte(os, (uchar)(num & 0xFF));
433
+ }
434
+
435
+ void os_write_i64(OutStream *os, f_i64 num)
436
+ {
437
+ os_write_byte(os, (uchar)((num >> 56) & 0xFF));
438
+ os_write_byte(os, (uchar)((num >> 48) & 0xFF));
439
+ os_write_byte(os, (uchar)((num >> 40) & 0xFF));
440
+ os_write_byte(os, (uchar)((num >> 32) & 0xFF));
441
+ os_write_byte(os, (uchar)((num >> 24) & 0xFF));
442
+ os_write_byte(os, (uchar)((num >> 16) & 0xFF));
443
+ os_write_byte(os, (uchar)((num >> 8) & 0xFF));
444
+ os_write_byte(os, (uchar)(num & 0xFF));
445
+ }
446
+
447
+ void os_write_u32(OutStream *os, f_u32 num)
448
+ {
449
+ os_write_byte(os, (uchar)((num >> 24) & 0xFF));
450
+ os_write_byte(os, (uchar)((num >> 16) & 0xFF));
451
+ os_write_byte(os, (uchar)((num >> 8) & 0xFF));
452
+ os_write_byte(os, (uchar)(num & 0xFF));
453
+ }
454
+
455
+ void os_write_u64(OutStream *os, f_u64 num)
456
+ {
457
+ os_write_byte(os, (uchar)((num >> 56) & 0xFF));
458
+ os_write_byte(os, (uchar)((num >> 48) & 0xFF));
459
+ os_write_byte(os, (uchar)((num >> 40) & 0xFF));
460
+ os_write_byte(os, (uchar)((num >> 32) & 0xFF));
461
+ os_write_byte(os, (uchar)((num >> 24) & 0xFF));
462
+ os_write_byte(os, (uchar)((num >> 16) & 0xFF));
463
+ os_write_byte(os, (uchar)((num >> 8) & 0xFF));
464
+ os_write_byte(os, (uchar)(num & 0xFF));
465
+ }
466
+
467
+ /* optimized to use an unchecked write if there is space */
468
+ inline void os_write_vint(OutStream *os, register unsigned int num)
469
+ {
470
+ if (os->buf.pos > VINT_END) {
471
+ while (num > 127) {
472
+ os_write_byte(os, (uchar)((num & 0x7f) | 0x80));
473
+ num >>= 7;
474
+ }
475
+ os_write_byte(os, (uchar)(num));
476
+ }
477
+ else {
478
+ while (num > 127) {
479
+ write_byte(os, (uchar)((num & 0x7f) | 0x80));
480
+ num >>= 7;
481
+ }
482
+ write_byte(os, (uchar)(num));
483
+ }
484
+ }
485
+
486
+ /* optimized to use an unchecked write if there is space */
487
+ inline void os_write_voff_t(OutStream *os, register off_t num)
488
+ {
489
+ if (os->buf.pos > VINT_END) {
490
+ while (num > 127) {
491
+ os_write_byte(os, (uchar)((num & 0x7f) | 0x80));
492
+ num >>= 7;
493
+ }
494
+ os_write_byte(os, (uchar)num);
495
+ }
496
+ else {
497
+ while (num > 127) {
498
+ write_byte(os, (uchar)((num & 0x7f) | 0x80));
499
+ num >>= 7;
500
+ }
501
+ write_byte(os, (uchar)num);
502
+ }
503
+ }
504
+
505
+ void os_write_string(OutStream *os, char *str)
506
+ {
507
+ int len = (int)strlen(str);
508
+ os_write_vint(os, len);
509
+
510
+ os_write_bytes(os, (uchar *)str, len);
511
+ }
512
+
513
+ /**
514
+ * Determine if the filename is the name of a lock file. Return 1 if it is, 0
515
+ * otherwise.
516
+ *
517
+ * @param filename the name of the file to check
518
+ * @return 1 (true) if the file is a lock file, 0 (false) otherwise
519
+ */
520
+ int file_is_lock(char *filename)
521
+ {
522
+ int start = (int) strlen(filename) - 4;
523
+ return ((start > 0) && (strcmp(LOCK_EXT, &filename[start]) == 0));
524
+ }
525
+
526
+ void is2os_copy_bytes(InStream *is, OutStream *os, int cnt)
527
+ {
528
+ int len;
529
+ uchar buf[BUFFER_SIZE];
530
+
531
+ for (; cnt > 0; cnt -= BUFFER_SIZE) {
532
+ len = ((cnt > BUFFER_SIZE) ? BUFFER_SIZE : cnt);
533
+ is_read_bytes(is, buf, len);
534
+ os_write_bytes(os, buf, len);
535
+ }
536
+ }
537
+
538
+ void is2os_copy_vints(InStream *is, OutStream *os, int cnt)
539
+ {
540
+ uchar b;
541
+ for (; cnt > 0; cnt--) {
542
+ while (((b = is_read_byte(is)) & 0x80) != 0) {
543
+ os_write_byte(os, b);
544
+ }
545
+ os_write_byte(os, b);
546
+ }
547
+ }
548
+
549
+ /**
550
+ * Test argument used to test the store->each function
551
+ */
552
+ struct FileNameConcatArg
553
+ {
554
+ char *p;
555
+ char *end;
556
+ };
557
+
558
+ /**
559
+ * Test function used to test store->each function
560
+ */
561
+ static void concat_filenames(char *fname, void *arg)
562
+ {
563
+ struct FileNameConcatArg *fnca = (struct FileNameConcatArg *)arg;
564
+ if (fnca->p + strlen(fname) + 2 < fnca->end) {
565
+ strcpy(fnca->p, fname);
566
+ fnca->p += strlen(fname);
567
+ *(fnca->p++) = ',';
568
+ *(fnca->p++) = ' ';
569
+ }
570
+ }
571
+
572
+ char *store_to_s(Store *store, char *buf, int buf_size)
573
+ {
574
+ struct FileNameConcatArg fnca;
575
+
576
+ fnca.p = buf;
577
+ fnca.end = buf + buf_size;
578
+ store->each(store, &concat_filenames, &fnca);
579
+ if (fnca.p > buf + 2) {
580
+ fnca.p[-2] = '\0';
581
+ }
582
+ return buf;
583
+ }
584
+