ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/ext/priorityqueue.c CHANGED
@@ -1,228 +1,151 @@
1
- #include <priorityqueue.h>
2
-
3
- PriorityQueue *pq_create(int max_size, bool (*less_than)(void *p1, void *p2))
4
- {
5
- PriorityQueue *pq = ALLOC(PriorityQueue);
6
- pq->count = 0;
7
- pq->size = max_size;
8
- pq->heap = ALLOC_N(void *, (max_size + 1));
9
- pq->lt = less_than;
10
- pq->free_elem = &free;
11
- return pq;
12
- }
13
-
14
- void pq_destroy(PriorityQueue *pq)
15
- {
16
- free(pq->heap);
17
- free(pq);
18
- }
1
+ #include <string.h>
2
+ #include "priorityqueue.h"
19
3
 
20
- void pq_up(PriorityQueue *pq)
21
- {
22
- void **heap = pq->heap;
23
- void *node;
24
- int i = pq->count;
25
- int j = i >> 1;
26
-
27
- node = heap[i];
28
-
29
- while ((j > 0) && pq->lt(node, heap[j])) {
30
- heap[i] = heap[j];
31
- i = j;
32
- j = j >> 1;
33
- }
34
- heap[i] = node;
35
- }
4
+ #define START_CAPA 127
36
5
 
37
- void pq_down(PriorityQueue *pq)
6
+ PriorityQueue *pq_new(const int capa,
7
+ bool (*less_than)(const void *p1, const void *p2),
8
+ void (*free_elem)(void *elem))
38
9
  {
39
- register int i = 1;
40
- register int j = 2; //i << 1;
41
- register int k = 3; //j + 1;
42
- register int count = pq->count;
43
- void **heap = pq->heap;
44
- void *node = heap[i]; // save top node
45
-
46
- if ((k <= count) && (pq->lt(heap[k], heap[j])))
47
- j = k;
48
-
49
- while ((j <= count) && pq->lt(heap[j], node)) {
50
- heap[i] = heap[j]; // shift up child
51
- i = j;
52
- j = i << 1;
53
- k = j + 1;
54
- if ((k <= count) && pq->lt(heap[k], heap[j]))
55
- j = k;
56
- }
57
- heap[i] = node;
58
- }
10
+ PriorityQueue *pq = ALLOC(PriorityQueue);
11
+ pq->size = 0;
12
+ pq->capa = capa;
13
+ pq->mem_capa = (START_CAPA > capa ? capa : START_CAPA) + 1;
14
+ pq->heap = ALLOC_N(void *, pq->mem_capa);
15
+ pq->less_than_i = less_than;
59
16
 
60
- void pq_push(PriorityQueue *pq, void *elem)
61
- {
62
- pq->count++;
63
- pq->heap[pq->count] = elem;
64
- pq_up(pq);
17
+ /* need to set this yourself if you want to change it */
18
+ pq->free_elem_i = free_elem ? free_elem : &dummy_free;
19
+ return pq;
65
20
  }
66
21
 
67
- void *pq_top(PriorityQueue *pq)
22
+ PriorityQueue *pq_clone(PriorityQueue *pq)
68
23
  {
69
- return pq->heap[1];
70
- }
24
+ PriorityQueue *new_pq = ALLOC(PriorityQueue);
25
+ memcpy(new_pq, pq, sizeof(PriorityQueue));
26
+ new_pq->heap = ALLOC_N(void *, new_pq->mem_capa);
27
+ memcpy(new_pq->heap, pq->heap, sizeof(void *) * (new_pq->size + 1));
71
28
 
72
- void *pq_pop(PriorityQueue *pq)
73
- {
74
- if (pq->count > 0) {
75
- void *result = pq->heap[1]; // save first value
76
- pq->heap[1] = pq->heap[pq->count]; // move last to first
77
- pq->heap[pq->count] = NULL;
78
- pq->count--;
79
- pq_down(pq); // adjust heap
80
- return result;
81
- } else {
82
- return NULL;
83
- }
29
+ return new_pq;
84
30
  }
85
31
 
86
32
  void pq_clear(PriorityQueue *pq)
87
33
  {
88
- int i;
89
- for (i = 1; i <= pq->count; i++) {
90
- pq->free_elem(pq->heap[i]);
91
- pq->heap[i] = NULL;
92
- }
93
- pq->count = 0;
94
- }
95
-
96
- int pq_insert(PriorityQueue *pq, void *elem)
97
- {
98
- if (pq->count < pq->size) {
99
- pq_push(pq, elem);
100
- return true;
101
- } else if (pq->count > 0 && pq->lt(pq_top(pq), elem)) {
102
- pq->free_elem(pq->heap[1]);
103
- pq->heap[1] = elem;
104
- pq_down(pq);
105
- return true;
106
- } else {
107
- pq->free_elem(elem);
108
- return false;
109
- }
34
+ int i;
35
+ for (i = 1; i <= pq->size; i++) {
36
+ pq->free_elem_i(pq->heap[i]);
37
+ pq->heap[i] = NULL;
38
+ }
39
+ pq->size = 0;
110
40
  }
111
41
 
112
- /*****************************************************************************
113
- *
114
- * PriorityQueue2
115
- *
116
- *****************************************************************************/
117
-
118
- PriorityQueue2 *pq2_create(int max_size,
119
- bool (*less_than)(PriorityQueue2 *pq, void *p1, void *p2),
120
- free_ft destroy)
42
+ void pq_free(PriorityQueue *pq)
121
43
  {
122
- PriorityQueue2 *pq = ALLOC(PriorityQueue2);
123
- pq->count = 0;
124
- pq->size = max_size;
125
- pq->heap = ALLOC_N(void *, (max_size + 1));
126
- pq->lt = less_than;
127
- pq->free_elem = &free;
128
- pq->destroy = destroy;
129
- return pq;
44
+ free(pq->heap);
45
+ free(pq);
130
46
  }
131
47
 
132
- void pq2_destroy(PriorityQueue2 *pq)
48
+ void pq_destroy(PriorityQueue *pq)
133
49
  {
134
- free(pq->heap);
135
- free(pq);
50
+ pq_clear(pq);
51
+ pq_free(pq);
136
52
  }
137
53
 
138
- void pq2_up(PriorityQueue2 *pq)
54
+ /**
55
+ * This method is used internally by pq_push. It is similar to pq_down except
56
+ * that where pq_down reorders the elements from the top, pq_up reorders from
57
+ * the bottom.
58
+ *
59
+ * @param pq the PriorityQueue to reorder
60
+ */
61
+ static void pq_up(PriorityQueue *pq)
139
62
  {
140
- void **heap = pq->heap;
141
- void *node;
142
- int i = pq->count;
143
- int j = i >> 1;
144
-
145
- node = heap[i];
146
-
147
- while ((j > 0) && pq->lt(pq, node, heap[j])) {
148
- heap[i] = heap[j];
149
- i = j;
150
- j = j >> 1;
151
- }
152
- heap[i] = node;
153
- }
63
+ void **heap = pq->heap;
64
+ void *node;
65
+ int i = pq->size;
66
+ int j = i >> 1;
154
67
 
155
- void pq2_down(PriorityQueue2 *pq)
156
- {
157
- register int i = 1;
158
- register int j = 2; //i << 1;
159
- register int k = 3; //j + 1;
160
- register int count = pq->count;
161
- void **heap = pq->heap;
162
- void *node = heap[i]; // save top node
163
-
164
- if ((k <= count) && (pq->lt(pq, heap[k], heap[j])))
165
- j = k;
166
-
167
- while ((j <= count) && pq->lt(pq, heap[j], node)) {
168
- heap[i] = heap[j]; // shift up child
169
- i = j;
170
- j = i << 1;
171
- k = j + 1;
172
- if ((k <= count) && pq->lt(pq, heap[k], heap[j]))
173
- j = k;
174
- }
175
- heap[i] = node;
176
- }
68
+ node = heap[i];
177
69
 
178
- void pq2_push(PriorityQueue2 *pq, void *elem)
179
- {
180
- pq->count++;
181
- pq->heap[pq->count] = elem;
182
- pq2_up(pq);
70
+ while ((j > 0) && pq->less_than_i(node, heap[j])) {
71
+ heap[i] = heap[j];
72
+ i = j;
73
+ j = j >> 1;
74
+ }
75
+ heap[i] = node;
183
76
  }
184
77
 
185
- void *pq2_top(PriorityQueue2 *pq)
78
+ void pq_down(PriorityQueue *pq)
186
79
  {
187
- return pq->heap[1];
80
+ register int i = 1;
81
+ register int j = 2; /* i << 1; */
82
+ register int k = 3; /* j + 1; */
83
+ register int size = pq->size;
84
+ void **heap = pq->heap;
85
+ void *node = heap[i]; /* save top node */
86
+
87
+ if ((k <= size) && (pq->less_than_i(heap[k], heap[j]))) {
88
+ j = k;
89
+ }
90
+
91
+ while ((j <= size) && pq->less_than_i(heap[j], node)) {
92
+ heap[i] = heap[j]; /* shift up child */
93
+ i = j;
94
+ j = i << 1;
95
+ k = j + 1;
96
+ if ((k <= size) && pq->less_than_i(heap[k], heap[j])) {
97
+ j = k;
98
+ }
99
+ }
100
+ heap[i] = node;
101
+ }
102
+
103
+ void pq_push(PriorityQueue *pq, void *elem)
104
+ {
105
+ pq->size++;
106
+ if (pq->size >= pq->mem_capa) {
107
+ pq->mem_capa <<= 1;
108
+ REALLOC_N(pq->heap, void *, pq->mem_capa);
109
+ }
110
+ pq->heap[pq->size] = elem;
111
+ pq_up(pq);
112
+ }
113
+
114
+ int pq_insert(PriorityQueue *pq, void *elem)
115
+ {
116
+ if (pq->size < pq->capa) {
117
+ pq_push(pq, elem);
118
+ return PQ_ADDED;
119
+ }
120
+ else if (pq->size > 0 && pq->less_than_i(pq->heap[1], elem)) {
121
+ pq->free_elem_i(pq->heap[1]);
122
+ pq->heap[1] = elem;
123
+ pq_down(pq);
124
+ return PQ_INSERTED;
125
+ }
126
+ else {
127
+ pq->free_elem_i(elem);
128
+ return PQ_DROPPED;
129
+ }
188
130
  }
189
131
 
190
- void *pq2_pop(PriorityQueue2 *pq)
132
+ void *pq_top(PriorityQueue *pq)
191
133
  {
192
- if (pq->count > 0) {
193
- void *result = pq->heap[1]; // save first value
194
- pq->heap[1] = pq->heap[pq->count]; // move last to first
195
- pq->heap[pq->count] = NULL;
196
- pq->count--;
197
- pq2_down(pq); // adjust heap
198
- return result;
199
- } else {
200
- return NULL;
201
- }
134
+ return pq->size ? pq->heap[1] : NULL;
202
135
  }
203
136
 
204
- void pq2_clear(PriorityQueue2 *pq)
137
+ void *pq_pop(PriorityQueue *pq)
205
138
  {
206
- int i;
207
- for (i = 1; i <= pq->count; i++) {
208
- pq->free_elem(pq->heap[i]);
209
- pq->heap[i] = NULL;
210
- }
211
- pq->count = 0;
139
+ if (pq->size > 0) {
140
+ void *result = pq->heap[1]; /* save first value */
141
+ pq->heap[1] = pq->heap[pq->size]; /* move last to first */
142
+ pq->heap[pq->size] = NULL;
143
+ pq->size--;
144
+ pq_down(pq); /* adjust heap */
145
+ return result;
146
+ }
147
+ else {
148
+ return NULL;
149
+ }
212
150
  }
213
151
 
214
- int pq2_insert(PriorityQueue2 *pq, void *elem)
215
- {
216
- if (pq->count < pq->size) {
217
- pq2_push(pq, elem);
218
- return true;
219
- } else if (pq->count > 0 && pq->lt(pq, pq2_top(pq), elem)) {
220
- pq->free_elem(pq->heap[1]);
221
- pq->heap[1] = elem;
222
- pq2_down(pq);
223
- return true;
224
- } else {
225
- pq->free_elem(elem);
226
- return false;
227
- }
228
- }
data/ext/priorityqueue.h CHANGED
@@ -3,45 +3,141 @@
3
3
 
4
4
  #include "global.h"
5
5
 
6
- typedef bool (*lt_ft)(void *p1, void *p2);
7
-
8
- typedef struct PriorityQueue {
9
- int count;
10
- int size;
11
- void **heap;
12
- lt_ft lt;
13
- //bool (*lt)(void *p1, void *p2);
14
- free_ft free_elem;
6
+ typedef bool(*lt_ft) (const void *p1, const void *p2);
7
+
8
+ /**
9
+ * A PriorityQueue has a fixed size and contains a less_than function and a
10
+ * free_elem function specific to the data type to be stored in the queue.
11
+ */
12
+ typedef struct PriorityQueue
13
+ {
14
+ int size;
15
+ int capa;
16
+ int mem_capa;
17
+ void **heap;
18
+ lt_ft less_than_i;
19
+ free_ft free_elem_i;
15
20
  } PriorityQueue;
16
21
 
17
- PriorityQueue *pq_create(int max_size, bool (*less_than)(void *p1, void *p2));
18
- void pq_destroy(PriorityQueue *pq);
19
- void pq_push(PriorityQueue *pq, void *elem);
20
- void *pq_top(PriorityQueue *pq);
21
- void *pq_pop(PriorityQueue *pq);
22
- void pq_down(PriorityQueue *pq);
23
- void pq_clear(PriorityQueue *pq);
24
- int pq_insert(PriorityQueue *pq, void *elem);
25
- #define pq_full(pq) ((pq)->count == (pq)->size)
26
-
27
- typedef struct PriorityQueue2 {
28
- int count;
29
- int size;
30
- void **heap;
31
- void *data;
32
- bool (*lt)(struct PriorityQueue2 *pq, void *p1, void *p2);
33
- void (*free_elem)(void *p);
34
- free_ft destroy;
35
- } PriorityQueue2;
36
-
37
- PriorityQueue2 *pq2_create(int max_size,
38
- bool (*less_than)(PriorityQueue2 *pq, void *p1, void *p2),
39
- free_ft destroy);
40
- void pq2_destroy(PriorityQueue2 *pq);
41
- void pq2_push(PriorityQueue2 *pq, void *elem);
42
- void *pq2_top(PriorityQueue2 *pq);
43
- void *pq2_pop(PriorityQueue2 *pq);
44
- void pq2_down(PriorityQueue2 *pq);
45
- void pq2_clear(PriorityQueue2 *pq);
46
- int pq2_insert(PriorityQueue2 *pq, void *elem);
22
+ /**
23
+ * Create a new PriorityQueue setting the less_than and free_elem for this
24
+ * specific PriorityQueue.
25
+ *
26
+ * @param capa the capacity of the PriorityQueue. As more than the capacity is
27
+ * added to the queue the least valued elements drop off the bottom.
28
+ * @param less_than the function to determine whether one value is less than
29
+ * another for this particular PriorityQueue
30
+ * @param free_elem the function to free the elements in the PriorityQueue
31
+ * when it is destroyed or there is insertion overflow
32
+ * @return a newly allocated PriorityQueue
33
+ */
34
+ extern PriorityQueue *pq_new(int capa,
35
+ bool (*less_than)(const void *p1, const void *p2),
36
+ void (*free_elem)(void *elem));
37
+
38
+ /**
39
+ * Allocate a clone of the PriorityQueue. This can be used if you want to scan
40
+ * through all elements of the PriorityQueue but you don't want to have to
41
+ * remove them all and add them all again.
42
+ *
43
+ * @param pq the priority queue to clone
44
+ * @return a clone of the original priority queue
45
+ */
46
+ extern PriorityQueue *pq_clone(PriorityQueue *pq);
47
+
48
+ /**
49
+ * Clear all elements from the PriorityQueue and reset the size to 0. When
50
+ * the elements are removed from the PriorityQueue, free_elem is used to free
51
+ * them, unless it was set to NULL in which case nothing will happen to them.
52
+ *
53
+ * @param self the PriorityQueue to clear
54
+ */
55
+ extern void pq_clear(PriorityQueue *self);
56
+
57
+ /**
58
+ * Free the memory allocated to the PriorityQueue. This function does nothing
59
+ * to the elements in the PriorityQueue itself. To destroy them also, use
60
+ * pq_destroy.
61
+ *
62
+ * @param self the PriorityQueue to free
63
+ */
64
+ extern void pq_free(PriorityQueue *self);
65
+
66
+ /**
67
+ * Destroy the PriorityQueue, freeing all memory allocated to it and also
68
+ * destroying all the elements contained by it. This method is equivalent to
69
+ * calling pq_clear followed by pq_free.
70
+ *
71
+ * @param self the PriorityQueue to destroy
72
+ */
73
+ extern void pq_destroy(PriorityQueue *self);
74
+
75
+ /**
76
+ * Reorder the PriorityQueue after the top element has been modified. This
77
+ * method is used especially when the PriorityQueue contains a queue of
78
+ * iterators. When the top iterator is incremented you should call this
79
+ * method.
80
+ *
81
+ * @param self the PriorityQueue to reorder
82
+ */
83
+ extern void pq_down(PriorityQueue *self);
84
+
85
+ /**
86
+ * Add another element to the PriorityQueue. This method should only be used
87
+ * when the PriorityQueue has enough space allocated to hold all elements
88
+ * added. If there is a chance that you will add more than the amount you have
89
+ * allocated then you should use pq_insert. pq_insert will handle insertion
90
+ * overflow.
91
+ *
92
+ * @param self the PriorityQueue to add the element to
93
+ * @param elem the element to add to the PriorityQueue
94
+ */
95
+ extern void pq_push(PriorityQueue *self, void *elem);
96
+
97
+ #define PQ_DROPPED 0
98
+ #define PQ_ADDED 1
99
+ #define PQ_INSERTED 2
100
+ /**
101
+ * Add another element to the PriorityQueue. Unlike pq_push, this method
102
+ * handles insertion overflow. That is, when you insert more elements than the
103
+ * capacity of the PriorityQueue, the elements are dropped off the bottom and
104
+ * freed using the free_elem function.
105
+ *
106
+ * @param self the PriorityQueue to add the element to
107
+ * @param elem the element to add to the PriorityQueue
108
+ * @returns one of three values;
109
+ * <pre>
110
+ * 0 == PQ_DROPPED the element was too small (according to the less_than
111
+ * function) so it was destroyed
112
+ * 1 == PQ_ADDED the element was successfully added
113
+ * 2 == PQ_INSERTED the element was successfully added after another
114
+ * element was dropped and destroyed
115
+ * </pre>
116
+ */
117
+ extern int pq_insert(PriorityQueue *self, void *elem);
118
+
119
+ /**
120
+ * Get the top element in the PriorityQueue.
121
+ *
122
+ * @param self the PriorityQueue to get the top from
123
+ * @return the top element in the PriorityQueue
124
+ */
125
+ extern void *pq_top(PriorityQueue *self);
126
+
127
+ /**
128
+ * Remove and return the top element in the PriorityQueue.
129
+ *
130
+ * @param self the PriorityQueue to get the top from
131
+ * @return the top element in the PriorityQueue
132
+ */
133
+ extern void *pq_pop(PriorityQueue *self);
134
+
135
+ /**
136
+ * Return true if the PriorityQueue is full.
137
+ *
138
+ * @param self the PriorityQueue to test
139
+ * @return true if the PriorityQueue is full.
140
+ */
141
+ #define pq_full(pq) ((pq)->size == (pq)->capa)
142
+
47
143
  #endif