ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/ext/r_utils.c ADDED
@@ -0,0 +1,941 @@
1
+ #include "ferret.h"
2
+ #include "bitvector.h"
3
+
4
+ /*****************
5
+ *** BitVector ***
6
+ *****************/
7
+ static VALUE cBitVector;
8
+
9
+ static VALUE
10
+ frt_bv_alloc(VALUE klass)
11
+ {
12
+ return Data_Wrap_Struct(klass, NULL, &bv_destroy, bv_new());
13
+ }
14
+
15
+ #define GET_BV(bv, self) Data_Get_Struct(self, BitVector, bv)
16
+
17
+ /*
18
+ * call-seq:
19
+ * BitVector.new() -> new_bv
20
+ *
21
+ * Returns a new empty bit-vector object
22
+ */
23
+ static VALUE
24
+ frt_bv_init(VALUE self)
25
+ {
26
+ return self;
27
+ }
28
+
29
+ /*
30
+ * call-seq:
31
+ * bv[i] = bool -> bool
32
+ *
33
+ * Set the bit and _i_ to *val* (+true+ or
34
+ * +false+).
35
+ */
36
+ VALUE
37
+ frt_bv_set(VALUE self, VALUE rindex, VALUE rstate)
38
+ {
39
+ BitVector *bv;
40
+ int index = FIX2INT(rindex);
41
+ GET_BV(bv, self);
42
+ if (index < 0) {
43
+ rb_raise(rb_eIndexError, "%d < 0", index);
44
+ }
45
+ if (RTEST(rstate)) {
46
+ bv_set(bv, index);
47
+ }
48
+ else {
49
+ bv_unset(bv, index);
50
+ }
51
+
52
+ return rstate;
53
+ }
54
+
55
+ /*
56
+ * call-seq:
57
+ * bv.set(i) -> self
58
+ *
59
+ * Set the bit at _i_ to *on* (+true+)
60
+ */
61
+ VALUE
62
+ frt_bv_set_on(VALUE self, VALUE rindex)
63
+ {
64
+ frt_bv_set(self, rindex, Qtrue);
65
+ return self;
66
+ }
67
+
68
+ /*
69
+ * call-seq:
70
+ * bv.unset(i) -> self
71
+ *
72
+ * Set the bit at _i_ to *off* (+false+)
73
+ */
74
+ VALUE
75
+ frt_bv_set_off(VALUE self, VALUE rindex)
76
+ {
77
+ frt_bv_set(self, rindex, Qfalse);
78
+ return self;
79
+ }
80
+
81
+ /*
82
+ * call-seq:
83
+ * bv.get(i) -> bool
84
+ * bv[i] -> bool
85
+ *
86
+ * Get the bit value at _i_
87
+ */
88
+ VALUE
89
+ frt_bv_get(VALUE self, VALUE rindex)
90
+ {
91
+ BitVector *bv;
92
+ int index = FIX2INT(rindex);
93
+ GET_BV(bv, self);
94
+ if (index < 0) {
95
+ rb_raise(rb_eIndexError, "%d < 0", index);
96
+ }
97
+
98
+ return bv_get(bv, index) ? Qtrue : Qfalse;
99
+ }
100
+
101
+ /*
102
+ * call-seq:
103
+ * bv.count -> bit_count
104
+ *
105
+ * Count the number of bits set in the bit-vector. If the bit-vector has been
106
+ * negated using +#not+ then count the number of unset bits
107
+ * instead.
108
+ */
109
+ VALUE
110
+ frt_bv_count(VALUE self)
111
+ {
112
+ BitVector *bv;
113
+ GET_BV(bv, self);
114
+ return INT2FIX(bv->count);
115
+ }
116
+
117
+ /*
118
+ * call-seq:
119
+ * bv.clear -> self
120
+ *
121
+ * Clears all set bits in the bit-vector. Negated bit-vectors will still have
122
+ * all bits set to *off*.
123
+ */
124
+ VALUE
125
+ frt_bv_clear(VALUE self)
126
+ {
127
+ BitVector *bv;
128
+ GET_BV(bv, self);
129
+ bv_clear(bv);
130
+ bv_scan_reset(bv);
131
+ return self;
132
+ }
133
+
134
+ /*
135
+ * call-seq:
136
+ * bv1 == bv2 -> bool
137
+ * bv1 != bv2 -> bool
138
+ * bv1.eql(bv2) -> bool
139
+ *
140
+ * Compares two bit vectors and returns true if both bitvectors have the same
141
+ * bits set.
142
+ */
143
+ VALUE
144
+ frt_bv_eql(VALUE self, VALUE other)
145
+ {
146
+ BitVector *bv1, *bv2;
147
+ GET_BV(bv1, self);
148
+ GET_BV(bv2, other);
149
+ return bv_eq(bv1, bv2) ? Qtrue : Qfalse;
150
+ }
151
+
152
+ /*
153
+ * call-seq:
154
+ * bv.hash -> int
155
+ *
156
+ * Used to store bit vectors in Hashes. Especially useful if you want to
157
+ * cache them.
158
+ */
159
+ VALUE
160
+ frt_bv_hash(VALUE self)
161
+ {
162
+ BitVector *bv;
163
+ GET_BV(bv, self);
164
+ return LONG2NUM(bv_hash(bv));
165
+ }
166
+
167
+ /*
168
+ * call-seq:
169
+ * bv1 & bv2 -> anded_bv
170
+ * bv1.and(bv2) -> anded_bv
171
+ *
172
+ * Perform a boolean _and_ operation on +bv1+ and
173
+ * +bv2+
174
+ */
175
+ VALUE
176
+ frt_bv_and(VALUE self, VALUE other)
177
+ {
178
+ BitVector *bv1, *bv2;
179
+ GET_BV(bv1, self);
180
+ GET_BV(bv2, other);
181
+ return Data_Wrap_Struct(cBitVector, NULL, &bv_destroy, bv_and(bv1, bv2));
182
+ }
183
+
184
+ /*
185
+ * call-seq:
186
+ * bv1.and!(bv2) -> self
187
+ *
188
+ * Perform a boolean _and_ operation on +bv1+ and
189
+ * +bv2+ in place on +bv1+
190
+ */
191
+ VALUE
192
+ frt_bv_and_x(VALUE self, VALUE other)
193
+ {
194
+ BitVector *bv1, *bv2;
195
+ GET_BV(bv1, self);
196
+ GET_BV(bv2, other);
197
+ bv_and_x(bv1, bv2);
198
+ return self;
199
+ }
200
+
201
+ /*
202
+ * call-seq:
203
+ * bv1 | bv2 -> ored_bv
204
+ * bv1.or(bv2) -> ored_bv
205
+ *
206
+ * Perform a boolean _or_ operation on +bv1+ and
207
+ * +bv2+
208
+ */
209
+ VALUE
210
+ frt_bv_or(VALUE self, VALUE other)
211
+ {
212
+ BitVector *bv1, *bv2;
213
+ GET_BV(bv1, self);
214
+ GET_BV(bv2, other);
215
+ return Data_Wrap_Struct(cBitVector, NULL, &bv_destroy, bv_or(bv1, bv2));
216
+ }
217
+
218
+ /*
219
+ * call-seq:
220
+ * bv1.or!(bv2) -> self
221
+ *
222
+ * Perform a boolean _or_ operation on +bv1+ and
223
+ * +bv2+ in place on +bv1+
224
+ */
225
+ VALUE
226
+ frt_bv_or_x(VALUE self, VALUE other)
227
+ {
228
+ BitVector *bv1, *bv2;
229
+ GET_BV(bv1, self);
230
+ GET_BV(bv2, other);
231
+ bv_or_x(bv1, bv2);
232
+ return self;
233
+ }
234
+
235
+ /*
236
+ * call-seq:
237
+ * bv1 ^ bv2 -> xored_bv
238
+ * bv1.xor(bv2) -> xored_bv
239
+ *
240
+ * Perform a boolean _xor_ operation on +bv1+ and
241
+ * +bv2+
242
+ */
243
+ VALUE
244
+ frt_bv_xor(VALUE self, VALUE other)
245
+ {
246
+ BitVector *bv1, *bv2;
247
+ GET_BV(bv1, self);
248
+ GET_BV(bv2, other);
249
+ return Data_Wrap_Struct(cBitVector, NULL, &bv_destroy, bv_xor(bv1, bv2));
250
+ }
251
+
252
+ /*
253
+ * call-seq:
254
+ * bv1.xor!(bv2) -> self
255
+ *
256
+ * Perform a boolean _xor_ operation on +bv1+ and
257
+ * +bv2+ in place on +bv1+
258
+ */
259
+ VALUE
260
+ frt_bv_xor_x(VALUE self, VALUE other)
261
+ {
262
+ BitVector *bv1, *bv2;
263
+ GET_BV(bv1, self);
264
+ GET_BV(bv2, other);
265
+ bv_xor_x(bv1, bv2);
266
+ return self;
267
+ }
268
+
269
+ /*
270
+ * call-seq:
271
+ * ~bv -> bv
272
+ * bv.not -> bv
273
+ *
274
+ * Perform a boolean _not_ operation on +bv+
275
+ * */
276
+ VALUE
277
+ frt_bv_not(VALUE self)
278
+ {
279
+ BitVector *bv;
280
+ GET_BV(bv, self);
281
+ return Data_Wrap_Struct(cBitVector, NULL, &bv_destroy, bv_not(bv));
282
+ }
283
+
284
+ /*
285
+ * call-seq:
286
+ * bv.not! -> self
287
+ *
288
+ * Perform a boolean _not_ operation on +bv+ in-place
289
+ */
290
+ VALUE
291
+ frt_bv_not_x(VALUE self)
292
+ {
293
+ BitVector *bv;
294
+ GET_BV(bv, self);
295
+ bv_not_x(bv);
296
+ return self;
297
+ }
298
+
299
+ /*
300
+ * call-seq:
301
+ * bv.reset_scan -> self
302
+ *
303
+ * Resets the BitVector ready for scanning. You should call this method
304
+ * before calling +#next+ or +#next_unset+. It isn't
305
+ * necessary for the other scan methods or for the +#each+ method.
306
+ */
307
+ VALUE
308
+ frt_bv_reset_scan(VALUE self)
309
+ {
310
+ BitVector *bv;
311
+ GET_BV(bv, self);
312
+ bv_scan_reset(bv);
313
+ return self;
314
+ }
315
+
316
+ /*
317
+ * call-seq:
318
+ * bv.next -> bit_num
319
+ *
320
+ * Returns the next set bit in the bit-vector scanning from low order to high
321
+ * order. You should call +#reset_scan+ before calling this method
322
+ * if you want to scan from the beginning. It is automatically reset when you
323
+ * first create the bit-vector.
324
+ */
325
+ VALUE
326
+ frt_bv_next(VALUE self)
327
+ {
328
+ BitVector *bv;
329
+ GET_BV(bv, self);
330
+ return INT2FIX(bv_scan_next(bv));
331
+ }
332
+
333
+ /*
334
+ * call-seq:
335
+ * bv.next_unset -> bit_num
336
+ *
337
+ * Returns the next unset bit in the bit-vector scanning from low order to
338
+ * high order. This method should only be called on bit-vectors which have
339
+ * been flipped (negated). You should call +#reset_scan+ before
340
+ * calling this method if you want to scan from the beginning. It is
341
+ * automatically reset when you first create the bit-vector.
342
+ */
343
+ VALUE
344
+ frt_bv_next_unset(VALUE self)
345
+ {
346
+ BitVector *bv;
347
+ GET_BV(bv, self);
348
+ return INT2FIX(bv_scan_next_unset(bv));
349
+ }
350
+
351
+ /*
352
+ * call-seq:
353
+ * bv.next_from(from) -> bit_num
354
+ *
355
+ * Returns the next set bit in the bit-vector scanning from low order to
356
+ * high order and starting at +from+. The scan is inclusive so if
357
+ * +from+ is equal to 10 and +bv[10]+ is set it will
358
+ * return the number 10. If the bit-vector has been negated than you should
359
+ * use the +#next_unset_from+ method.
360
+ */
361
+ VALUE
362
+ frt_bv_next_from(VALUE self, VALUE rfrom)
363
+ {
364
+ BitVector *bv;
365
+ int from = FIX2INT(rfrom);
366
+ GET_BV(bv, self);
367
+ if (from < 0) {
368
+ from = 0;
369
+ }
370
+ return INT2FIX(bv_scan_next_from(bv, from));
371
+ }
372
+
373
+ /*
374
+ * call-seq:
375
+ * bv.next_unset_from(from) -> bit_num
376
+ *
377
+ * Returns the next unset bit in the bit-vector scanning from low order to
378
+ * high order and starting at +from+. The scan is inclusive so if
379
+ * +from+ is equal to 10 and +bv[10]+ is unset it will
380
+ * return the number 10. If the bit-vector has not been negated than you
381
+ * should use the +#next_from+ method.
382
+ */
383
+ VALUE
384
+ frt_bv_next_unset_from(VALUE self, VALUE rfrom)
385
+ {
386
+ BitVector *bv;
387
+ int from = FIX2INT(rfrom);
388
+ GET_BV(bv, self);
389
+ if (from < 0) {
390
+ from = 0;
391
+ }
392
+ return INT2FIX(bv_scan_next_unset_from(bv, from));
393
+ }
394
+
395
+ /*
396
+ * call-seq:
397
+ * bv.each { |bit_num| }
398
+ *
399
+ * Iterate through all the set bits in the bit-vector yeilding each one in
400
+ * order
401
+ */
402
+ VALUE
403
+ frt_bv_each(VALUE self)
404
+ {
405
+ BitVector *bv;
406
+ int bit;
407
+ GET_BV(bv, self);
408
+ bv_scan_reset(bv);
409
+ if (bv->extends_as_ones) {
410
+ while ((bit = bv_scan_next_unset(bv)) >= 0) {
411
+ rb_yield(INT2FIX(bit));
412
+ }
413
+ }
414
+ else {
415
+ while ((bit = bv_scan_next(bv)) >= 0) {
416
+ rb_yield(INT2FIX(bit));
417
+ }
418
+ }
419
+ return self;
420
+ }
421
+
422
+ /*
423
+ * call-seq:
424
+ * bv.to_a
425
+ *
426
+ * Iterate through all the set bits in the bit-vector adding the index of
427
+ * each set bit to an array. This is useful if you want to perform array
428
+ * methods on the bit-vecter. If you want to convert an array to a bit_vector
429
+ * simply do this;
430
+ *
431
+ * bv = [1, 12, 45, 367, 455].inject(BitVector.new) {|bv, i| bv.set(i)}
432
+ */
433
+ VALUE
434
+ frt_bv_to_a(VALUE self)
435
+ {
436
+ BitVector *bv;
437
+ int bit;
438
+ VALUE ary;
439
+ GET_BV(bv, self);
440
+ ary = rb_ary_new();
441
+ bv_scan_reset(bv);
442
+ if (bv->extends_as_ones) {
443
+ while ((bit = bv_scan_next_unset(bv)) >= 0) {
444
+ rb_ary_push(ary, INT2FIX(bit));
445
+ }
446
+ }
447
+ else {
448
+ while ((bit = bv_scan_next(bv)) >= 0) {
449
+ rb_ary_push(ary, INT2FIX(bit));
450
+ }
451
+ }
452
+ return ary;
453
+ }
454
+
455
+ static VALUE mUtils;
456
+
457
+ /*
458
+ * Document-class: Ferret::Utils::BitVector
459
+ *
460
+ * == Summary
461
+ *
462
+ * A BitVector is pretty easy to implement in Ruby using Ruby's BigNum class.
463
+ * This BitVector however allows you to count the set bits with the
464
+ * +#count+ method (or unset bits of flipped bit vectors) and also
465
+ * to quickly scan the set bits.
466
+ *
467
+ * == Boolean Operations
468
+ *
469
+ * BitVector handles four boolean operations;
470
+ *
471
+ * * +&+
472
+ * * +|+
473
+ * * +^+
474
+ * * +~+
475
+ *
476
+ * bv1 = BitVector.new
477
+ * bv2 = BitVector.new
478
+ * bv3 = BitVector.new
479
+ *
480
+ * bv4 = (bv1 & bv2) | ~bv3
481
+ *
482
+ * You can also do the operations in-place;
483
+ *
484
+ * * +and!+
485
+ * * +or!+
486
+ * * +xor!+
487
+ * * +not!+
488
+ *
489
+ * bv4.and!(bv5).not!
490
+ *
491
+ * == Set Bit Scanning
492
+ *
493
+ * Perhaps the most useful functionality in BitVector is the ability to
494
+ * quickly scan for set bits. To print all set bits;
495
+ *
496
+ * bv.each {|bit| puts bit }
497
+ *
498
+ * Alternatively you could use the lower level +next+ or
499
+ * +next_unset+ methods. Note that the +each+ method will
500
+ * automatically scan unset bits if the BitVector has been flipped (using
501
+ * +not+).
502
+ */
503
+ static void
504
+ Init_BitVector(void)
505
+ {
506
+ /* BitVector */
507
+ cBitVector = rb_define_class_under(mUtils, "BitVector", rb_cObject);
508
+ rb_define_alloc_func(cBitVector, frt_bv_alloc);
509
+
510
+ rb_define_method(cBitVector, "initialize", frt_bv_init, 0);
511
+ rb_define_method(cBitVector, "set", frt_bv_set_on, 1);
512
+ rb_define_method(cBitVector, "unset", frt_bv_set_off, 1);
513
+ rb_define_method(cBitVector, "[]=", frt_bv_set, 2);
514
+ rb_define_method(cBitVector, "get", frt_bv_get, 1);
515
+ rb_define_method(cBitVector, "[]", frt_bv_get, 1);
516
+ rb_define_method(cBitVector, "count", frt_bv_count, 0);
517
+ rb_define_method(cBitVector, "clear", frt_bv_clear, 0);
518
+ rb_define_method(cBitVector, "eql?", frt_bv_eql, 1);
519
+ rb_define_method(cBitVector, "==", frt_bv_eql, 1);
520
+ rb_define_method(cBitVector, "hash", frt_bv_hash, 0);
521
+ rb_define_method(cBitVector, "and!", frt_bv_and_x, 1);
522
+ rb_define_method(cBitVector, "and", frt_bv_and, 1);
523
+ rb_define_method(cBitVector, "&", frt_bv_and, 1);
524
+ rb_define_method(cBitVector, "or!", frt_bv_or_x, 1);
525
+ rb_define_method(cBitVector, "or", frt_bv_or, 1);
526
+ rb_define_method(cBitVector, "|", frt_bv_or, 1);
527
+ rb_define_method(cBitVector, "xor!", frt_bv_xor_x, 1);
528
+ rb_define_method(cBitVector, "xor", frt_bv_xor, 1);
529
+ rb_define_method(cBitVector, "^", frt_bv_xor, 1);
530
+ rb_define_method(cBitVector, "not!", frt_bv_not_x, 0);
531
+ rb_define_method(cBitVector, "not", frt_bv_not, 0);
532
+ rb_define_method(cBitVector, "~", frt_bv_not, 0);
533
+ rb_define_method(cBitVector, "reset_scan", frt_bv_reset_scan, 0);
534
+ rb_define_method(cBitVector, "next", frt_bv_next, 0);
535
+ rb_define_method(cBitVector, "next_unset", frt_bv_next_unset, 0);
536
+ rb_define_method(cBitVector, "next_from", frt_bv_next_from, 1);
537
+ rb_define_method(cBitVector, "next_unset_from", frt_bv_next_unset_from, 1);
538
+ rb_define_method(cBitVector, "each", frt_bv_each, 0);
539
+ rb_define_method(cBitVector, "to_a", frt_bv_to_a, 0);
540
+ }
541
+
542
+ /*********************
543
+ *** PriorityQueue ***
544
+ *********************/
545
+ typedef struct PriQ /* state behind a Ruby PriorityQueue object */
546
+ {
547
+ int size; /* number of stored elements; they occupy heap[1]..heap[size] */
548
+ int capa; /* logical capacity: insert only pushes while size < capa */
549
+ int mem_capa; /* number of VALUE slots allocated for heap; doubled by pq_push */
550
+ VALUE *heap; /* 1-based binary heap of Ruby objects; slot 0 is unused */
551
+ VALUE proc; /* comparison callable invoked with two elements, or Qnil to call id_lt on the first */
552
+ } PriQ;
553
+
554
+ #define PQ_START_CAPA 32 /* initial value of both capa and mem_capa */
555
+
556
+ static bool frt_pq_lt(VALUE proc, VALUE v1, VALUE v2)
557
+ {
558
+ if (proc == Qnil) {
559
+ return RTEST(rb_funcall(v1, id_lt, 1, v2));
560
+ }
561
+ else {
562
+ return RTEST(rb_funcall(proc, id_call, 2, v1, v2));
563
+ }
564
+ }
565
+
566
+ static void pq_up(PriQ *pq)
567
+ {
568
+ VALUE *heap = pq->heap;
569
+ VALUE node;
570
+ int i = pq->size;
571
+ int j = i >> 1;
572
+
573
+ node = heap[i];
574
+
575
+ while ((j > 0) && frt_pq_lt(pq->proc, node, heap[j])) {
576
+ heap[i] = heap[j];
577
+ i = j;
578
+ j = j >> 1;
579
+ }
580
+ heap[i] = node;
581
+ }
582
+
583
+ static void pq_down(PriQ *pq)
584
+ {
585
+ register int i = 1;
586
+ register int j = 2; /* i << 1; */
587
+ register int k = 3; /* j + 1; */
588
+ register int size = pq->size;
589
+ VALUE *heap = pq->heap;
590
+ VALUE node = heap[i]; /* save top node */
591
+
592
+ if ((k <= size) && (frt_pq_lt(pq->proc, heap[k], heap[j]))) {
593
+ j = k;
594
+ }
595
+
596
+ while ((j <= size) && frt_pq_lt(pq->proc, heap[j], node)) {
597
+ heap[i] = heap[j]; /* shift up child */
598
+ i = j;
599
+ j = i << 1;
600
+ k = j + 1;
601
+ if ((k <= size) && frt_pq_lt(pq->proc, heap[k], heap[j])) {
602
+ j = k;
603
+ }
604
+ }
605
+ heap[i] = node;
606
+ }
607
+
608
+ static void pq_push(PriQ *pq, VALUE elem)
609
+ {
610
+ pq->size++;
611
+ if (pq->size >= pq->mem_capa) {
612
+ pq->mem_capa <<= 1;
613
+ REALLOC_N(pq->heap, VALUE, pq->mem_capa);
614
+ }
615
+ pq->heap[pq->size] = elem;
616
+ pq_up(pq);
617
+ }
618
+
619
+ static VALUE cPriorityQueue;
620
+
621
+ static void
622
+ frt_pq_mark(void *p)
623
+ {
624
+ PriQ *pq = (PriQ *)p;
625
+ int i;
626
+ for (i = pq->size; i > 0; i--) {
627
+ rb_gc_mark_maybe(pq->heap[i]);
628
+ }
629
+ }
630
+
631
+ static void frt_pq_free(PriQ *pq)
632
+ {
633
+ free(pq->heap);
634
+ free(pq);
635
+ }
636
+
637
+ static VALUE
638
+ frt_pq_alloc(VALUE klass)
639
+ {
640
+ PriQ *pq = ALLOC_AND_ZERO(PriQ);
641
+ pq->capa = PQ_START_CAPA;
642
+ pq->mem_capa = PQ_START_CAPA;
643
+ pq->heap = ALLOC_N(VALUE, PQ_START_CAPA);
644
+ pq->proc = Qnil;
645
+ return Data_Wrap_Struct(klass, &frt_pq_mark, &frt_pq_free, pq);
646
+ }
647
+
648
+ #define GET_PQ(pq, self) Data_Get_Struct(self, PriQ, pq)
649
+ /*
650
+ * call-seq:
651
+ * PriorityQueue.new(capacity = 32) -> new_pq
652
+ * PriorityQueue.new({:capacity => 32,
653
+ * :less_than_proc => lambda{|a, b| a < b}) -> new_pq
654
+ * PriorityQueue.new({:capacity => 32}) {|a, b| a < b} -> new_pq
655
+ *
656
+ * Returns a new empty priority queue object with an optional capacity.
657
+ * Once the capacity is filled, the lowest valued elements will be
658
+ * automatically popped off the top of the queue as more elements are
659
+ * inserted into the queue.
660
+ */
661
+ static VALUE
662
+ frt_pq_init(int argc, VALUE *argv, VALUE self)
663
+ {
664
+ if (argc >= 1) {
665
+ PriQ *pq;
666
+ VALUE options = argv[0];
667
+ VALUE param;
668
+ int capa = PQ_START_CAPA;
669
+ GET_PQ(pq, self);
670
+ switch (TYPE(options)) {
671
+ case T_FIXNUM:
672
+ capa = FIX2INT(options);
673
+ break;
674
+ case T_HASH:
675
+ if (!NIL_P(param = rb_hash_aref(options,
676
+ ID2SYM(id_capacity)))) {
677
+ capa = FIX2INT(param);
678
+ }
679
+ if (!NIL_P(param = rb_hash_aref(options,
680
+ ID2SYM(id_less_than)))) {
681
+ pq->proc = param;
682
+ }
683
+ break;
684
+ default:
685
+ rb_raise(rb_eArgError,
686
+ "PriorityQueue#initialize only takes a Hash or "
687
+ "an integer");
688
+
689
+ break;
690
+ }
691
+ if (capa < 0) {
692
+ rb_raise(rb_eIndexError,
693
+ "PriorityQueue must have a capacity > 0. %d < 0",
694
+ index);
695
+ }
696
+ pq->capa = capa;
697
+ if (rb_block_given_p()) {
698
+ pq->proc = rb_block_proc();
699
+ }
700
+ if (argc > 1) {
701
+ rb_raise(rb_eArgError,
702
+ "PriorityQueue#initialize only takes one parameter");
703
+ }
704
+ }
705
+
706
+ return self;
707
+ }
708
+
709
+ /*
710
+ * call-seq:
711
+ * pq.clone -> pq_clone
712
+ *
713
+ * Returns a shallow clone of the priority queue. That is only the priority
714
+ * queue is cloned, its contents are not cloned.
715
+ */
716
+ static VALUE
717
+ frt_pq_clone(VALUE self)
718
+ {
719
+ PriQ *pq, *new_pq = ALLOC(PriQ);
720
+ GET_PQ(pq, self);
721
+ memcpy(new_pq, pq, sizeof(PriQ));
722
+ new_pq->heap = ALLOC_N(VALUE, new_pq->mem_capa);
723
+ memcpy(new_pq->heap, pq->heap, sizeof(VALUE) * (new_pq->size + 1));
724
+
725
+ return Data_Wrap_Struct(cPriorityQueue, &frt_pq_mark, &frt_pq_free, new_pq);
726
+ }
727
+
728
+ /*
729
+ * call-seq:
730
+ * pq.clear -> self
731
+ *
732
+ * Clears all elements from the priority queue. The size will be reset to 0.
733
+ */
734
+ static VALUE
735
+ frt_pq_clear(VALUE self)
736
+ {
737
+ PriQ *pq;
738
+ GET_PQ(pq, self);
739
+ pq->size = 0;
740
+ return self;
741
+ }
742
+
743
+ /*
744
+ * call-seq:
745
+ * pq.insert(elem) -> self
746
+ * pq << elem -> self
747
+ *
748
+ * Insert an element into a queue. It will be inserted into the correct
749
+ * position in the queue according to its priority.
750
+ */
751
+ static VALUE
752
+ frt_pq_insert(VALUE self, VALUE elem)
753
+ {
754
+ PriQ *pq;
755
+ GET_PQ(pq, self);
756
+ if (pq->size < pq->capa) {
757
+ pq_push(pq, elem);
758
+ }
759
+ else if (pq->size > 0 && frt_pq_lt(pq->proc, pq->heap[1], elem)) {
760
+ pq->heap[1] = elem;
761
+ pq_down(pq);
762
+ }
763
+ /* else ignore the element */
764
+ return self;
765
+ }
766
+
767
+ /*
768
+ * call-seq:
769
+ * pq.adjust -> self
770
+ *
771
+ * Sometimes you modify the top element in the priority queue so that its
772
+ * priority changes. When you do this you need to reorder the queue and you
773
+ * do this by calling the adjust method.
774
+ */
775
+ static VALUE
776
+ frt_pq_adjust(VALUE self)
777
+ {
778
+ PriQ *pq;
779
+ GET_PQ(pq, self);
780
+ pq_down(pq);
781
+ return self;
782
+ }
783
+
784
+ /*
785
+ * call-seq:
786
+ * pq.top -> elem
787
+ *
788
+ * Returns the top element in the queue but does not remove it from the
789
+ * queue.
790
+ */
791
+ static VALUE
792
+ frt_pq_top(VALUE self)
793
+ {
794
+ PriQ *pq;
795
+ GET_PQ(pq, self);
796
+ return (pq->size > 0) ? pq->heap[1] : Qnil;
797
+ }
798
+
799
+ /*
800
+ * call-seq:
801
+ * pq.pop -> elem
802
+ *
803
+ * Returns the top element in the queue removing it from the queue.
804
+ */
805
+ static VALUE
806
+ frt_pq_pop(VALUE self)
807
+ {
808
+ PriQ *pq;
809
+ GET_PQ(pq, self);
810
+ if (pq->size > 0) {
811
+ VALUE result = pq->heap[1]; /* save first value */
812
+ pq->heap[1] = pq->heap[pq->size]; /* move last to first */
813
+ pq->heap[pq->size] = Qnil;
814
+ pq->size--;
815
+ pq_down(pq); /* adjust heap */
816
+ return result;
817
+ }
818
+ else {
819
+ return Qnil;
820
+ }
821
+ }
822
+
823
+ /*
824
+ * call-seq:
825
+ * pq.size -> integer
826
+ *
827
+ * Returns the size of the queue, ie. the number of elements currently stored
828
+ * in the queue. The _size_ of a PriorityQueue can never be greater than
829
+ * its _capacity_
830
+ */
831
+ static VALUE
832
+ frt_pq_size(VALUE self)
833
+ {
834
+ PriQ *pq;
835
+ GET_PQ(pq, self);
836
+ return INT2FIX(pq->size);
837
+ }
838
+
839
+ /*
840
+ * call-seq:
841
+ * pq.capacity -> integer
842
+ *
843
+ * Returns the capacity of the queue, ie. the number of elements that can be
844
+ * stored in a Priority queue before they start to drop off the end. The
845
+ * _size_ of a PriorityQueue can never be greater than its
846
+ * _capacity_
847
+ */
848
+ static VALUE
849
+ frt_pq_capa(VALUE self)
850
+ {
851
+ PriQ *pq;
852
+ GET_PQ(pq, self);
853
+ return INT2FIX(pq->capa);
854
+ }
855
+
856
+ /*
857
+ * Document-class: Ferret::Utils::PriorityQueue
858
+ *
859
+ * == Summary
860
+ *
861
+ * A PriorityQueue is a very useful data structure and one that needs a fast
862
+ * implementation. Hence this priority queue is implemented in C. It is
863
+ * pretty easy to use; basically you just insert elements into the queue and
864
+ * pop them off.
865
+ *
866
+ * The elements are sorted with the lowest valued elements on the top of
867
+ * the heap, ie the first to be popped off. Elements are ordered using the
868
+ * less_than '<' method. To change the order of the queue you can either
869
+ * reimplement the '<' method or pass a block when you initialize the queue.
870
+ *
871
+ * You can also set the capacity of the PriorityQueue. Once you hit the
872
+ * capacity, the lowest valued elements are automatically popped off the top
873
+ * of the queue as more elements are added.
874
+ *
875
+ * == Example
876
+ *
877
+ * Here is a toy example that sorts strings by their length and has a capacity
878
+ * of 5;
879
+ *
880
+ * q = PriorityQueue.new(5) {|a, b| a.size < b.size}
881
+ * q << "x"
882
+ * q << "xxxxx"
883
+ * q << "xxx"
884
+ * q << "xxxx"
885
+ * q << "xxxxxx"
886
+ * q << "xx" # hit capacity so "x" will be popped off the top
887
+ *
888
+ * puts q.size #=> 5
889
+ * word = q.pop #=> "xx"
890
+ * q.top << "yyyy" # "xxxyyyy" will still be at the top of the queue
891
+ * q.adjust # move "xxxyyyy" to its correct location in queue
892
+ * word = q.pop #=> "xxxx"
893
+ * word = q.pop #=> "xxxxx"
894
+ * word = q.pop #=> "xxxxxx"
895
+ * word = q.pop #=> "xxxyyyy"
896
+ * word = q.pop #=> nil
897
+ */
898
+ static void
899
+ Init_PriorityQueue(void)
900
+ {
901
+ /* PriorityQueue */
902
+ cPriorityQueue = rb_define_class_under(mUtils, "PriorityQueue", rb_cObject);
903
+ rb_define_alloc_func(cPriorityQueue, frt_pq_alloc);
904
+
905
+ rb_define_method(cPriorityQueue, "initialize", frt_pq_init, -1);
906
+ rb_define_method(cPriorityQueue, "clone", frt_pq_clone, 0);
907
+ rb_define_method(cPriorityQueue, "clear", frt_pq_clear, 0);
908
+ rb_define_method(cPriorityQueue, "insert", frt_pq_insert, 1);
909
+ rb_define_method(cPriorityQueue, "<<", frt_pq_insert, 1);
910
+ rb_define_method(cPriorityQueue, "top", frt_pq_top, 0);
911
+ rb_define_method(cPriorityQueue, "pop", frt_pq_pop, 0);
912
+ rb_define_method(cPriorityQueue, "size", frt_pq_size, 0);
913
+ rb_define_method(cPriorityQueue, "capacity", frt_pq_capa, 0);
914
+ rb_define_method(cPriorityQueue, "adjust", frt_pq_adjust, 0);
915
+ }
916
+
917
+ /* rdoc hack
918
+ extern VALUE mFerret = rb_define_module("Ferret");
919
+ */
920
+
921
+ /*
922
+ * Document-module: Ferret::Utils
923
+ *
924
+ * The Utils module contains a number of helper classes and modules that are
925
+ * useful when indexing with Ferret. They are;
926
+ *
927
+ * * BitVector
928
+ * * PriorityQueue
929
+ * * => more to come
930
+ *
931
+ * These helper classes could also be quite useful outside of Ferret and may
932
+ * one day find themselves in their own separate library.
933
+ */
934
+ void
935
+ Init_Utils(void)
936
+ {
937
+ mUtils = rb_define_module_under(mFerret, "Utils");
938
+
939
+ Init_BitVector();
940
+ Init_PriorityQueue();
941
+ }