ferret 0.9.6 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/ext/r_utils.c ADDED
@@ -0,0 +1,941 @@
1
+ #include "ferret.h"
2
+ #include "bitvector.h"
3
+
4
+ /*****************
5
+ *** BitVector ***
6
+ *****************/
7
+ static VALUE cBitVector;
8
+
9
+ static VALUE
10
+ frt_bv_alloc(VALUE klass)
11
+ {
12
+ return Data_Wrap_Struct(klass, NULL, &bv_destroy, bv_new());
13
+ }
14
+
15
+ #define GET_BV(bv, self) Data_Get_Struct(self, BitVector, bv)
16
+
17
+ /*
18
+ * call-seq:
19
+ * BitVector.new() -> new_bv
20
+ *
21
+ * Returns a new empty bit-vector object
22
+ */
23
+ static VALUE
24
+ frt_bv_init(VALUE self)
25
+ {
26
+ return self;
27
+ }
28
+
29
+ /*
30
+ * call-seq:
31
+ * bv[i] = bool -> bool
32
+ *
33
+ * Set the bit and _i_ to *val* (+true+ or
34
+ * +false+).
35
+ */
36
+ VALUE
37
+ frt_bv_set(VALUE self, VALUE rindex, VALUE rstate)
38
+ {
39
+ BitVector *bv;
40
+ int index = FIX2INT(rindex);
41
+ GET_BV(bv, self);
42
+ if (index < 0) {
43
+ rb_raise(rb_eIndexError, "%d < 0", index);
44
+ }
45
+ if (RTEST(rstate)) {
46
+ bv_set(bv, index);
47
+ }
48
+ else {
49
+ bv_unset(bv, index);
50
+ }
51
+
52
+ return rstate;
53
+ }
54
+
55
+ /*
56
+ * call-seq:
57
+ * bv.set(i) -> self
58
+ *
59
+ * Set the bit at _i_ to *on* (+true+)
60
+ */
61
+ VALUE
62
+ frt_bv_set_on(VALUE self, VALUE rindex)
63
+ {
64
+ frt_bv_set(self, rindex, Qtrue);
65
+ return self;
66
+ }
67
+
68
+ /*
69
+ * call-seq:
70
+ * bv.unset(i) -> self
71
+ *
72
+ * Set the bit at _i_ to *off* (+false+)
73
+ */
74
+ VALUE
75
+ frt_bv_set_off(VALUE self, VALUE rindex)
76
+ {
77
+ frt_bv_set(self, rindex, Qfalse);
78
+ return self;
79
+ }
80
+
81
+ /*
82
+ * call-seq:
83
+ * bv.get(i) -> bool
84
+ * bv[i] -> bool
85
+ *
86
+ * Get the bit value at _i_
87
+ */
88
+ VALUE
89
+ frt_bv_get(VALUE self, VALUE rindex)
90
+ {
91
+ BitVector *bv;
92
+ int index = FIX2INT(rindex);
93
+ GET_BV(bv, self);
94
+ if (index < 0) {
95
+ rb_raise(rb_eIndexError, "%d < 0", index);
96
+ }
97
+
98
+ return bv_get(bv, index) ? Qtrue : Qfalse;
99
+ }
100
+
101
+ /*
102
+ * call-seq:
103
+ * bv.count -> bit_count
104
+ *
105
+ * Count the number of bits set in the bit-vector. If the bit-vector has been
106
+ * negated using +#not+ then count the number of unset bits
107
+ * instead.
108
+ */
109
+ VALUE
110
+ frt_bv_count(VALUE self)
111
+ {
112
+ BitVector *bv;
113
+ GET_BV(bv, self);
114
+ return INT2FIX(bv->count);
115
+ }
116
+
117
+ /*
118
+ * call-seq:
119
+ * bv.clear -> self
120
+ *
121
+ * Clears all set bits in the bit-vector. Negated bit-vectors will still have
122
+ * all bits set to *off*.
123
+ */
124
+ VALUE
125
+ frt_bv_clear(VALUE self)
126
+ {
127
+ BitVector *bv;
128
+ GET_BV(bv, self);
129
+ bv_clear(bv);
130
+ bv_scan_reset(bv);
131
+ return self;
132
+ }
133
+
134
+ /*
135
+ * call-seq:
136
+ * bv1 == bv2 -> bool
137
+ * bv1 != bv2 -> bool
138
+ * bv1.eql(bv2) -> bool
139
+ *
140
+ * Compares two bit vectors and returns true if both bitvectors have the same
141
+ * bits set.
142
+ */
143
+ VALUE
144
+ frt_bv_eql(VALUE self, VALUE other)
145
+ {
146
+ BitVector *bv1, *bv2;
147
+ GET_BV(bv1, self);
148
+ GET_BV(bv2, other);
149
+ return bv_eq(bv1, bv2) ? Qtrue : Qfalse;
150
+ }
151
+
152
+ /*
153
+ * call-seq:
154
+ * bv.hash -> int
155
+ *
156
+ * Used to store bit vectors in Hashes. Especially useful if you want to
157
+ * cache them.
158
+ */
159
+ VALUE
160
+ frt_bv_hash(VALUE self)
161
+ {
162
+ BitVector *bv;
163
+ GET_BV(bv, self);
164
+ return LONG2NUM(bv_hash(bv));
165
+ }
166
+
167
+ /*
168
+ * call-seq:
169
+ * bv1 & bv2 -> anded_bv
170
+ * bv1.and(bv2) -> anded_bv
171
+ *
172
+ * Perform a boolean _and_ operation on +bv1+ and
173
+ * +bv2+
174
+ */
175
+ VALUE
176
+ frt_bv_and(VALUE self, VALUE other)
177
+ {
178
+ BitVector *bv1, *bv2;
179
+ GET_BV(bv1, self);
180
+ GET_BV(bv2, other);
181
+ return Data_Wrap_Struct(cBitVector, NULL, &bv_destroy, bv_and(bv1, bv2));
182
+ }
183
+
184
+ /*
185
+ * call-seq:
186
+ * bv1.and!(bv2) -> self
187
+ *
188
+ * Perform a boolean _and_ operation on +bv1+ and
189
+ * +bv2+ in place on +bv1+
190
+ */
191
+ VALUE
192
+ frt_bv_and_x(VALUE self, VALUE other)
193
+ {
194
+ BitVector *bv1, *bv2;
195
+ GET_BV(bv1, self);
196
+ GET_BV(bv2, other);
197
+ bv_and_x(bv1, bv2);
198
+ return self;
199
+ }
200
+
201
+ /*
202
+ * call-seq:
203
+ * bv1 | bv2 -> ored_bv
204
+ * bv1.or(bv2) -> ored_bv
205
+ *
206
+ * Perform a boolean _or_ operation on +bv1+ and
207
+ * +bv2+
208
+ */
209
+ VALUE
210
+ frt_bv_or(VALUE self, VALUE other)
211
+ {
212
+ BitVector *bv1, *bv2;
213
+ GET_BV(bv1, self);
214
+ GET_BV(bv2, other);
215
+ return Data_Wrap_Struct(cBitVector, NULL, &bv_destroy, bv_or(bv1, bv2));
216
+ }
217
+
218
+ /*
219
+ * call-seq:
220
+ * bv1.or!(bv2) -> self
221
+ *
222
+ * Perform a boolean _or_ operation on +bv1+ and
223
+ * +bv2+ in place on +bv1+
224
+ */
225
+ VALUE
226
+ frt_bv_or_x(VALUE self, VALUE other)
227
+ {
228
+ BitVector *bv1, *bv2;
229
+ GET_BV(bv1, self);
230
+ GET_BV(bv2, other);
231
+ bv_or_x(bv1, bv2);
232
+ return self;
233
+ }
234
+
235
+ /*
236
+ * call-seq:
237
+ * bv1 ^ bv2 -> xored_bv
238
+ * bv1.xor(bv2) -> xored_bv
239
+ *
240
+ * Perform a boolean _xor_ operation on +bv1+ and
241
+ * +bv2+
242
+ */
243
+ VALUE
244
+ frt_bv_xor(VALUE self, VALUE other)
245
+ {
246
+ BitVector *bv1, *bv2;
247
+ GET_BV(bv1, self);
248
+ GET_BV(bv2, other);
249
+ return Data_Wrap_Struct(cBitVector, NULL, &bv_destroy, bv_xor(bv1, bv2));
250
+ }
251
+
252
+ /*
253
+ * call-seq:
254
+ * bv1.xor!(bv2) -> self
255
+ *
256
+ * Perform a boolean _xor_ operation on +bv1+ and
257
+ * +bv2+ in place on +bv1+
258
+ */
259
+ VALUE
260
+ frt_bv_xor_x(VALUE self, VALUE other)
261
+ {
262
+ BitVector *bv1, *bv2;
263
+ GET_BV(bv1, self);
264
+ GET_BV(bv2, other);
265
+ bv_xor_x(bv1, bv2);
266
+ return self;
267
+ }
268
+
269
+ /*
270
+ * call-seq:
271
+ * ~bv -> bv
272
+ * bv.not -> bv
273
+ *
274
+ * Perform a boolean _not_ operation on +bv+
275
+ * */
276
+ VALUE
277
+ frt_bv_not(VALUE self)
278
+ {
279
+ BitVector *bv;
280
+ GET_BV(bv, self);
281
+ return Data_Wrap_Struct(cBitVector, NULL, &bv_destroy, bv_not(bv));
282
+ }
283
+
284
+ /*
285
+ * call-seq:
286
+ * bv.not! -> self
287
+ *
288
+ * Perform a boolean _not_ operation on +bv+ in-place
289
+ */
290
+ VALUE
291
+ frt_bv_not_x(VALUE self)
292
+ {
293
+ BitVector *bv;
294
+ GET_BV(bv, self);
295
+ bv_not_x(bv);
296
+ return self;
297
+ }
298
+
299
+ /*
300
+ * call-seq:
301
+ * bv.reset_scan -> self
302
+ *
303
+ * Resets the BitVector ready for scanning. You should call this method
304
+ * before calling +#next+ or +#next_unset+. It isn't
305
+ * necessary for the other scan methods or for the +#each+ method.
306
+ */
307
+ VALUE
308
+ frt_bv_reset_scan(VALUE self)
309
+ {
310
+ BitVector *bv;
311
+ GET_BV(bv, self);
312
+ bv_scan_reset(bv);
313
+ return self;
314
+ }
315
+
316
+ /*
317
+ * call-seq:
318
+ * bv.next -> bit_num
319
+ *
320
+ * Returns the next set bit in the bit-vector scanning from low order to high
321
+ * order. You should call +#reset_scan+ before calling this method
322
+ * if you want to scan from the beginning. It is automatically reset when you
323
+ * first create the bit-vector.
324
+ */
325
+ VALUE
326
+ frt_bv_next(VALUE self)
327
+ {
328
+ BitVector *bv;
329
+ GET_BV(bv, self);
330
+ return INT2FIX(bv_scan_next(bv));
331
+ }
332
+
333
+ /*
334
+ * call-seq:
335
+ * bv.next_unset -> bit_num
336
+ *
337
+ * Returns the next unset bit in the bit-vector scanning from low order to
338
+ * high order. This method should only be called on bit-vectors which have
339
+ * been flipped (negated). You should call +#reset_scan+ before
340
+ * calling this method if you want to scan from the beginning. It is
341
+ * automatically reset when you first create the bit-vector.
342
+ */
343
+ VALUE
344
+ frt_bv_next_unset(VALUE self)
345
+ {
346
+ BitVector *bv;
347
+ GET_BV(bv, self);
348
+ return INT2FIX(bv_scan_next_unset(bv));
349
+ }
350
+
351
+ /*
352
+ * call-seq:
353
+ * bv.next_from(from) -> bit_num
354
+ *
355
+ * Returns the next set bit in the bit-vector scanning from low order to
356
+ * high order and starting at +from+. The scan is inclusive so if
357
+ * +from+ is equal to 10 and +bv[10]+ is set it will
358
+ * return the number 10. If the bit-vector has been negated than you should
359
+ * use the +#next_unset_from+ method.
360
+ */
361
+ VALUE
362
+ frt_bv_next_from(VALUE self, VALUE rfrom)
363
+ {
364
+ BitVector *bv;
365
+ int from = FIX2INT(rfrom);
366
+ GET_BV(bv, self);
367
+ if (from < 0) {
368
+ from = 0;
369
+ }
370
+ return INT2FIX(bv_scan_next_from(bv, from));
371
+ }
372
+
373
+ /*
374
+ * call-seq:
375
+ * bv.next_unset_from(from) -> bit_num
376
+ *
377
+ * Returns the next unset bit in the bit-vector scanning from low order to
378
+ * high order and starting at +from+. The scan is inclusive so if
379
+ * +from+ is equal to 10 and +bv[10]+ is unset it will
380
+ * return the number 10. If the bit-vector has not been negated than you
381
+ * should use the +#next_from+ method.
382
+ */
383
+ VALUE
384
+ frt_bv_next_unset_from(VALUE self, VALUE rfrom)
385
+ {
386
+ BitVector *bv;
387
+ int from = FIX2INT(rfrom);
388
+ GET_BV(bv, self);
389
+ if (from < 0) {
390
+ from = 0;
391
+ }
392
+ return INT2FIX(bv_scan_next_unset_from(bv, from));
393
+ }
394
+
395
+ /*
396
+ * call-seq:
397
+ * bv.each { |bit_num| }
398
+ *
399
+ * Iterate through all the set bits in the bit-vector yeilding each one in
400
+ * order
401
+ */
402
+ VALUE
403
+ frt_bv_each(VALUE self)
404
+ {
405
+ BitVector *bv;
406
+ int bit;
407
+ GET_BV(bv, self);
408
+ bv_scan_reset(bv);
409
+ if (bv->extends_as_ones) {
410
+ while ((bit = bv_scan_next_unset(bv)) >= 0) {
411
+ rb_yield(INT2FIX(bit));
412
+ }
413
+ }
414
+ else {
415
+ while ((bit = bv_scan_next(bv)) >= 0) {
416
+ rb_yield(INT2FIX(bit));
417
+ }
418
+ }
419
+ return self;
420
+ }
421
+
422
+ /*
423
+ * call-seq:
424
+ * bv.to_a
425
+ *
426
+ * Iterate through all the set bits in the bit-vector adding the index of
427
+ * each set bit to an array. This is useful if you want to perform array
428
+ * methods on the bit-vecter. If you want to convert an array to a bit_vector
429
+ * simply do this;
430
+ *
431
+ * bv = [1, 12, 45, 367, 455].inject(BitVector.new) {|bv, i| bv.set(i)}
432
+ */
433
+ VALUE
434
+ frt_bv_to_a(VALUE self)
435
+ {
436
+ BitVector *bv;
437
+ int bit;
438
+ VALUE ary;
439
+ GET_BV(bv, self);
440
+ ary = rb_ary_new();
441
+ bv_scan_reset(bv);
442
+ if (bv->extends_as_ones) {
443
+ while ((bit = bv_scan_next_unset(bv)) >= 0) {
444
+ rb_ary_push(ary, INT2FIX(bit));
445
+ }
446
+ }
447
+ else {
448
+ while ((bit = bv_scan_next(bv)) >= 0) {
449
+ rb_ary_push(ary, INT2FIX(bit));
450
+ }
451
+ }
452
+ return ary;
453
+ }
454
+
455
+ static VALUE mUtils;
456
+
457
+ /*
458
+ * Document-class: Ferret::Utils::BitVector
459
+ *
460
+ * == Summary
461
+ *
462
+ * A BitVector is pretty easy to implement in Ruby using Ruby's BigNum class.
463
+ * This BitVector however allows you to count the set bits with the
464
+ * +#count+ method (or unset bits of flipped bit vectors) and also
465
+ * to quickly scan the set bits.
466
+ *
467
+ * == Boolean Operations
468
+ *
469
+ * BitVector handles four boolean operations;
470
+ *
471
+ * * +&+
472
+ * * +|+
473
+ * * +^+
474
+ * * +~+
475
+ *
476
+ * bv1 = BitVector.new
477
+ * bv2 = BitVector.new
478
+ * bv3 = BitVector.new
479
+ *
480
+ * bv4 = (bv1 & bv2) | ~bv3
481
+ *
482
+ * You can also do the operations in-place;
483
+ *
484
+ * * +and!+
485
+ * * +or!+
486
+ * * +xor!+
487
+ * * +not!+
488
+ *
489
+ * bv4.and!(bv5).not!
490
+ *
491
+ * == Set Bit Scanning
492
+ *
493
+ * Perhaps the most useful functionality in BitVector is the ability to
494
+ * quickly scan for set bits. To print all set bits;
495
+ *
496
+ * bv.each {|bit| puts bit }
497
+ *
498
+ * Alternatively you could use the lower level +next+ or
499
+ * +next_unset+ methods. Note that the +each+ method will
500
+ * automatically scan unset bits if the BitVector has been flipped (using
501
+ * +not+).
502
+ */
503
+ static void
504
+ Init_BitVector(void)
505
+ {
506
+ /* BitVector */
507
+ cBitVector = rb_define_class_under(mUtils, "BitVector", rb_cObject);
508
+ rb_define_alloc_func(cBitVector, frt_bv_alloc);
509
+
510
+ rb_define_method(cBitVector, "initialize", frt_bv_init, 0);
511
+ rb_define_method(cBitVector, "set", frt_bv_set_on, 1);
512
+ rb_define_method(cBitVector, "unset", frt_bv_set_off, 1);
513
+ rb_define_method(cBitVector, "[]=", frt_bv_set, 2);
514
+ rb_define_method(cBitVector, "get", frt_bv_get, 1);
515
+ rb_define_method(cBitVector, "[]", frt_bv_get, 1);
516
+ rb_define_method(cBitVector, "count", frt_bv_count, 0);
517
+ rb_define_method(cBitVector, "clear", frt_bv_clear, 0);
518
+ rb_define_method(cBitVector, "eql?", frt_bv_eql, 1);
519
+ rb_define_method(cBitVector, "==", frt_bv_eql, 1);
520
+ rb_define_method(cBitVector, "hash", frt_bv_hash, 0);
521
+ rb_define_method(cBitVector, "and!", frt_bv_and_x, 1);
522
+ rb_define_method(cBitVector, "and", frt_bv_and, 1);
523
+ rb_define_method(cBitVector, "&", frt_bv_and, 1);
524
+ rb_define_method(cBitVector, "or!", frt_bv_or_x, 1);
525
+ rb_define_method(cBitVector, "or", frt_bv_or, 1);
526
+ rb_define_method(cBitVector, "|", frt_bv_or, 1);
527
+ rb_define_method(cBitVector, "xor!", frt_bv_xor_x, 1);
528
+ rb_define_method(cBitVector, "xor", frt_bv_xor, 1);
529
+ rb_define_method(cBitVector, "^", frt_bv_xor, 1);
530
+ rb_define_method(cBitVector, "not!", frt_bv_not_x, 0);
531
+ rb_define_method(cBitVector, "not", frt_bv_not, 0);
532
+ rb_define_method(cBitVector, "~", frt_bv_not, 0);
533
+ rb_define_method(cBitVector, "reset_scan", frt_bv_reset_scan, 0);
534
+ rb_define_method(cBitVector, "next", frt_bv_next, 0);
535
+ rb_define_method(cBitVector, "next_unset", frt_bv_next_unset, 0);
536
+ rb_define_method(cBitVector, "next_from", frt_bv_next_from, 1);
537
+ rb_define_method(cBitVector, "next_unset_from", frt_bv_next_unset_from, 1);
538
+ rb_define_method(cBitVector, "each", frt_bv_each, 0);
539
+ rb_define_method(cBitVector, "to_a", frt_bv_to_a, 0);
540
+ }
541
+
542
+ /*********************
543
+ *** PriorityQueue ***
544
+ *********************/
545
+ typedef struct PriQ /* heap-backed priority queue wrapped by the Ruby PriorityQueue class */
546
+ {
547
+ int size; /* number of elements currently stored; the top element lives in heap[1] */
548
+ int capa; /* maximum number of elements kept; frt_pq_insert only pushes while size < capa */
549
+ int mem_capa; /* number of VALUE slots allocated for heap; doubled by pq_push when full */
550
+ VALUE *heap; /* element storage, indexed from 1 (slot 0 is never assigned an element) */
551
+ VALUE proc; /* comparison callable invoked via id_call, or Qnil to compare elements with id_lt */
552
+ } PriQ;
553
+ /* starting value for both capa and mem_capa of a newly allocated queue */
554
+ #define PQ_START_CAPA 32
555
+
556
+ static bool frt_pq_lt(VALUE proc, VALUE v1, VALUE v2)
557
+ {
558
+ if (proc == Qnil) {
559
+ return RTEST(rb_funcall(v1, id_lt, 1, v2));
560
+ }
561
+ else {
562
+ return RTEST(rb_funcall(proc, id_call, 2, v1, v2));
563
+ }
564
+ }
565
+
566
+ static void pq_up(PriQ *pq)
567
+ {
568
+ VALUE *heap = pq->heap;
569
+ VALUE node;
570
+ int i = pq->size;
571
+ int j = i >> 1;
572
+
573
+ node = heap[i];
574
+
575
+ while ((j > 0) && frt_pq_lt(pq->proc, node, heap[j])) {
576
+ heap[i] = heap[j];
577
+ i = j;
578
+ j = j >> 1;
579
+ }
580
+ heap[i] = node;
581
+ }
582
+
583
+ static void pq_down(PriQ *pq)
584
+ {
585
+ register int i = 1;
586
+ register int j = 2; /* i << 1; */
587
+ register int k = 3; /* j + 1; */
588
+ register int size = pq->size;
589
+ VALUE *heap = pq->heap;
590
+ VALUE node = heap[i]; /* save top node */
591
+
592
+ if ((k <= size) && (frt_pq_lt(pq->proc, heap[k], heap[j]))) {
593
+ j = k;
594
+ }
595
+
596
+ while ((j <= size) && frt_pq_lt(pq->proc, heap[j], node)) {
597
+ heap[i] = heap[j]; /* shift up child */
598
+ i = j;
599
+ j = i << 1;
600
+ k = j + 1;
601
+ if ((k <= size) && frt_pq_lt(pq->proc, heap[k], heap[j])) {
602
+ j = k;
603
+ }
604
+ }
605
+ heap[i] = node;
606
+ }
607
+
608
+ static void pq_push(PriQ *pq, VALUE elem)
609
+ {
610
+ pq->size++;
611
+ if (pq->size >= pq->mem_capa) {
612
+ pq->mem_capa <<= 1;
613
+ REALLOC_N(pq->heap, VALUE, pq->mem_capa);
614
+ }
615
+ pq->heap[pq->size] = elem;
616
+ pq_up(pq);
617
+ }
618
+
619
+ static VALUE cPriorityQueue;
620
+
621
+ static void
622
+ frt_pq_mark(void *p)
623
+ {
624
+ PriQ *pq = (PriQ *)p;
625
+ int i;
626
+ for (i = pq->size; i > 0; i--) {
627
+ rb_gc_mark_maybe(pq->heap[i]);
628
+ }
629
+ }
630
+
631
+ static void frt_pq_free(PriQ *pq)
632
+ {
633
+ free(pq->heap);
634
+ free(pq);
635
+ }
636
+
637
+ static VALUE
638
+ frt_pq_alloc(VALUE klass)
639
+ {
640
+ PriQ *pq = ALLOC_AND_ZERO(PriQ);
641
+ pq->capa = PQ_START_CAPA;
642
+ pq->mem_capa = PQ_START_CAPA;
643
+ pq->heap = ALLOC_N(VALUE, PQ_START_CAPA);
644
+ pq->proc = Qnil;
645
+ return Data_Wrap_Struct(klass, &frt_pq_mark, &frt_pq_free, pq);
646
+ }
647
+
648
+ #define GET_PQ(pq, self) Data_Get_Struct(self, PriQ, pq)
649
+ /*
650
+ * call-seq:
651
+ * PriorityQueue.new(capacity = 32) -> new_pq
652
+ * PriorityQueue.new({:capacity => 32,
653
+ * :less_than_proc => lambda{|a, b| a < b}) -> new_pq
654
+ * PriorityQueue.new({:capacity => 32}) {|a, b| a < b} -> new_pq
655
+ *
656
+ * Returns a new empty priority queue object with an optional capacity.
657
+ * Once the capacity is filled, the lowest valued elements will be
658
+ * automatically popped off the top of the queue as more elements are
659
+ * inserted into the queue.
660
+ */
661
+ static VALUE
662
+ frt_pq_init(int argc, VALUE *argv, VALUE self)
663
+ {
664
+ if (argc >= 1) {
665
+ PriQ *pq;
666
+ VALUE options = argv[0];
667
+ VALUE param;
668
+ int capa = PQ_START_CAPA;
669
+ GET_PQ(pq, self);
670
+ switch (TYPE(options)) {
671
+ case T_FIXNUM:
672
+ capa = FIX2INT(options);
673
+ break;
674
+ case T_HASH:
675
+ if (!NIL_P(param = rb_hash_aref(options,
676
+ ID2SYM(id_capacity)))) {
677
+ capa = FIX2INT(param);
678
+ }
679
+ if (!NIL_P(param = rb_hash_aref(options,
680
+ ID2SYM(id_less_than)))) {
681
+ pq->proc = param;
682
+ }
683
+ break;
684
+ default:
685
+ rb_raise(rb_eArgError,
686
+ "PriorityQueue#initialize only takes a Hash or "
687
+ "an integer");
688
+
689
+ break;
690
+ }
691
+ if (capa < 0) {
692
+ rb_raise(rb_eIndexError,
693
+ "PriorityQueue must have a capacity > 0. %d < 0",
694
+ index);
695
+ }
696
+ pq->capa = capa;
697
+ if (rb_block_given_p()) {
698
+ pq->proc = rb_block_proc();
699
+ }
700
+ if (argc > 1) {
701
+ rb_raise(rb_eArgError,
702
+ "PriorityQueue#initialize only takes one parameter");
703
+ }
704
+ }
705
+
706
+ return self;
707
+ }
708
+
709
+ /*
710
+ * call-seq:
711
+ * pq.clone -> pq_clone
712
+ *
713
+ * Returns a shallow clone of the priority queue. That is only the priority
714
+ * queue is cloned, its contents are not cloned.
715
+ */
716
+ static VALUE
717
+ frt_pq_clone(VALUE self)
718
+ {
719
+ PriQ *pq, *new_pq = ALLOC(PriQ);
720
+ GET_PQ(pq, self);
721
+ memcpy(new_pq, pq, sizeof(PriQ));
722
+ new_pq->heap = ALLOC_N(VALUE, new_pq->mem_capa);
723
+ memcpy(new_pq->heap, pq->heap, sizeof(VALUE) * (new_pq->size + 1));
724
+
725
+ return Data_Wrap_Struct(cPriorityQueue, &frt_pq_mark, &frt_pq_free, new_pq);
726
+ }
727
+
728
+ /*
729
+ * call-seq:
730
+ * pq.clear -> self
731
+ *
732
+ * Clears all elements from the priority queue. The size will be reset to 0.
733
+ */
734
+ static VALUE
735
+ frt_pq_clear(VALUE self)
736
+ {
737
+ PriQ *pq;
738
+ GET_PQ(pq, self);
739
+ pq->size = 0;
740
+ return self;
741
+ }
742
+
743
+ /*
744
+ * call-seq:
745
+ * pq.insert(elem) -> self
746
+ * pq << elem -> self
747
+ *
748
+ * Insert an element into a queue. It will be inserted into the correct
749
+ * position in the queue according to its priority.
750
+ */
751
+ static VALUE
752
+ frt_pq_insert(VALUE self, VALUE elem)
753
+ {
754
+ PriQ *pq;
755
+ GET_PQ(pq, self);
756
+ if (pq->size < pq->capa) {
757
+ pq_push(pq, elem);
758
+ }
759
+ else if (pq->size > 0 && frt_pq_lt(pq->proc, pq->heap[1], elem)) {
760
+ pq->heap[1] = elem;
761
+ pq_down(pq);
762
+ }
763
+ /* else ignore the element */
764
+ return self;
765
+ }
766
+
767
+ /*
768
+ * call-seq:
769
+ * pq.adjust -> self
770
+ *
771
+ * Sometimes you modify the top element in the priority queue so that its
772
+ * priority changes. When you do this you need to reorder the queue and you
773
+ * do this by calling the adjust method.
774
+ */
775
+ static VALUE
776
+ frt_pq_adjust(VALUE self)
777
+ {
778
+ PriQ *pq;
779
+ GET_PQ(pq, self);
780
+ pq_down(pq);
781
+ return self;
782
+ }
783
+
784
+ /*
785
+ * call-seq:
786
+ * pq.top -> elem
787
+ *
788
+ * Returns the top element in the queue but does not remove it from the
789
+ * queue.
790
+ */
791
+ static VALUE
792
+ frt_pq_top(VALUE self)
793
+ {
794
+ PriQ *pq;
795
+ GET_PQ(pq, self);
796
+ return (pq->size > 0) ? pq->heap[1] : Qnil;
797
+ }
798
+
799
+ /*
800
+ * call-seq:
801
+ * pq.pop -> elem
802
+ *
803
+ * Returns the top element in the queue removing it from the queue.
804
+ */
805
+ static VALUE
806
+ frt_pq_pop(VALUE self)
807
+ {
808
+ PriQ *pq;
809
+ GET_PQ(pq, self);
810
+ if (pq->size > 0) {
811
+ VALUE result = pq->heap[1]; /* save first value */
812
+ pq->heap[1] = pq->heap[pq->size]; /* move last to first */
813
+ pq->heap[pq->size] = Qnil;
814
+ pq->size--;
815
+ pq_down(pq); /* adjust heap */
816
+ return result;
817
+ }
818
+ else {
819
+ return Qnil;
820
+ }
821
+ }
822
+
823
+ /*
824
+ * call-seq:
825
+ * pq.size -> integer
826
+ *
827
+ * Returns the size of the queue, ie. the number of elements currently stored
828
+ * in the queue. The _size_ of a PriorityQueue can never be greater than
829
+ * its _capacity_
830
+ */
831
+ static VALUE
832
+ frt_pq_size(VALUE self)
833
+ {
834
+ PriQ *pq;
835
+ GET_PQ(pq, self);
836
+ return INT2FIX(pq->size);
837
+ }
838
+
839
+ /*
840
+ * call-seq:
841
+ * pq.capacity -> integer
842
+ *
843
+ * Returns the capacity of the queue, ie. the number of elements that can be
844
+ * stored in a Priority queue before they start to drop off the end. The
845
+ * _size_ of a PriorityQueue can never be greater than its
846
+ * _capacity_
847
+ */
848
+ static VALUE
849
+ frt_pq_capa(VALUE self)
850
+ {
851
+ PriQ *pq;
852
+ GET_PQ(pq, self);
853
+ return INT2FIX(pq->capa);
854
+ }
855
+
856
+ /*
857
+ * Document-class: Ferret::Utils::PriorityQueue
858
+ *
859
+ * == Summary
860
+ *
861
+ * A PriorityQueue is a very useful data structure and one that needs a fast
862
+ * implementation. Hence this priority queue is implemented in C. It is
863
+ * pretty easy to use; basically you just insert elements into the queue and
864
+ * pop them off.
865
+ *
866
+ * The elements are sorted with the lowest valued elements on the top of
867
+ * the heap, ie the first to be popped off. Elements are ordered using the
868
+ * less_than '<' method. To change the order of the queue you can either
869
+ * reimplement the '<' method or pass a block when you initialize the queue.
870
+ *
871
+ * You can also set the capacity of the PriorityQueue. Once you hit the
872
+ * capacity, the lowest valued elements are automatically popped off the top
873
+ * of the queue as more elements are added.
874
+ *
875
+ * == Example
876
+ *
877
+ * Here is a toy example that sorts strings by their length and has a capacity
878
+ * of 5;
879
+ *
880
+ * q = PriorityQueue.new(5) {|a, b| a.size < b.size}
881
+ * q << "x"
882
+ * q << "xxxxx"
883
+ * q << "xxx"
884
+ * q << "xxxx"
885
+ * q << "xxxxxx"
886
+ * q << "xx" # hit capacity so "x" will be popped off the top
887
+ *
888
+ * puts q.size #=> 5
889
+ * word = q.pop #=> "xx"
890
+ * q.top << "yyyy" # "xxxyyyy" will still be at the top of the queue
891
+ * q.adjust # move "xxxyyyy" to its correct location in queue
892
+ * word = q.pop #=> "xxxx"
893
+ * word = q.pop #=> "xxxxx"
894
+ * word = q.pop #=> "xxxxxx"
895
+ * word = q.pop #=> "xxxyyyy"
896
+ * word = q.pop #=> nil
897
+ */
898
+ static void
899
+ Init_PriorityQueue(void)
900
+ {
901
+ /* PriorityQueue */
902
+ cPriorityQueue = rb_define_class_under(mUtils, "PriorityQueue", rb_cObject);
903
+ rb_define_alloc_func(cPriorityQueue, frt_pq_alloc);
904
+
905
+ rb_define_method(cPriorityQueue, "initialize", frt_pq_init, -1);
906
+ rb_define_method(cPriorityQueue, "clone", frt_pq_clone, 0);
907
+ rb_define_method(cPriorityQueue, "clear", frt_pq_clear, 0);
908
+ rb_define_method(cPriorityQueue, "insert", frt_pq_insert, 1);
909
+ rb_define_method(cPriorityQueue, "<<", frt_pq_insert, 1);
910
+ rb_define_method(cPriorityQueue, "top", frt_pq_top, 0);
911
+ rb_define_method(cPriorityQueue, "pop", frt_pq_pop, 0);
912
+ rb_define_method(cPriorityQueue, "size", frt_pq_size, 0);
913
+ rb_define_method(cPriorityQueue, "capacity", frt_pq_capa, 0);
914
+ rb_define_method(cPriorityQueue, "adjust", frt_pq_adjust, 0);
915
+ }
916
+
917
+ /* rdoc hack
918
+ extern VALUE mFerret = rb_define_module("Ferret");
919
+ */
920
+
921
+ /*
922
+ * Document-module: Ferret::Utils
923
+ *
924
+ * The Utils module contains a number of helper classes and modules that are
925
+ * useful when indexing with Ferret. They are;
926
+ *
927
+ * * BitVector
928
+ * * PriorityQueue
929
+ * * => more to come
930
+ *
931
+ * These helper classes could also be quite useful outside of Ferret and may
932
+ * one day find themselves in their own separate library.
933
+ */
934
+ void
935
+ Init_Utils(void)
936
+ {
937
+ mUtils = rb_define_module_under(mFerret, "Utils");
938
+
939
+ Init_BitVector();
940
+ Init_PriorityQueue();
941
+ }