jk-ferret 0.11.8.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (228) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +90 -0
  4. data/RELEASE_CHANGES +137 -0
  5. data/RELEASE_NOTES +60 -0
  6. data/Rakefile +443 -0
  7. data/TODO +109 -0
  8. data/TUTORIAL +231 -0
  9. data/bin/ferret-browser +79 -0
  10. data/ext/BZLIB_blocksort.c +1094 -0
  11. data/ext/BZLIB_bzlib.c +1578 -0
  12. data/ext/BZLIB_compress.c +672 -0
  13. data/ext/BZLIB_crctable.c +104 -0
  14. data/ext/BZLIB_decompress.c +626 -0
  15. data/ext/BZLIB_huffman.c +205 -0
  16. data/ext/BZLIB_randtable.c +84 -0
  17. data/ext/STEMMER_api.c +66 -0
  18. data/ext/STEMMER_libstemmer.c +93 -0
  19. data/ext/STEMMER_stem_ISO_8859_1_danish.c +337 -0
  20. data/ext/STEMMER_stem_ISO_8859_1_dutch.c +624 -0
  21. data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
  22. data/ext/STEMMER_stem_ISO_8859_1_finnish.c +762 -0
  23. data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
  24. data/ext/STEMMER_stem_ISO_8859_1_german.c +503 -0
  25. data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
  26. data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
  27. data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
  28. data/ext/STEMMER_stem_ISO_8859_1_porter.c +749 -0
  29. data/ext/STEMMER_stem_ISO_8859_1_portuguese.c +1017 -0
  30. data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
  31. data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
  32. data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
  33. data/ext/STEMMER_stem_KOI8_R_russian.c +700 -0
  34. data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
  35. data/ext/STEMMER_stem_UTF_8_dutch.c +634 -0
  36. data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
  37. data/ext/STEMMER_stem_UTF_8_finnish.c +768 -0
  38. data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
  39. data/ext/STEMMER_stem_UTF_8_german.c +509 -0
  40. data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
  41. data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
  42. data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
  43. data/ext/STEMMER_stem_UTF_8_porter.c +755 -0
  44. data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
  45. data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
  46. data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
  47. data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
  48. data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
  49. data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
  50. data/ext/STEMMER_utilities.c +478 -0
  51. data/ext/analysis.c +1710 -0
  52. data/ext/analysis.h +266 -0
  53. data/ext/api.h +26 -0
  54. data/ext/array.c +125 -0
  55. data/ext/array.h +62 -0
  56. data/ext/bitvector.c +96 -0
  57. data/ext/bitvector.h +594 -0
  58. data/ext/bzlib.h +282 -0
  59. data/ext/bzlib_private.h +503 -0
  60. data/ext/compound_io.c +384 -0
  61. data/ext/config.h +52 -0
  62. data/ext/document.c +159 -0
  63. data/ext/document.h +63 -0
  64. data/ext/except.c +102 -0
  65. data/ext/except.h +176 -0
  66. data/ext/extconf.rb +15 -0
  67. data/ext/ferret.c +416 -0
  68. data/ext/ferret.h +94 -0
  69. data/ext/field_index.c +262 -0
  70. data/ext/field_index.h +52 -0
  71. data/ext/filter.c +157 -0
  72. data/ext/fs_store.c +493 -0
  73. data/ext/global.c +458 -0
  74. data/ext/global.h +302 -0
  75. data/ext/hash.c +524 -0
  76. data/ext/hash.h +515 -0
  77. data/ext/hashset.c +192 -0
  78. data/ext/hashset.h +215 -0
  79. data/ext/header.h +58 -0
  80. data/ext/helper.c +63 -0
  81. data/ext/helper.h +21 -0
  82. data/ext/index.c +6804 -0
  83. data/ext/index.h +935 -0
  84. data/ext/internal.h +1019 -0
  85. data/ext/lang.c +10 -0
  86. data/ext/lang.h +68 -0
  87. data/ext/libstemmer.h +79 -0
  88. data/ext/mempool.c +88 -0
  89. data/ext/mempool.h +43 -0
  90. data/ext/modules.h +190 -0
  91. data/ext/multimapper.c +351 -0
  92. data/ext/multimapper.h +60 -0
  93. data/ext/posh.c +1006 -0
  94. data/ext/posh.h +973 -0
  95. data/ext/priorityqueue.c +149 -0
  96. data/ext/priorityqueue.h +155 -0
  97. data/ext/q_boolean.c +1621 -0
  98. data/ext/q_const_score.c +162 -0
  99. data/ext/q_filtered_query.c +212 -0
  100. data/ext/q_fuzzy.c +280 -0
  101. data/ext/q_match_all.c +149 -0
  102. data/ext/q_multi_term.c +673 -0
  103. data/ext/q_parser.c +3103 -0
  104. data/ext/q_phrase.c +1206 -0
  105. data/ext/q_prefix.c +98 -0
  106. data/ext/q_range.c +682 -0
  107. data/ext/q_span.c +2390 -0
  108. data/ext/q_term.c +337 -0
  109. data/ext/q_wildcard.c +167 -0
  110. data/ext/r_analysis.c +2626 -0
  111. data/ext/r_index.c +3468 -0
  112. data/ext/r_qparser.c +635 -0
  113. data/ext/r_search.c +4490 -0
  114. data/ext/r_store.c +513 -0
  115. data/ext/r_utils.c +1131 -0
  116. data/ext/ram_store.c +476 -0
  117. data/ext/scanner.c +895 -0
  118. data/ext/scanner.h +36 -0
  119. data/ext/scanner_mb.c +6701 -0
  120. data/ext/scanner_utf8.c +4415 -0
  121. data/ext/search.c +1864 -0
  122. data/ext/search.h +953 -0
  123. data/ext/similarity.c +151 -0
  124. data/ext/similarity.h +89 -0
  125. data/ext/sort.c +786 -0
  126. data/ext/stem_ISO_8859_1_danish.h +16 -0
  127. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  128. data/ext/stem_ISO_8859_1_english.h +16 -0
  129. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  130. data/ext/stem_ISO_8859_1_french.h +16 -0
  131. data/ext/stem_ISO_8859_1_german.h +16 -0
  132. data/ext/stem_ISO_8859_1_hungarian.h +16 -0
  133. data/ext/stem_ISO_8859_1_italian.h +16 -0
  134. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  135. data/ext/stem_ISO_8859_1_porter.h +16 -0
  136. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  137. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  138. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  139. data/ext/stem_ISO_8859_2_romanian.h +16 -0
  140. data/ext/stem_KOI8_R_russian.h +16 -0
  141. data/ext/stem_UTF_8_danish.h +16 -0
  142. data/ext/stem_UTF_8_dutch.h +16 -0
  143. data/ext/stem_UTF_8_english.h +16 -0
  144. data/ext/stem_UTF_8_finnish.h +16 -0
  145. data/ext/stem_UTF_8_french.h +16 -0
  146. data/ext/stem_UTF_8_german.h +16 -0
  147. data/ext/stem_UTF_8_hungarian.h +16 -0
  148. data/ext/stem_UTF_8_italian.h +16 -0
  149. data/ext/stem_UTF_8_norwegian.h +16 -0
  150. data/ext/stem_UTF_8_porter.h +16 -0
  151. data/ext/stem_UTF_8_portuguese.h +16 -0
  152. data/ext/stem_UTF_8_romanian.h +16 -0
  153. data/ext/stem_UTF_8_russian.h +16 -0
  154. data/ext/stem_UTF_8_spanish.h +16 -0
  155. data/ext/stem_UTF_8_swedish.h +16 -0
  156. data/ext/stem_UTF_8_turkish.h +16 -0
  157. data/ext/stopwords.c +410 -0
  158. data/ext/store.c +698 -0
  159. data/ext/store.h +799 -0
  160. data/ext/symbol.c +10 -0
  161. data/ext/symbol.h +23 -0
  162. data/ext/term_vectors.c +73 -0
  163. data/ext/threading.h +31 -0
  164. data/ext/win32.h +62 -0
  165. data/lib/ferret.rb +30 -0
  166. data/lib/ferret/browser.rb +246 -0
  167. data/lib/ferret/browser/s/global.js +192 -0
  168. data/lib/ferret/browser/s/style.css +148 -0
  169. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  170. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  171. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  172. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  173. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  174. data/lib/ferret/browser/views/layout.rhtml +22 -0
  175. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  176. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  177. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  178. data/lib/ferret/browser/webrick.rb +14 -0
  179. data/lib/ferret/document.rb +130 -0
  180. data/lib/ferret/field_infos.rb +44 -0
  181. data/lib/ferret/field_symbol.rb +87 -0
  182. data/lib/ferret/index.rb +973 -0
  183. data/lib/ferret/number_tools.rb +157 -0
  184. data/lib/ferret/version.rb +3 -0
  185. data/setup.rb +1555 -0
  186. data/test/long_running/largefile/tc_largefile.rb +46 -0
  187. data/test/test_all.rb +5 -0
  188. data/test/test_helper.rb +29 -0
  189. data/test/test_installed.rb +1 -0
  190. data/test/threading/number_to_spoken.rb +132 -0
  191. data/test/threading/thread_safety_index_test.rb +88 -0
  192. data/test/threading/thread_safety_read_write_test.rb +73 -0
  193. data/test/threading/thread_safety_test.rb +133 -0
  194. data/test/unit/analysis/tc_analyzer.rb +550 -0
  195. data/test/unit/analysis/tc_token_stream.rb +653 -0
  196. data/test/unit/index/tc_index.rb +867 -0
  197. data/test/unit/index/tc_index_reader.rb +699 -0
  198. data/test/unit/index/tc_index_writer.rb +447 -0
  199. data/test/unit/index/th_doc.rb +332 -0
  200. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  201. data/test/unit/search/tc_filter.rb +156 -0
  202. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  203. data/test/unit/search/tc_index_searcher.rb +67 -0
  204. data/test/unit/search/tc_multi_searcher.rb +128 -0
  205. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  206. data/test/unit/search/tc_search_and_sort.rb +179 -0
  207. data/test/unit/search/tc_sort.rb +49 -0
  208. data/test/unit/search/tc_sort_field.rb +27 -0
  209. data/test/unit/search/tc_spans.rb +190 -0
  210. data/test/unit/search/tm_searcher.rb +436 -0
  211. data/test/unit/store/tc_fs_store.rb +115 -0
  212. data/test/unit/store/tc_ram_store.rb +35 -0
  213. data/test/unit/store/tm_store.rb +34 -0
  214. data/test/unit/store/tm_store_lock.rb +68 -0
  215. data/test/unit/tc_document.rb +81 -0
  216. data/test/unit/tc_field_symbol.rb +26 -0
  217. data/test/unit/ts_analysis.rb +2 -0
  218. data/test/unit/ts_index.rb +2 -0
  219. data/test/unit/ts_largefile.rb +4 -0
  220. data/test/unit/ts_query_parser.rb +2 -0
  221. data/test/unit/ts_search.rb +2 -0
  222. data/test/unit/ts_store.rb +2 -0
  223. data/test/unit/ts_utils.rb +2 -0
  224. data/test/unit/utils/tc_bit_vector.rb +295 -0
  225. data/test/unit/utils/tc_number_tools.rb +117 -0
  226. data/test/unit/utils/tc_priority_queue.rb +106 -0
  227. data/test/utils/content_generator.rb +226 -0
  228. metadata +319 -0
data/ext/r_utils.c ADDED
@@ -0,0 +1,1131 @@
1
+ #include "ferret.h"
2
+ #include "bitvector.h"
3
+ #include "multimapper.h"
4
+ #ifdef FRT_RUBY_VERSION_1_9
5
+ # include <ruby/st.h>
6
+ #else
7
+ # include <st.h>
8
+ #endif
9
+
10
+ /*****************
11
+ *** BitVector ***
12
+ *****************/
13
+ static VALUE cBitVector;
14
+
15
+ static void
16
+ frb_bv_free(void *p)
17
+ {
18
+ object_del(p);
19
+ bv_destroy((BitVector *)p);
20
+ }
21
+
22
+ static VALUE
23
+ frb_bv_alloc(VALUE klass)
24
+ {
25
+ BitVector *bv = bv_new();
26
+ VALUE rbv = Data_Wrap_Struct(klass, NULL, &frb_bv_free, bv);
27
+ object_add(bv, rbv);
28
+ return rbv;
29
+ }
30
+
31
+ #define GET_BV(bv, self) Data_Get_Struct(self, BitVector, bv)
32
+
33
+ VALUE
34
+ frb_get_bv(BitVector *bv)
35
+ {
36
+ VALUE rbv;
37
+ if ((rbv = object_get(bv)) == Qnil) {
38
+ rbv = Data_Wrap_Struct(cBitVector, NULL, &frb_bv_free, bv);
39
+ REF(bv);
40
+ object_add(bv, rbv);
41
+ }
42
+ return rbv;
43
+ }
44
+
45
+ /*
46
+ * call-seq:
47
+ * BitVector.new() -> new_bit_vector
48
+ *
49
+ * Returns a new empty bit vector object
50
+ */
51
+ static VALUE
52
+ frb_bv_init(VALUE self)
53
+ {
54
+ return self;
55
+ }
56
+
57
+ /*
58
+ * call-seq:
59
+ * bv[i] = bool -> bool
60
+ *
61
+ * Set the bit and _i_ to *val* (+true+ or
62
+ * +false+).
63
+ */
64
+ VALUE
65
+ frb_bv_set(VALUE self, VALUE rindex, VALUE rstate)
66
+ {
67
+ BitVector *bv;
68
+ int index = FIX2INT(rindex);
69
+ GET_BV(bv, self);
70
+ if (index < 0) {
71
+ rb_raise(rb_eIndexError, "%d < 0", index);
72
+ }
73
+ if (RTEST(rstate)) {
74
+ bv_set(bv, index);
75
+ }
76
+ else {
77
+ bv_unset(bv, index);
78
+ }
79
+
80
+ return rstate;
81
+ }
82
+
83
+ /*
84
+ * call-seq:
85
+ * bv.set(i) -> self
86
+ *
87
+ * Set the bit at _i_ to *on* (+true+)
88
+ */
89
+ VALUE
90
+ frb_bv_set_on(VALUE self, VALUE rindex)
91
+ {
92
+ frb_bv_set(self, rindex, Qtrue);
93
+ return self;
94
+ }
95
+
96
+ /*
97
+ * call-seq:
98
+ * bv.unset(i) -> self
99
+ *
100
+ * Set the bit at _i_ to *off* (+false+)
101
+ */
102
+ VALUE
103
+ frb_bv_set_off(VALUE self, VALUE rindex)
104
+ {
105
+ frb_bv_set(self, rindex, Qfalse);
106
+ return self;
107
+ }
108
+
109
+ /*
110
+ * call-seq:
111
+ * bv.get(i) -> bool
112
+ * bv[i] -> bool
113
+ *
114
+ * Get the bit value at _i_
115
+ */
116
+ VALUE
117
+ frb_bv_get(VALUE self, VALUE rindex)
118
+ {
119
+ BitVector *bv;
120
+ int index = FIX2INT(rindex);
121
+ GET_BV(bv, self);
122
+ if (index < 0) {
123
+ rb_raise(rb_eIndexError, "%d < 0", index);
124
+ }
125
+
126
+ return bv_get(bv, index) ? Qtrue : Qfalse;
127
+ }
128
+
129
+ /*
130
+ * call-seq:
131
+ * bv.count -> bit_count
132
+ *
133
+ * Count the number of bits set in the bit vector. If the bit vector has been
134
+ * negated using +#not+ then count the number of unset bits
135
+ * instead.
136
+ */
137
+ VALUE
138
+ frb_bv_count(VALUE self)
139
+ {
140
+ BitVector *bv;
141
+ GET_BV(bv, self);
142
+ return INT2FIX(bv->count);
143
+ }
144
+
145
+ /*
146
+ * call-seq:
147
+ * bv.clear -> self
148
+ *
149
+ * Clears all set bits in the bit vector. Negated bit vectors will still have
150
+ * all bits set to *off*.
151
+ */
152
+ VALUE
153
+ frb_bv_clear(VALUE self)
154
+ {
155
+ BitVector *bv;
156
+ GET_BV(bv, self);
157
+ bv_clear(bv);
158
+ bv_scan_reset(bv);
159
+ return self;
160
+ }
161
+
162
+ /*
163
+ * call-seq:
164
+ * bv1 == bv2 -> bool
165
+ * bv1 != bv2 -> bool
166
+ * bv1.eql(bv2) -> bool
167
+ *
168
+ * Compares two bit vectors and returns true if both bit vectors have the same
169
+ * bits set.
170
+ */
171
+ VALUE
172
+ frb_bv_eql(VALUE self, VALUE other)
173
+ {
174
+ BitVector *bv1, *bv2;
175
+ GET_BV(bv1, self);
176
+ GET_BV(bv2, other);
177
+ return bv_eq(bv1, bv2) ? Qtrue : Qfalse;
178
+ }
179
+
180
+ /*
181
+ * call-seq:
182
+ * bv.hash -> int
183
+ *
184
+ * Used to store bit vectors in Hashes. Especially useful if you want to
185
+ * cache them.
186
+ */
187
+ VALUE
188
+ frb_bv_hash(VALUE self)
189
+ {
190
+ BitVector *bv;
191
+ GET_BV(bv, self);
192
+ return LONG2NUM(bv_hash(bv));
193
+ }
194
+
195
+ /*
196
+ * call-seq:
197
+ * bv1 & bv2 -> anded_bv
198
+ * bv1.and(bv2) -> anded_bv
199
+ *
200
+ * Perform a boolean _and_ operation on +bv1+ and
201
+ * +bv2+
202
+ */
203
+ VALUE
204
+ frb_bv_and(VALUE self, VALUE other)
205
+ {
206
+ BitVector *bv1, *bv2;
207
+ GET_BV(bv1, self);
208
+ GET_BV(bv2, other);
209
+ return Data_Wrap_Struct(cBitVector, NULL, &bv_destroy, bv_and(bv1, bv2));
210
+ }
211
+
212
+ /*
213
+ * call-seq:
214
+ * bv1.and!(bv2) -> self
215
+ *
216
+ * Perform a boolean _and_ operation on +bv1+ and
217
+ * +bv2+ in place on +bv1+
218
+ */
219
+ VALUE
220
+ frb_bv_and_x(VALUE self, VALUE other)
221
+ {
222
+ BitVector *bv1, *bv2;
223
+ GET_BV(bv1, self);
224
+ GET_BV(bv2, other);
225
+ bv_and_x(bv1, bv2);
226
+ return self;
227
+ }
228
+
229
+ /*
230
+ * call-seq:
231
+ * bv1 | bv2 -> ored_bv
232
+ * bv1.or(bv2) -> ored_bv
233
+ *
234
+ * Perform a boolean _or_ operation on +bv1+ and
235
+ * +bv2+
236
+ */
237
+ VALUE
238
+ frb_bv_or(VALUE self, VALUE other)
239
+ {
240
+ BitVector *bv1, *bv2;
241
+ GET_BV(bv1, self);
242
+ GET_BV(bv2, other);
243
+ return Data_Wrap_Struct(cBitVector, NULL, &bv_destroy, bv_or(bv1, bv2));
244
+ }
245
+
246
+ /*
247
+ * call-seq:
248
+ * bv1.or!(bv2) -> self
249
+ *
250
+ * Perform a boolean _or_ operation on +bv1+ and
251
+ * +bv2+ in place on +bv1+
252
+ */
253
+ VALUE
254
+ frb_bv_or_x(VALUE self, VALUE other)
255
+ {
256
+ BitVector *bv1, *bv2;
257
+ GET_BV(bv1, self);
258
+ GET_BV(bv2, other);
259
+ bv_or_x(bv1, bv2);
260
+ return self;
261
+ }
262
+
263
+ /*
264
+ * call-seq:
265
+ * bv1 ^ bv2 -> xored_bv
266
+ * bv1.xor(bv2) -> xored_bv
267
+ *
268
+ * Perform a boolean _xor_ operation on +bv1+ and
269
+ * +bv2+
270
+ */
271
+ VALUE
272
+ frb_bv_xor(VALUE self, VALUE other)
273
+ {
274
+ BitVector *bv1, *bv2;
275
+ GET_BV(bv1, self);
276
+ GET_BV(bv2, other);
277
+ return Data_Wrap_Struct(cBitVector, NULL, &bv_destroy, bv_xor(bv1, bv2));
278
+ }
279
+
280
+ /*
281
+ * call-seq:
282
+ * bv1.xor!(bv2) -> self
283
+ *
284
+ * Perform a boolean _xor_ operation on +bv1+ and
285
+ * +bv2+ in place on +bv1+
286
+ */
287
+ VALUE
288
+ frb_bv_xor_x(VALUE self, VALUE other)
289
+ {
290
+ BitVector *bv1, *bv2;
291
+ GET_BV(bv1, self);
292
+ GET_BV(bv2, other);
293
+ bv_xor_x(bv1, bv2);
294
+ return self;
295
+ }
296
+
297
+ /*
298
+ * call-seq:
299
+ * ~bv -> bv
300
+ * bv.not -> bv
301
+ *
302
+ * Perform a boolean _not_ operation on +bv+
303
+ * */
304
+ VALUE
305
+ frb_bv_not(VALUE self)
306
+ {
307
+ BitVector *bv;
308
+ GET_BV(bv, self);
309
+ return Data_Wrap_Struct(cBitVector, NULL, &bv_destroy, bv_not(bv));
310
+ }
311
+
312
+ /*
313
+ * call-seq:
314
+ * bv.not! -> self
315
+ *
316
+ * Perform a boolean _not_ operation on +bv+ in-place
317
+ */
318
+ VALUE
319
+ frb_bv_not_x(VALUE self)
320
+ {
321
+ BitVector *bv;
322
+ GET_BV(bv, self);
323
+ bv_not_x(bv);
324
+ return self;
325
+ }
326
+
327
+ /*
328
+ * call-seq:
329
+ * bv.reset_scan -> self
330
+ *
331
+ * Resets the BitVector ready for scanning. You should call this method
332
+ * before calling +#next+ or +#next_unset+. It isn't
333
+ * necessary for the other scan methods or for the +#each+ method.
334
+ */
335
+ VALUE
336
+ frb_bv_reset_scan(VALUE self)
337
+ {
338
+ BitVector *bv;
339
+ GET_BV(bv, self);
340
+ bv_scan_reset(bv);
341
+ return self;
342
+ }
343
+
344
+ /*
345
+ * call-seq:
346
+ * bv.next -> bit_num
347
+ *
348
+ * Returns the next set bit in the bit vector scanning from low order to high
349
+ * order. You should call +#reset_scan+ before calling this method
350
+ * if you want to scan from the beginning. It is automatically reset when you
351
+ * first create the bit vector.
352
+ */
353
+ VALUE
354
+ frb_bv_next(VALUE self)
355
+ {
356
+ BitVector *bv;
357
+ GET_BV(bv, self);
358
+ return INT2FIX(bv_scan_next(bv));
359
+ }
360
+
361
+ /*
362
+ * call-seq:
363
+ * bv.next_unset -> bit_num
364
+ *
365
+ * Returns the next unset bit in the bit vector scanning from low order to
366
+ * high order. This method should only be called on bit vectors which have
367
+ * been flipped (negated). You should call +#reset_scan+ before
368
+ * calling this method if you want to scan from the beginning. It is
369
+ * automatically reset when you first create the bit vector.
370
+ */
371
+ VALUE
372
+ frb_bv_next_unset(VALUE self)
373
+ {
374
+ BitVector *bv;
375
+ GET_BV(bv, self);
376
+ return INT2FIX(bv_scan_next_unset(bv));
377
+ }
378
+
379
+ /*
380
+ * call-seq:
381
+ * bv.next_from(from) -> bit_num
382
+ *
383
+ * Returns the next set bit in the bit vector scanning from low order to
384
+ * high order and starting at +from+. The scan is inclusive so if
385
+ * +from+ is equal to 10 and +bv[10]+ is set it will
386
+ * return the number 10. If the bit vector has been negated than you should
387
+ * use the +#next_unset_from+ method.
388
+ */
389
+ VALUE
390
+ frb_bv_next_from(VALUE self, VALUE rfrom)
391
+ {
392
+ BitVector *bv;
393
+ int from = FIX2INT(rfrom);
394
+ GET_BV(bv, self);
395
+ if (from < 0) {
396
+ from = 0;
397
+ }
398
+ return INT2FIX(bv_scan_next_from(bv, from));
399
+ }
400
+
401
+ /*
402
+ * call-seq:
403
+ * bv.next_unset_from(from) -> bit_num
404
+ *
405
+ * Returns the next unset bit in the bit vector scanning from low order to
406
+ * high order and starting at +from+. The scan is inclusive so if
407
+ * +from+ is equal to 10 and +bv[10]+ is unset it will
408
+ * return the number 10. If the bit vector has not been negated than you
409
+ * should use the +#next_from+ method.
410
+ */
411
+ VALUE
412
+ frb_bv_next_unset_from(VALUE self, VALUE rfrom)
413
+ {
414
+ BitVector *bv;
415
+ int from = FIX2INT(rfrom);
416
+ GET_BV(bv, self);
417
+ if (from < 0) {
418
+ from = 0;
419
+ }
420
+ return INT2FIX(bv_scan_next_unset_from(bv, from));
421
+ }
422
+
423
+ /*
424
+ * call-seq:
425
+ * bv.each { |bit_num| }
426
+ *
427
+ * Iterate through all the set bits in the bit vector yielding each one in
428
+ * order
429
+ */
430
+ VALUE
431
+ frb_bv_each(VALUE self)
432
+ {
433
+ BitVector *bv;
434
+ int bit;
435
+ GET_BV(bv, self);
436
+ bv_scan_reset(bv);
437
+ if (bv->extends_as_ones) {
438
+ while ((bit = bv_scan_next_unset(bv)) >= 0) {
439
+ rb_yield(INT2FIX(bit));
440
+ }
441
+ }
442
+ else {
443
+ while ((bit = bv_scan_next(bv)) >= 0) {
444
+ rb_yield(INT2FIX(bit));
445
+ }
446
+ }
447
+ return self;
448
+ }
449
+
450
+ /*
451
+ * call-seq:
452
+ * bv.to_a
453
+ *
454
+ * Iterate through all the set bits in the bit vector adding the index of
455
+ * each set bit to an array. This is useful if you want to perform array
456
+ * methods on the bit vector. If you want to convert an array to a bit_vector
457
+ * simply do this;
458
+ *
459
+ * bv = [1, 12, 45, 367, 455].inject(BitVector.new) {|bv, i| bv.set(i)}
460
+ */
461
+ VALUE
462
+ frb_bv_to_a(VALUE self)
463
+ {
464
+ BitVector *bv;
465
+ int bit;
466
+ VALUE ary;
467
+ GET_BV(bv, self);
468
+ ary = rb_ary_new();
469
+ bv_scan_reset(bv);
470
+ if (bv->extends_as_ones) {
471
+ while ((bit = bv_scan_next_unset(bv)) >= 0) {
472
+ rb_ary_push(ary, INT2FIX(bit));
473
+ }
474
+ }
475
+ else {
476
+ while ((bit = bv_scan_next(bv)) >= 0) {
477
+ rb_ary_push(ary, INT2FIX(bit));
478
+ }
479
+ }
480
+ return ary;
481
+ }
482
+
483
+ static VALUE mUtils;
484
+
485
+ /*
486
+ * Document-class: Ferret::Utils::BitVector
487
+ *
488
+ * == Summary
489
+ *
490
+ * A BitVector is pretty easy to implement in Ruby using Ruby's BigNum class.
491
+ * This BitVector however allows you to count the set bits with the
492
+ * +#count+ method (or unset bits of flipped bit vectors) and also
493
+ * to quickly scan the set bits.
494
+ *
495
+ * == Boolean Operations
496
+ *
497
+ * BitVector handles four boolean operations;
498
+ *
499
+ * * +&+
500
+ * * +|+
501
+ * * +^+
502
+ * * +~+
503
+ *
504
+ * bv1 = BitVector.new
505
+ * bv2 = BitVector.new
506
+ * bv3 = BitVector.new
507
+ *
508
+ * bv4 = (bv1 & bv2) | ~bv3
509
+ *
510
+ * You can also do the operations in-place;
511
+ *
512
+ * * +and!+
513
+ * * +or!+
514
+ * * +xor!+
515
+ * * +not!+
516
+ *
517
+ * bv4.and!(bv5).not!
518
+ *
519
+ * == Set Bit Scanning
520
+ *
521
+ * Perhaps the most useful functionality in BitVector is the ability to
522
+ * quickly scan for set bits. To print all set bits;
523
+ *
524
+ * bv.each {|bit| puts bit }
525
+ *
526
+ * Alternatively you could use the lower level +next+ or
527
+ * +next_unset+ methods. Note that the +each+ method will
528
+ * automatically scan unset bits if the BitVector has been flipped (using
529
+ * +not+).
530
+ */
531
+ static void
532
+ Init_BitVector(void)
533
+ {
534
+ /* BitVector */
535
+ cBitVector = rb_define_class_under(mUtils, "BitVector", rb_cObject);
536
+ rb_define_alloc_func(cBitVector, frb_bv_alloc);
537
+
538
+ rb_define_method(cBitVector, "initialize", frb_bv_init, 0);
539
+ rb_define_method(cBitVector, "set", frb_bv_set_on, 1);
540
+ rb_define_method(cBitVector, "unset", frb_bv_set_off, 1);
541
+ rb_define_method(cBitVector, "[]=", frb_bv_set, 2);
542
+ rb_define_method(cBitVector, "get", frb_bv_get, 1);
543
+ rb_define_method(cBitVector, "[]", frb_bv_get, 1);
544
+ rb_define_method(cBitVector, "count", frb_bv_count, 0);
545
+ rb_define_method(cBitVector, "clear", frb_bv_clear, 0);
546
+ rb_define_method(cBitVector, "eql?", frb_bv_eql, 1);
547
+ rb_define_method(cBitVector, "==", frb_bv_eql, 1);
548
+ rb_define_method(cBitVector, "hash", frb_bv_hash, 0);
549
+ rb_define_method(cBitVector, "and!", frb_bv_and_x, 1);
550
+ rb_define_method(cBitVector, "and", frb_bv_and, 1);
551
+ rb_define_method(cBitVector, "&", frb_bv_and, 1);
552
+ rb_define_method(cBitVector, "or!", frb_bv_or_x, 1);
553
+ rb_define_method(cBitVector, "or", frb_bv_or, 1);
554
+ rb_define_method(cBitVector, "|", frb_bv_or, 1);
555
+ rb_define_method(cBitVector, "xor!", frb_bv_xor_x, 1);
556
+ rb_define_method(cBitVector, "xor", frb_bv_xor, 1);
557
+ rb_define_method(cBitVector, "^", frb_bv_xor, 1);
558
+ rb_define_method(cBitVector, "not!", frb_bv_not_x, 0);
559
+ rb_define_method(cBitVector, "not", frb_bv_not, 0);
560
+ rb_define_method(cBitVector, "~", frb_bv_not, 0);
561
+ rb_define_method(cBitVector, "reset_scan", frb_bv_reset_scan, 0);
562
+ rb_define_method(cBitVector, "next", frb_bv_next, 0);
563
+ rb_define_method(cBitVector, "next_unset", frb_bv_next_unset, 0);
564
+ rb_define_method(cBitVector, "next_from", frb_bv_next_from, 1);
565
+ rb_define_method(cBitVector, "next_unset_from", frb_bv_next_unset_from, 1);
566
+ rb_define_method(cBitVector, "each", frb_bv_each, 0);
567
+ rb_define_method(cBitVector, "to_a", frb_bv_to_a, 0);
568
+ }
569
+
570
+ /*******************
571
+ *** MultiMapper ***
572
+ *******************/
573
+ static VALUE cMultiMapper;
574
+
575
+ static void
576
+ frb_mulmap_free(void *p)
577
+ {
578
+ object_del(p);
579
+ mulmap_destroy((MultiMapper *)p);
580
+ }
581
+
582
+ static VALUE
583
+ frb_mulmap_alloc(VALUE klass)
584
+ {
585
+ MultiMapper *mulmap = mulmap_new();
586
+ VALUE rmulmap = Data_Wrap_Struct(klass, NULL, &frb_mulmap_free, mulmap);
587
+ object_add(mulmap, rmulmap);
588
+ return rmulmap;
589
+ }
590
+
591
+ /* XXX: Duplication from frb_add_mapping_i in r_analysis.c */
592
+ static INLINE void frb_mulmap_add_mapping_i(MultiMapper *mulmap, VALUE from,
593
+ const char *to)
594
+ {
595
+ switch (TYPE(from)) {
596
+ case T_STRING:
597
+ mulmap_add_mapping(mulmap, rs2s(from), to);
598
+ break;
599
+ case T_SYMBOL:
600
+ mulmap_add_mapping(mulmap, rb_id2name(SYM2ID(from)), to);
601
+ break;
602
+ default:
603
+ rb_raise(rb_eArgError,
604
+ "cannot map from %s with MappingFilter",
605
+ rs2s(rb_obj_as_string(from)));
606
+ break;
607
+ }
608
+ }
609
+
610
+ /* XXX: Duplication from frb_add_mappings_i in r_analysis.c */
611
+ static int frb_mulmap_add_mappings_i(VALUE key, VALUE value, VALUE arg)
612
+ {
613
+ if (key == Qundef) {
614
+ return ST_CONTINUE;
615
+ } else {
616
+ MultiMapper *mulmap = (MultiMapper *)arg;
617
+ const char *to;
618
+ switch (TYPE(value)) {
619
+ case T_STRING:
620
+ to = rs2s(value);
621
+ break;
622
+ case T_SYMBOL:
623
+ to = rb_id2name(SYM2ID(value));
624
+ break;
625
+ default:
626
+ rb_raise(rb_eArgError,
627
+ "cannot map to %s with MultiMapper",
628
+ rs2s(rb_obj_as_string(key)));
629
+ break;
630
+ }
631
+ if (TYPE(key) == T_ARRAY) {
632
+ int i;
633
+ for (i = RARRAY_LEN(key) - 1; i >= 0; i--) {
634
+ frb_mulmap_add_mapping_i(mulmap, RARRAY_PTR(key)[i], to);
635
+ }
636
+ }
637
+ else {
638
+ frb_mulmap_add_mapping_i(mulmap, key, to);
639
+ }
640
+ }
641
+ return ST_CONTINUE;
642
+ }
643
+
644
+ /*
645
+ * call-seq:
646
+ * MultiMapper.new() -> new_multi_mapper
647
+ *
648
+ * Returns a new multi-mapper object and compiles it for optimization.
649
+ *
650
+ * Note that MultiMapper is immutable.
651
+ */
652
+ static VALUE
653
+ frb_mulmap_init(VALUE self, VALUE rmappings)
654
+ {
655
+ MultiMapper *mulmap = DATA_PTR(self);
656
+ rb_hash_foreach(rmappings, frb_mulmap_add_mappings_i, (VALUE)mulmap);
657
+ mulmap_compile(mulmap);
658
+
659
+ return self;
660
+ }
661
+
662
+ /*
663
+ * call-seq:
664
+ * multi_mapper.map(string) -> mapped_string
665
+ *
666
+ * Performs all the mappings on the string.
667
+ */
668
+ VALUE
669
+ frb_mulmap_map(VALUE self, VALUE rstring)
670
+ {
671
+ MultiMapper *mulmap = DATA_PTR(self);
672
+ char *string = rs2s(rb_obj_as_string(rstring));
673
+ char *mapped_string = mulmap_dynamic_map(mulmap, string);
674
+ VALUE rmapped_string = rb_str_new2(mapped_string);
675
+ free(mapped_string);
676
+ return rmapped_string;
677
+ }
678
+
679
+ /*
680
+ * Document-class: Ferret::Utils::MultiMapper
681
+ *
682
+ * == Summary
683
+ *
684
+ * A MultiMapper performs a list of mappings from one string to another. You
685
+ * could of course just use gsub to do this but when you are just mapping
686
+ * strings, this is much faster.
687
+ *
688
+ * Note that MultiMapper is immutable.
689
+ *
690
+ * == Example
691
+ *
692
+ * mapping = {
693
+ * ['à','á','â','ã','ä','å','ā','ă'] => 'a',
694
+ * 'æ' => 'ae',
695
+ * ['ď','đ'] => 'd',
696
+ * ['ç','ć','č','ĉ','ċ'] => 'c',
697
+ * ['è','é','ê','ë','ē','ę','ě','ĕ','ė',] => 'e',
698
+ * ['ƒ'] => 'f',
699
+ * ['ĝ','ğ','ġ','ģ'] => 'g',
700
+ * ['ĥ','ħ'] => 'h',
701
+ * ['ì','ì','í','î','ï','ī','ĩ','ĭ'] => 'i',
702
+ * ['į','ı','ij','ĵ'] => 'j',
703
+ * ['ķ','ĸ'] => 'k',
704
+ * ['ł','ľ','ĺ','ļ','ŀ'] => 'l',
705
+ * ['ñ','ń','ň','ņ','ʼn','ŋ'] => 'n',
706
+ * ['ò','ó','ô','õ','ö','ø','ō','ő','ŏ','ŏ'] => 'o',
707
+ * ['œ'] => 'oek',
708
+ * ['ą'] => 'q',
709
+ * ['ŕ','ř','ŗ'] => 'r',
710
+ * ['ś','š','ş','ŝ','ș'] => 's',
711
+ * ['ť','ţ','ŧ','ț'] => 't',
712
+ * ['ù','ú','û','ü','ū','ů','ű','ŭ','ũ','ų'] => 'u',
713
+ * ['ŵ'] => 'w',
714
+ * ['ý','ÿ','ŷ'] => 'y',
715
+ * ['ž','ż','ź'] => 'z'
716
+ * mapper = MultiMapper.new(mapping)
717
+ * mapped_string = mapper.map(string)
718
+ */
719
+ static void
720
+ Init_MultiMapper(void)
721
+ {
722
+ /* MultiMapper */
723
+ cMultiMapper = rb_define_class_under(mUtils, "MultiMapper", rb_cObject);
724
+ rb_define_alloc_func(cMultiMapper, frb_mulmap_alloc);
725
+
726
+ rb_define_method(cMultiMapper, "initialize", frb_mulmap_init, 1);
727
+ rb_define_method(cMultiMapper, "map", frb_mulmap_map, 1);
728
+ }
729
+
730
+ /*********************
731
+ *** PriorityQueue ***
732
+ *********************/
733
+ typedef struct PriQ
734
+ {
735
+ int size;
736
+ int capa;
737
+ int mem_capa;
738
+ VALUE *heap;
739
+ VALUE proc;
740
+ } PriQ;
741
+
742
+ #define PQ_START_CAPA 32
743
+
744
+ static bool frb_pq_lt(VALUE proc, VALUE v1, VALUE v2)
745
+ {
746
+ if (proc == Qnil) {
747
+ return RTEST(rb_funcall(v1, id_lt, 1, v2));
748
+ }
749
+ else {
750
+ return RTEST(rb_funcall(proc, id_call, 2, v1, v2));
751
+ }
752
+ }
753
+
754
+ static void pq_up(PriQ *pq)
755
+ {
756
+ VALUE *heap = pq->heap;
757
+ VALUE node;
758
+ int i = pq->size;
759
+ int j = i >> 1;
760
+
761
+ node = heap[i];
762
+
763
+ while ((j > 0) && frb_pq_lt(pq->proc, node, heap[j])) {
764
+ heap[i] = heap[j];
765
+ i = j;
766
+ j = j >> 1;
767
+ }
768
+ heap[i] = node;
769
+ }
770
+
771
+ static void pq_down(PriQ *pq)
772
+ {
773
+ register int i = 1;
774
+ register int j = 2; /* i << 1; */
775
+ register int k = 3; /* j + 1; */
776
+ register int size = pq->size;
777
+ VALUE *heap = pq->heap;
778
+ VALUE node = heap[i]; /* save top node */
779
+
780
+ if ((k <= size) && (frb_pq_lt(pq->proc, heap[k], heap[j]))) {
781
+ j = k;
782
+ }
783
+
784
+ while ((j <= size) && frb_pq_lt(pq->proc, heap[j], node)) {
785
+ heap[i] = heap[j]; /* shift up child */
786
+ i = j;
787
+ j = i << 1;
788
+ k = j + 1;
789
+ if ((k <= size) && frb_pq_lt(pq->proc, heap[k], heap[j])) {
790
+ j = k;
791
+ }
792
+ }
793
+ heap[i] = node;
794
+ }
795
+
796
+ static void pq_push(PriQ *pq, VALUE elem)
797
+ {
798
+ pq->size++;
799
+ if (pq->size >= pq->mem_capa) {
800
+ pq->mem_capa <<= 1;
801
+ REALLOC_N(pq->heap, VALUE, pq->mem_capa);
802
+ }
803
+ pq->heap[pq->size] = elem;
804
+ pq_up(pq);
805
+ }
806
+
807
+ static VALUE cPriorityQueue;
808
+
809
+ static void
810
+ frb_pq_mark(void *p)
811
+ {
812
+ PriQ *pq = (PriQ *)p;
813
+ int i;
814
+ for (i = pq->size; i > 0; i--) {
815
+ rb_gc_mark_maybe(pq->heap[i]);
816
+ }
817
+ }
818
+
819
+ static void frb_pq_free(PriQ *pq)
820
+ {
821
+ free(pq->heap);
822
+ free(pq);
823
+ }
824
+
825
+ static VALUE
826
+ frb_pq_alloc(VALUE klass)
827
+ {
828
+ PriQ *pq = ALLOC_AND_ZERO(PriQ);
829
+ pq->capa = PQ_START_CAPA;
830
+ pq->mem_capa = PQ_START_CAPA;
831
+ pq->heap = ALLOC_N(VALUE, PQ_START_CAPA);
832
+ pq->proc = Qnil;
833
+ return Data_Wrap_Struct(klass, &frb_pq_mark, &frb_pq_free, pq);
834
+ }
835
+
836
+ #define GET_PQ(pq, self) Data_Get_Struct(self, PriQ, pq)
837
+ /*
838
+ * call-seq:
839
+ * PriorityQueue.new(capacity = 32) -> new_pq
840
+ * PriorityQueue.new({:capacity => 32,
841
+ * :less_than_proc => lambda{|a, b| a < b}) -> new_pq
842
+ * PriorityQueue.new({:capacity => 32}) {|a, b| a < b} -> new_pq
843
+ *
844
+ * Returns a new empty priority queue object with an optional capacity.
845
+ * Once the capacity is filled, the lowest valued elements will be
846
+ * automatically popped off the top of the queue as more elements are
847
+ * inserted into the queue.
848
+ */
849
+ static VALUE
850
+ frb_pq_init(int argc, VALUE *argv, VALUE self)
851
+ {
852
+ if (argc >= 1) {
853
+ PriQ *pq;
854
+ VALUE options = argv[0];
855
+ VALUE param;
856
+ int capa = PQ_START_CAPA;
857
+ GET_PQ(pq, self);
858
+ switch (TYPE(options)) {
859
+ case T_FIXNUM:
860
+ capa = FIX2INT(options);
861
+ break;
862
+ case T_HASH:
863
+ if (!NIL_P(param = rb_hash_aref(options,
864
+ ID2SYM(id_capacity)))) {
865
+ capa = FIX2INT(param);
866
+ }
867
+ if (!NIL_P(param = rb_hash_aref(options,
868
+ ID2SYM(id_less_than)))) {
869
+ pq->proc = param;
870
+ }
871
+ break;
872
+ default:
873
+ rb_raise(rb_eArgError,
874
+ "PriorityQueue#initialize only takes a Hash or "
875
+ "an integer");
876
+
877
+ break;
878
+ }
879
+ if (capa < 0) {
880
+ rb_raise(rb_eIndexError,
881
+ "PriorityQueue must have a capacity > 0. %d < 0",
882
+ capa);
883
+ }
884
+ pq->capa = capa;
885
+ if (rb_block_given_p()) {
886
+ pq->proc = rb_block_proc();
887
+ }
888
+ if (argc > 1) {
889
+ rb_raise(rb_eArgError,
890
+ "PriorityQueue#initialize only takes one parameter");
891
+ }
892
+ }
893
+
894
+ return self;
895
+ }
896
+
897
+ /*
898
+ * call-seq:
899
+ * pq.clone -> pq_clone
900
+ *
901
+ * Returns a shallow clone of the priority queue. That is only the priority
902
+ * queue is cloned, its contents are not cloned.
903
+ */
904
+ static VALUE
905
+ frb_pq_clone(VALUE self)
906
+ {
907
+ PriQ *pq, *new_pq = ALLOC(PriQ);
908
+ GET_PQ(pq, self);
909
+ memcpy(new_pq, pq, sizeof(PriQ));
910
+ new_pq->heap = ALLOC_N(VALUE, new_pq->mem_capa);
911
+ memcpy(new_pq->heap, pq->heap, sizeof(VALUE) * (new_pq->size + 1));
912
+
913
+ return Data_Wrap_Struct(cPriorityQueue, &frb_pq_mark, &frb_pq_free, new_pq);
914
+ }
915
+
916
+ /*
917
+ * call-seq:
918
+ * pq.clear -> self
919
+ *
920
+ * Clears all elements from the priority queue. The size will be reset to 0.
921
+ */
922
+ static VALUE
923
+ frb_pq_clear(VALUE self)
924
+ {
925
+ PriQ *pq;
926
+ GET_PQ(pq, self);
927
+ pq->size = 0;
928
+ return self;
929
+ }
930
+
931
+ /*
932
+ * call-seq:
933
+ * pq.insert(elem) -> self
934
+ * pq << elem -> self
935
+ *
936
+ * Insert an element into a queue. It will be inserted into the correct
937
+ * position in the queue according to its priority.
938
+ */
939
+ static VALUE
940
+ frb_pq_insert(VALUE self, VALUE elem)
941
+ {
942
+ PriQ *pq;
943
+ GET_PQ(pq, self);
944
+ if (pq->size < pq->capa) {
945
+ pq_push(pq, elem);
946
+ }
947
+ else if (pq->size > 0 && frb_pq_lt(pq->proc, pq->heap[1], elem)) {
948
+ pq->heap[1] = elem;
949
+ pq_down(pq);
950
+ }
951
+ /* else ignore the element */
952
+ return self;
953
+ }
954
+
955
+ /*
956
+ * call-seq:
957
+ * pq.adjust -> self
958
+ *
959
+ * Sometimes you modify the top element in the priority queue so that its
960
+ * priority changes. When you do this you need to reorder the queue and you
961
+ * do this by calling the adjust method.
962
+ */
963
+ static VALUE
964
+ frb_pq_adjust(VALUE self)
965
+ {
966
+ PriQ *pq;
967
+ GET_PQ(pq, self);
968
+ pq_down(pq);
969
+ return self;
970
+ }
971
+
972
+ /*
973
+ * call-seq:
974
+ * pq.top -> elem
975
+ *
976
+ * Returns the top element in the queue but does not remove it from the
977
+ * queue.
978
+ */
979
+ static VALUE
980
+ frb_pq_top(VALUE self)
981
+ {
982
+ PriQ *pq;
983
+ GET_PQ(pq, self);
984
+ return (pq->size > 0) ? pq->heap[1] : Qnil;
985
+ }
986
+
987
+ /*
988
+ * call-seq:
989
+ * pq.pop -> elem
990
+ *
991
+ * Returns the top element in the queue removing it from the queue.
992
+ */
993
+ static VALUE
994
+ frb_pq_pop(VALUE self)
995
+ {
996
+ PriQ *pq;
997
+ GET_PQ(pq, self);
998
+ if (pq->size > 0) {
999
+ VALUE result = pq->heap[1]; /* save first value */
1000
+ pq->heap[1] = pq->heap[pq->size]; /* move last to first */
1001
+ pq->heap[pq->size] = Qnil;
1002
+ pq->size--;
1003
+ pq_down(pq); /* adjust heap */
1004
+ return result;
1005
+ }
1006
+ else {
1007
+ return Qnil;
1008
+ }
1009
+ }
1010
+
1011
+ /*
1012
+ * call-seq:
1013
+ * pq.size -> integer
1014
+ *
1015
+ * Returns the size of the queue, ie. the number of elements currently stored
1016
+ * in the queue. The _size_ of a PriorityQueue can never be greater than
1017
+ * its _capacity_
1018
+ */
1019
+ static VALUE
1020
+ frb_pq_size(VALUE self)
1021
+ {
1022
+ PriQ *pq;
1023
+ GET_PQ(pq, self);
1024
+ return INT2FIX(pq->size);
1025
+ }
1026
+
1027
+ /*
1028
+ * call-seq:
1029
+ * pq.capacity -> integer
1030
+ *
1031
+ * Returns the capacity of the queue, ie. the number of elements that can be
1032
+ * stored in a Priority queue before they start to drop off the end. The
1033
+ * _size_ of a PriorityQueue can never be greater than its
1034
+ * _capacity_
1035
+ */
1036
+ static VALUE
1037
+ frb_pq_capa(VALUE self)
1038
+ {
1039
+ PriQ *pq;
1040
+ GET_PQ(pq, self);
1041
+ return INT2FIX(pq->capa);
1042
+ }
1043
+
1044
+ /*
1045
+ * Document-class: Ferret::Utils::PriorityQueue
1046
+ *
1047
+ * == Summary
1048
+ *
1049
+ * A PriorityQueue is a very useful data structure and one that needs a fast
1050
+ * implementation. Hence this priority queue is implemented in C. It is
1051
+ * pretty easy to use; basically you just insert elements into the queue and
1052
+ * pop them off.
1053
+ *
1054
+ * The elements are sorted with the lowest valued elements on the top of
1055
+ * the heap, ie the first to be popped off. Elements are ordered using the
1056
+ * less_than '<' method. To change the order of the queue you can either
1057
+ * reimplement the '<' method or pass a block when you initialize the queue.
1058
+ *
1059
+ * You can also set the capacity of the PriorityQueue. Once you hit the
1060
+ * capacity, the lowest valued elements are automatically popped off the top
1061
+ * of the queue as more elements are added.
1062
+ *
1063
+ * == Example
1064
+ *
1065
+ * Here is a toy example that sorts strings by their length and has a capacity
1066
+ * of 5;
1067
+ *
1068
+ * q = PriorityQueue.new(5) {|a, b| a.size < b.size}
1069
+ * q << "x"
1070
+ * q << "xxxxx"
1071
+ * q << "xxx"
1072
+ * q << "xxxx"
1073
+ * q << "xxxxxx"
1074
+ * q << "xx" # hit capacity so "x" will be popped off the top
1075
+ *
1076
+ * puts q.size #=> 5
1077
+ * word = q.pop #=> "xx"
1078
+ * q.top << "yyyy" # "xxxyyyy" will still be at the top of the queue
1079
+ * q.adjust # move "xxxyyyy" to its correct location in queue
1080
+ * word = q.pop #=> "xxxx"
1081
+ * word = q.pop #=> "xxxxx"
1082
+ * word = q.pop #=> "xxxxxx"
1083
+ * word = q.pop #=> "xxxyyyy"
1084
+ * word = q.pop #=> nil
1085
+ */
1086
+ static void
1087
+ Init_PriorityQueue(void)
1088
+ {
1089
+ /* PriorityQueue */
1090
+ cPriorityQueue = rb_define_class_under(mUtils, "PriorityQueue", rb_cObject);
1091
+ rb_define_alloc_func(cPriorityQueue, frb_pq_alloc);
1092
+
1093
+ rb_define_method(cPriorityQueue, "initialize", frb_pq_init, -1);
1094
+ rb_define_method(cPriorityQueue, "clone", frb_pq_clone, 0);
1095
+ rb_define_method(cPriorityQueue, "clear", frb_pq_clear, 0);
1096
+ rb_define_method(cPriorityQueue, "insert", frb_pq_insert, 1);
1097
+ rb_define_method(cPriorityQueue, "<<", frb_pq_insert, 1);
1098
+ rb_define_method(cPriorityQueue, "top", frb_pq_top, 0);
1099
+ rb_define_method(cPriorityQueue, "pop", frb_pq_pop, 0);
1100
+ rb_define_method(cPriorityQueue, "size", frb_pq_size, 0);
1101
+ rb_define_method(cPriorityQueue, "capacity", frb_pq_capa, 0);
1102
+ rb_define_method(cPriorityQueue, "adjust", frb_pq_adjust, 0);
1103
+ }
1104
+
1105
+ /* rdoc hack
1106
+ extern VALUE mFerret = rb_define_module("Ferret");
1107
+ */
1108
+
1109
+ /*
1110
+ * Document-module: Ferret::Utils
1111
+ *
1112
+ * The Utils module contains a number of helper classes and modules that are
1113
+ * useful when indexing with Ferret. They are;
1114
+ *
1115
+ * * BitVector
1116
+ * * MultiMapper
1117
+ * * PriorityQueue
1118
+ * * => more to come
1119
+ *
1120
+ * These helper classes could also be quite useful outside of Ferret and may
1121
+ * one day find themselves in their own separate library.
1122
+ */
1123
+ void
1124
+ Init_Utils(void)
1125
+ {
1126
+ mUtils = rb_define_module_under(mFerret, "Utils");
1127
+
1128
+ Init_BitVector();
1129
+ Init_MultiMapper();
1130
+ Init_PriorityQueue();
1131
+ }