isomorfeus-ferret 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (222) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +612 -0
  3. data/README.md +44 -0
  4. data/ext/isomorfeus_ferret_ext/benchmark.c +223 -0
  5. data/ext/isomorfeus_ferret_ext/benchmark.h +45 -0
  6. data/ext/isomorfeus_ferret_ext/benchmarks_all.h +25 -0
  7. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +123 -0
  8. data/ext/isomorfeus_ferret_ext/bm_hash.c +118 -0
  9. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +40 -0
  10. data/ext/isomorfeus_ferret_ext/bm_store.c +93 -0
  11. data/ext/isomorfeus_ferret_ext/email.rl +21 -0
  12. data/ext/isomorfeus_ferret_ext/extconf.rb +5 -0
  13. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -0
  14. data/ext/isomorfeus_ferret_ext/frb_analysis.c +2577 -0
  15. data/ext/isomorfeus_ferret_ext/frb_index.c +3457 -0
  16. data/ext/isomorfeus_ferret_ext/frb_lang.c +9 -0
  17. data/ext/isomorfeus_ferret_ext/frb_lang.h +17 -0
  18. data/ext/isomorfeus_ferret_ext/frb_qparser.c +629 -0
  19. data/ext/isomorfeus_ferret_ext/frb_search.c +4460 -0
  20. data/ext/isomorfeus_ferret_ext/frb_store.c +515 -0
  21. data/ext/isomorfeus_ferret_ext/frb_threading.h +30 -0
  22. data/ext/isomorfeus_ferret_ext/frb_utils.c +1127 -0
  23. data/ext/isomorfeus_ferret_ext/frt_analysis.c +1644 -0
  24. data/ext/isomorfeus_ferret_ext/frt_analysis.h +247 -0
  25. data/ext/isomorfeus_ferret_ext/frt_array.c +124 -0
  26. data/ext/isomorfeus_ferret_ext/frt_array.h +54 -0
  27. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +95 -0
  28. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +586 -0
  29. data/ext/isomorfeus_ferret_ext/frt_compound_io.c +374 -0
  30. data/ext/isomorfeus_ferret_ext/frt_config.h +44 -0
  31. data/ext/isomorfeus_ferret_ext/frt_document.c +134 -0
  32. data/ext/isomorfeus_ferret_ext/frt_document.h +52 -0
  33. data/ext/isomorfeus_ferret_ext/frt_except.c +95 -0
  34. data/ext/isomorfeus_ferret_ext/frt_except.h +188 -0
  35. data/ext/isomorfeus_ferret_ext/frt_field_index.c +233 -0
  36. data/ext/isomorfeus_ferret_ext/frt_field_index.h +42 -0
  37. data/ext/isomorfeus_ferret_ext/frt_filter.c +157 -0
  38. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +502 -0
  39. data/ext/isomorfeus_ferret_ext/frt_global.c +427 -0
  40. data/ext/isomorfeus_ferret_ext/frt_global.h +290 -0
  41. data/ext/isomorfeus_ferret_ext/frt_hash.c +518 -0
  42. data/ext/isomorfeus_ferret_ext/frt_hash.h +466 -0
  43. data/ext/isomorfeus_ferret_ext/frt_hashset.c +191 -0
  44. data/ext/isomorfeus_ferret_ext/frt_hashset.h +206 -0
  45. data/ext/isomorfeus_ferret_ext/frt_helper.c +62 -0
  46. data/ext/isomorfeus_ferret_ext/frt_helper.h +13 -0
  47. data/ext/isomorfeus_ferret_ext/frt_ind.c +353 -0
  48. data/ext/isomorfeus_ferret_ext/frt_ind.h +54 -0
  49. data/ext/isomorfeus_ferret_ext/frt_index.c +6377 -0
  50. data/ext/isomorfeus_ferret_ext/frt_index.h +880 -0
  51. data/ext/isomorfeus_ferret_ext/frt_lang.c +104 -0
  52. data/ext/isomorfeus_ferret_ext/frt_lang.h +44 -0
  53. data/ext/isomorfeus_ferret_ext/frt_mempool.c +87 -0
  54. data/ext/isomorfeus_ferret_ext/frt_mempool.h +33 -0
  55. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +349 -0
  56. data/ext/isomorfeus_ferret_ext/frt_multimapper.h +52 -0
  57. data/ext/isomorfeus_ferret_ext/frt_posh.c +1006 -0
  58. data/ext/isomorfeus_ferret_ext/frt_posh.h +973 -0
  59. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +147 -0
  60. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +147 -0
  61. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +1612 -0
  62. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +157 -0
  63. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +209 -0
  64. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +281 -0
  65. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +147 -0
  66. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +672 -0
  67. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +3084 -0
  68. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +1182 -0
  69. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +98 -0
  70. data/ext/isomorfeus_ferret_ext/frt_q_range.c +665 -0
  71. data/ext/isomorfeus_ferret_ext/frt_q_span.c +2386 -0
  72. data/ext/isomorfeus_ferret_ext/frt_q_term.c +311 -0
  73. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +166 -0
  74. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +460 -0
  75. data/ext/isomorfeus_ferret_ext/frt_scanner.c +899 -0
  76. data/ext/isomorfeus_ferret_ext/frt_scanner.h +28 -0
  77. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +6705 -0
  78. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +4419 -0
  79. data/ext/isomorfeus_ferret_ext/frt_search.c +1824 -0
  80. data/ext/isomorfeus_ferret_ext/frt_search.h +924 -0
  81. data/ext/isomorfeus_ferret_ext/frt_similarity.c +150 -0
  82. data/ext/isomorfeus_ferret_ext/frt_similarity.h +79 -0
  83. data/ext/isomorfeus_ferret_ext/frt_sort.c +796 -0
  84. data/ext/isomorfeus_ferret_ext/frt_stopwords.c +395 -0
  85. data/ext/isomorfeus_ferret_ext/frt_store.c +680 -0
  86. data/ext/isomorfeus_ferret_ext/frt_store.h +789 -0
  87. data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +72 -0
  88. data/ext/isomorfeus_ferret_ext/frt_threading.h +23 -0
  89. data/ext/isomorfeus_ferret_ext/frt_win32.h +54 -0
  90. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +409 -0
  91. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +95 -0
  92. data/ext/isomorfeus_ferret_ext/libstemmer.c +93 -0
  93. data/ext/isomorfeus_ferret_ext/libstemmer.h +73 -0
  94. data/ext/isomorfeus_ferret_ext/q_parser.y +1366 -0
  95. data/ext/isomorfeus_ferret_ext/scanner.h +28 -0
  96. data/ext/isomorfeus_ferret_ext/scanner.in +43 -0
  97. data/ext/isomorfeus_ferret_ext/scanner.rl +84 -0
  98. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +200 -0
  99. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +85 -0
  100. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +324 -0
  101. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +7 -0
  102. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +610 -0
  103. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +6 -0
  104. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +1104 -0
  105. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +6 -0
  106. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +749 -0
  107. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +7 -0
  108. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +1233 -0
  109. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +6 -0
  110. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +490 -0
  111. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +6 -0
  112. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +1217 -0
  113. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +7 -0
  114. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +1052 -0
  115. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +6 -0
  116. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +283 -0
  117. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +6 -0
  118. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +735 -0
  119. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +6 -0
  120. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +1003 -0
  121. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +7 -0
  122. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +1079 -0
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +6 -0
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +293 -0
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +6 -0
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +984 -0
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +6 -0
  128. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +686 -0
  129. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +6 -0
  130. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c +325 -0
  131. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h +6 -0
  132. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c +620 -0
  133. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.h +6 -0
  134. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.c +1111 -0
  135. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.h +6 -0
  136. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.c +754 -0
  137. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.h +6 -0
  138. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.c +1242 -0
  139. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.h +6 -0
  140. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.c +495 -0
  141. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.h +6 -0
  142. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c +1220 -0
  143. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h +6 -0
  144. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c +1059 -0
  145. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h +6 -0
  146. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c +285 -0
  147. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h +6 -0
  148. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c +741 -0
  149. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.h +6 -0
  150. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.c +1009 -0
  151. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.h +6 -0
  152. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.c +990 -0
  153. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h +6 -0
  154. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c +680 -0
  155. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h +6 -0
  156. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c +1083 -0
  157. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h +6 -0
  158. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c +294 -0
  159. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h +6 -0
  160. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c +2191 -0
  161. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h +6 -0
  162. data/ext/isomorfeus_ferret_ext/stem_api.c +66 -0
  163. data/ext/isomorfeus_ferret_ext/stem_api.h +26 -0
  164. data/ext/isomorfeus_ferret_ext/stem_header.h +57 -0
  165. data/ext/isomorfeus_ferret_ext/stem_modules.h +190 -0
  166. data/ext/isomorfeus_ferret_ext/stem_modules.txt +50 -0
  167. data/ext/isomorfeus_ferret_ext/stem_utilities.c +478 -0
  168. data/ext/isomorfeus_ferret_ext/test.c +850 -0
  169. data/ext/isomorfeus_ferret_ext/test.h +416 -0
  170. data/ext/isomorfeus_ferret_ext/test_1710.c +63 -0
  171. data/ext/isomorfeus_ferret_ext/test_analysis.c +1221 -0
  172. data/ext/isomorfeus_ferret_ext/test_array.c +272 -0
  173. data/ext/isomorfeus_ferret_ext/test_bitvector.c +600 -0
  174. data/ext/isomorfeus_ferret_ext/test_compound_io.c +170 -0
  175. data/ext/isomorfeus_ferret_ext/test_document.c +156 -0
  176. data/ext/isomorfeus_ferret_ext/test_except.c +244 -0
  177. data/ext/isomorfeus_ferret_ext/test_fields.c +522 -0
  178. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +185 -0
  179. data/ext/isomorfeus_ferret_ext/test_filter.c +331 -0
  180. data/ext/isomorfeus_ferret_ext/test_fs_store.c +25 -0
  181. data/ext/isomorfeus_ferret_ext/test_global.c +299 -0
  182. data/ext/isomorfeus_ferret_ext/test_hash.c +485 -0
  183. data/ext/isomorfeus_ferret_ext/test_hashset.c +288 -0
  184. data/ext/isomorfeus_ferret_ext/test_helper.c +47 -0
  185. data/ext/isomorfeus_ferret_ext/test_highlighter.c +548 -0
  186. data/ext/isomorfeus_ferret_ext/test_index.c +2323 -0
  187. data/ext/isomorfeus_ferret_ext/test_lang.c +74 -0
  188. data/ext/isomorfeus_ferret_ext/test_mempool.c +102 -0
  189. data/ext/isomorfeus_ferret_ext/test_multimapper.c +64 -0
  190. data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +213 -0
  191. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +84 -0
  192. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +61 -0
  193. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +241 -0
  194. data/ext/isomorfeus_ferret_ext/test_q_parser.c +464 -0
  195. data/ext/isomorfeus_ferret_ext/test_q_span.c +575 -0
  196. data/ext/isomorfeus_ferret_ext/test_ram_store.c +77 -0
  197. data/ext/isomorfeus_ferret_ext/test_search.c +1874 -0
  198. data/ext/isomorfeus_ferret_ext/test_segments.c +167 -0
  199. data/ext/isomorfeus_ferret_ext/test_similarity.c +25 -0
  200. data/ext/isomorfeus_ferret_ext/test_sort.c +333 -0
  201. data/ext/isomorfeus_ferret_ext/test_store.c +591 -0
  202. data/ext/isomorfeus_ferret_ext/test_store.h +3 -0
  203. data/ext/isomorfeus_ferret_ext/test_term.c +351 -0
  204. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +373 -0
  205. data/ext/isomorfeus_ferret_ext/test_test.c +83 -0
  206. data/ext/isomorfeus_ferret_ext/test_threading.c +188 -0
  207. data/ext/isomorfeus_ferret_ext/testhelper.c +561 -0
  208. data/ext/isomorfeus_ferret_ext/testhelper.h +25 -0
  209. data/ext/isomorfeus_ferret_ext/tests_all.h +87 -0
  210. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +1854 -0
  211. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +1999 -0
  212. data/ext/isomorfeus_ferret_ext/url.rl +27 -0
  213. data/ext/isomorfeus_ferret_ext/word_list.h +15156 -0
  214. data/lib/isomorfeus/ferret/document.rb +132 -0
  215. data/lib/isomorfeus/ferret/field_symbol.rb +85 -0
  216. data/lib/isomorfeus/ferret/index/field_infos.rb +48 -0
  217. data/lib/isomorfeus/ferret/index/index.rb +970 -0
  218. data/lib/isomorfeus/ferret/monitor.rb +323 -0
  219. data/lib/isomorfeus/ferret/stdlib_patches.rb +151 -0
  220. data/lib/isomorfeus/ferret/version.rb +5 -0
  221. data/lib/isomorfeus-ferret.rb +8 -0
  222. metadata +307 -0
@@ -0,0 +1,1127 @@
1
+ #include "frt_bitvector.h"
2
+ #include "frt_multimapper.h"
3
+ #include "isomorfeus_ferret.h"
4
+ #include <ruby/st.h>
5
+
6
+ /*****************
7
+ *** BitVector ***
8
+ *****************/
9
+ static VALUE cBitVector;
10
+
11
+ static void
12
+ frb_bv_free(void *p)
13
+ {
14
+ object_del(p);
15
+ frt_bv_destroy((FrtBitVector *)p);
16
+ }
17
+
18
+ static VALUE
19
+ frb_bv_alloc(VALUE klass)
20
+ {
21
+ FrtBitVector *bv = frt_bv_new();
22
+ VALUE rbv = Data_Wrap_Struct(klass, NULL, &frb_bv_free, bv);
23
+ object_add(bv, rbv);
24
+ return rbv;
25
+ }
26
+
27
+ #define GET_BV(bv, self) Data_Get_Struct(self, FrtBitVector, bv)
28
+
29
+ VALUE
30
+ frb_get_bv(FrtBitVector *bv)
31
+ {
32
+ VALUE rbv;
33
+ if ((rbv = object_get(bv)) == Qnil) {
34
+ rbv = Data_Wrap_Struct(cBitVector, NULL, &frb_bv_free, bv);
35
+ FRT_REF(bv);
36
+ object_add(bv, rbv);
37
+ }
38
+ return rbv;
39
+ }
40
+
41
+ /*
42
+ * call-seq:
43
+ * BitVector.new() -> new_bit_vector
44
+ *
45
+ * Returns a new empty bit vector object
46
+ */
47
+ static VALUE
48
+ frb_bv_init(VALUE self)
49
+ {
50
+ return self;
51
+ }
52
+
53
+ /*
54
+ * call-seq:
55
+ * bv[i] = bool -> bool
56
+ *
57
+ * Set the bit and _i_ to *val* (+true+ or
58
+ * +false+).
59
+ */
60
+ VALUE
61
+ frb_bv_set(VALUE self, VALUE rindex, VALUE rstate)
62
+ {
63
+ FrtBitVector *bv;
64
+ int index = FIX2INT(rindex);
65
+ GET_BV(bv, self);
66
+ if (index < 0) {
67
+ rb_raise(rb_eIndexError, "%d < 0", index);
68
+ }
69
+ if (RTEST(rstate)) {
70
+ frt_bv_set(bv, index);
71
+ }
72
+ else {
73
+ frt_bv_unset(bv, index);
74
+ }
75
+
76
+ return rstate;
77
+ }
78
+
79
+ /*
80
+ * call-seq:
81
+ * bv.set(i) -> self
82
+ *
83
+ * Set the bit at _i_ to *on* (+true+)
84
+ */
85
+ VALUE
86
+ frb_bv_set_on(VALUE self, VALUE rindex)
87
+ {
88
+ frb_bv_set(self, rindex, Qtrue);
89
+ return self;
90
+ }
91
+
92
+ /*
93
+ * call-seq:
94
+ * bv.unset(i) -> self
95
+ *
96
+ * Set the bit at _i_ to *off* (+false+)
97
+ */
98
+ VALUE
99
+ frb_bv_set_off(VALUE self, VALUE rindex)
100
+ {
101
+ frb_bv_set(self, rindex, Qfalse);
102
+ return self;
103
+ }
104
+
105
+ /*
106
+ * call-seq:
107
+ * bv.get(i) -> bool
108
+ * bv[i] -> bool
109
+ *
110
+ * Get the bit value at _i_
111
+ */
112
+ VALUE
113
+ frb_bv_get(VALUE self, VALUE rindex)
114
+ {
115
+ FrtBitVector *bv;
116
+ int index = FIX2INT(rindex);
117
+ GET_BV(bv, self);
118
+ if (index < 0) {
119
+ rb_raise(rb_eIndexError, "%d < 0", index);
120
+ }
121
+
122
+ return frt_bv_get(bv, index) ? Qtrue : Qfalse;
123
+ }
124
+
125
+ /*
126
+ * call-seq:
127
+ * bv.count -> bit_count
128
+ *
129
+ * Count the number of bits set in the bit vector. If the bit vector has been
130
+ * negated using +#not+ then count the number of unset bits
131
+ * instead.
132
+ */
133
+ VALUE
134
+ frb_bv_count(VALUE self)
135
+ {
136
+ FrtBitVector *bv;
137
+ GET_BV(bv, self);
138
+ return INT2FIX(bv->count);
139
+ }
140
+
141
+ /*
142
+ * call-seq:
143
+ * bv.clear -> self
144
+ *
145
+ * Clears all set bits in the bit vector. Negated bit vectors will still have
146
+ * all bits set to *off*.
147
+ */
148
+ VALUE
149
+ frb_bv_clear(VALUE self)
150
+ {
151
+ FrtBitVector *bv;
152
+ GET_BV(bv, self);
153
+ frt_bv_clear(bv);
154
+ frt_bv_scan_reset(bv);
155
+ return self;
156
+ }
157
+
158
+ /*
159
+ * call-seq:
160
+ * bv1 == bv2 -> bool
161
+ * bv1 != bv2 -> bool
162
+ * bv1.eql(bv2) -> bool
163
+ *
164
+ * Compares two bit vectors and returns true if both bit vectors have the same
165
+ * bits set.
166
+ */
167
+ VALUE
168
+ frb_bv_eql(VALUE self, VALUE other)
169
+ {
170
+ FrtBitVector *bv1, *bv2;
171
+ GET_BV(bv1, self);
172
+ GET_BV(bv2, other);
173
+ return frt_bv_eq(bv1, bv2) ? Qtrue : Qfalse;
174
+ }
175
+
176
+ /*
177
+ * call-seq:
178
+ * bv.hash -> int
179
+ *
180
+ * Used to store bit vectors in Hashes. Especially useful if you want to
181
+ * cache them.
182
+ */
183
+ VALUE
184
+ frb_bv_hash(VALUE self)
185
+ {
186
+ FrtBitVector *bv;
187
+ GET_BV(bv, self);
188
+ return ULONG2NUM(frt_bv_hash(bv));
189
+ }
190
+
191
+ /*
192
+ * call-seq:
193
+ * bv1 & bv2 -> anded_bv
194
+ * bv1.and(bv2) -> anded_bv
195
+ *
196
+ * Perform a boolean _and_ operation on +bv1+ and
197
+ * +bv2+
198
+ */
199
+ VALUE
200
+ frb_bv_and(VALUE self, VALUE other)
201
+ {
202
+ FrtBitVector *bv1, *bv2;
203
+ GET_BV(bv1, self);
204
+ GET_BV(bv2, other);
205
+ return Data_Wrap_Struct(cBitVector, NULL, &frt_bv_destroy, frt_bv_and(bv1, bv2));
206
+ }
207
+
208
+ /*
209
+ * call-seq:
210
+ * bv1.and!(bv2) -> self
211
+ *
212
+ * Perform a boolean _and_ operation on +bv1+ and
213
+ * +bv2+ in place on +bv1+
214
+ */
215
+ VALUE
216
+ frb_bv_and_x(VALUE self, VALUE other)
217
+ {
218
+ FrtBitVector *bv1, *bv2;
219
+ GET_BV(bv1, self);
220
+ GET_BV(bv2, other);
221
+ frt_bv_and_x(bv1, bv2);
222
+ return self;
223
+ }
224
+
225
+ /*
226
+ * call-seq:
227
+ * bv1 | bv2 -> ored_bv
228
+ * bv1.or(bv2) -> ored_bv
229
+ *
230
+ * Perform a boolean _or_ operation on +bv1+ and
231
+ * +bv2+
232
+ */
233
+ VALUE
234
+ frb_bv_or(VALUE self, VALUE other)
235
+ {
236
+ FrtBitVector *bv1, *bv2;
237
+ GET_BV(bv1, self);
238
+ GET_BV(bv2, other);
239
+ return Data_Wrap_Struct(cBitVector, NULL, &frt_bv_destroy, frt_bv_or(bv1, bv2));
240
+ }
241
+
242
+ /*
243
+ * call-seq:
244
+ * bv1.or!(bv2) -> self
245
+ *
246
+ * Perform a boolean _or_ operation on +bv1+ and
247
+ * +bv2+ in place on +bv1+
248
+ */
249
+ VALUE
250
+ frb_bv_or_x(VALUE self, VALUE other)
251
+ {
252
+ FrtBitVector *bv1, *bv2;
253
+ GET_BV(bv1, self);
254
+ GET_BV(bv2, other);
255
+ frt_bv_or_x(bv1, bv2);
256
+ return self;
257
+ }
258
+
259
+ /*
260
+ * call-seq:
261
+ * bv1 ^ bv2 -> xored_bv
262
+ * bv1.xor(bv2) -> xored_bv
263
+ *
264
+ * Perform a boolean _xor_ operation on +bv1+ and
265
+ * +bv2+
266
+ */
267
+ VALUE
268
+ frb_bv_xor(VALUE self, VALUE other)
269
+ {
270
+ FrtBitVector *bv1, *bv2;
271
+ GET_BV(bv1, self);
272
+ GET_BV(bv2, other);
273
+ return Data_Wrap_Struct(cBitVector, NULL, &frt_bv_destroy, frt_bv_xor(bv1, bv2));
274
+ }
275
+
276
+ /*
277
+ * call-seq:
278
+ * bv1.xor!(bv2) -> self
279
+ *
280
+ * Perform a boolean _xor_ operation on +bv1+ and
281
+ * +bv2+ in place on +bv1+
282
+ */
283
+ VALUE
284
+ frb_bv_xor_x(VALUE self, VALUE other)
285
+ {
286
+ FrtBitVector *bv1, *bv2;
287
+ GET_BV(bv1, self);
288
+ GET_BV(bv2, other);
289
+ frt_bv_xor_x(bv1, bv2);
290
+ return self;
291
+ }
292
+
293
+ /*
294
+ * call-seq:
295
+ * ~bv -> bv
296
+ * bv.not -> bv
297
+ *
298
+ * Perform a boolean _not_ operation on +bv+
299
+ * */
300
+ VALUE
301
+ frb_bv_not(VALUE self)
302
+ {
303
+ FrtBitVector *bv;
304
+ GET_BV(bv, self);
305
+ return Data_Wrap_Struct(cBitVector, NULL, &frt_bv_destroy, frt_bv_not(bv));
306
+ }
307
+
308
+ /*
309
+ * call-seq:
310
+ * bv.not! -> self
311
+ *
312
+ * Perform a boolean _not_ operation on +bv+ in-place
313
+ */
314
+ VALUE
315
+ frb_bv_not_x(VALUE self)
316
+ {
317
+ FrtBitVector *bv;
318
+ GET_BV(bv, self);
319
+ frt_bv_not_x(bv);
320
+ return self;
321
+ }
322
+
323
+ /*
324
+ * call-seq:
325
+ * bv.reset_scan -> self
326
+ *
327
+ * Resets the BitVector ready for scanning. You should call this method
328
+ * before calling +#next+ or +#next_unset+. It isn't
329
+ * necessary for the other scan methods or for the +#each+ method.
330
+ */
331
+ VALUE
332
+ frb_bv_reset_scan(VALUE self)
333
+ {
334
+ FrtBitVector *bv;
335
+ GET_BV(bv, self);
336
+ frt_bv_scan_reset(bv);
337
+ return self;
338
+ }
339
+
340
+ /*
341
+ * call-seq:
342
+ * bv.next -> bit_num
343
+ *
344
+ * Returns the next set bit in the bit vector scanning from low order to high
345
+ * order. You should call +#reset_scan+ before calling this method
346
+ * if you want to scan from the beginning. It is automatically reset when you
347
+ * first create the bit vector.
348
+ */
349
+ VALUE
350
+ frb_bv_next(VALUE self)
351
+ {
352
+ FrtBitVector *bv;
353
+ GET_BV(bv, self);
354
+ return INT2FIX(frt_bv_scan_next(bv));
355
+ }
356
+
357
+ /*
358
+ * call-seq:
359
+ * bv.next_unset -> bit_num
360
+ *
361
+ * Returns the next unset bit in the bit vector scanning from low order to
362
+ * high order. This method should only be called on bit vectors which have
363
+ * been flipped (negated). You should call +#reset_scan+ before
364
+ * calling this method if you want to scan from the beginning. It is
365
+ * automatically reset when you first create the bit vector.
366
+ */
367
+ VALUE
368
+ frb_bv_next_unset(VALUE self)
369
+ {
370
+ FrtBitVector *bv;
371
+ GET_BV(bv, self);
372
+ return INT2FIX(frt_bv_scan_next_unset(bv));
373
+ }
374
+
375
+ /*
376
+ * call-seq:
377
+ * bv.next_from(from) -> bit_num
378
+ *
379
+ * Returns the next set bit in the bit vector scanning from low order to
380
+ * high order and starting at +from+. The scan is inclusive so if
381
+ * +from+ is equal to 10 and +bv[10]+ is set it will
382
+ * return the number 10. If the bit vector has been negated than you should
383
+ * use the +#next_unset_from+ method.
384
+ */
385
+ VALUE
386
+ frb_bv_next_from(VALUE self, VALUE rfrom)
387
+ {
388
+ FrtBitVector *bv;
389
+ int from = FIX2INT(rfrom);
390
+ GET_BV(bv, self);
391
+ if (from < 0) {
392
+ from = 0;
393
+ }
394
+ return INT2FIX(frt_bv_scan_next_from(bv, from));
395
+ }
396
+
397
+ /*
398
+ * call-seq:
399
+ * bv.next_unset_from(from) -> bit_num
400
+ *
401
+ * Returns the next unset bit in the bit vector scanning from low order to
402
+ * high order and starting at +from+. The scan is inclusive so if
403
+ * +from+ is equal to 10 and +bv[10]+ is unset it will
404
+ * return the number 10. If the bit vector has not been negated than you
405
+ * should use the +#next_from+ method.
406
+ */
407
+ VALUE
408
+ frb_bv_next_unset_from(VALUE self, VALUE rfrom)
409
+ {
410
+ FrtBitVector *bv;
411
+ int from = FIX2INT(rfrom);
412
+ GET_BV(bv, self);
413
+ if (from < 0) {
414
+ from = 0;
415
+ }
416
+ return INT2FIX(frt_bv_scan_next_unset_from(bv, from));
417
+ }
418
+
419
+ /*
420
+ * call-seq:
421
+ * bv.each { |bit_num| }
422
+ *
423
+ * Iterate through all the set bits in the bit vector yielding each one in
424
+ * order
425
+ */
426
+ VALUE
427
+ frb_bv_each(VALUE self)
428
+ {
429
+ FrtBitVector *bv;
430
+ int bit;
431
+ GET_BV(bv, self);
432
+ frt_bv_scan_reset(bv);
433
+ if (bv->extends_as_ones) {
434
+ while ((bit = frt_bv_scan_next_unset(bv)) >= 0) {
435
+ rb_yield(INT2FIX(bit));
436
+ }
437
+ }
438
+ else {
439
+ while ((bit = frt_bv_scan_next(bv)) >= 0) {
440
+ rb_yield(INT2FIX(bit));
441
+ }
442
+ }
443
+ return self;
444
+ }
445
+
446
+ /*
447
+ * call-seq:
448
+ * bv.to_a
449
+ *
450
+ * Iterate through all the set bits in the bit vector adding the index of
451
+ * each set bit to an array. This is useful if you want to perform array
452
+ * methods on the bit vector. If you want to convert an array to a bit_vector
453
+ * simply do this;
454
+ *
455
+ * bv = [1, 12, 45, 367, 455].inject(FrtBitVector.new) {|bv, i| bv.set(i)}
456
+ */
457
+ VALUE
458
+ frb_bv_to_a(VALUE self)
459
+ {
460
+ FrtBitVector *bv;
461
+ int bit;
462
+ VALUE ary;
463
+ GET_BV(bv, self);
464
+ ary = rb_ary_new();
465
+ frt_bv_scan_reset(bv);
466
+ if (bv->extends_as_ones) {
467
+ while ((bit = frt_bv_scan_next_unset(bv)) >= 0) {
468
+ rb_ary_push(ary, INT2FIX(bit));
469
+ }
470
+ }
471
+ else {
472
+ while ((bit = frt_bv_scan_next(bv)) >= 0) {
473
+ rb_ary_push(ary, INT2FIX(bit));
474
+ }
475
+ }
476
+ return ary;
477
+ }
478
+
479
+ static VALUE mUtils;
480
+
481
+ /*
482
+ * Document-class: Ferret::Utils::BitVector
483
+ *
484
+ * == Summary
485
+ *
486
+ * A BitVector is pretty easy to implement in Ruby using Ruby's BigNum class.
487
+ * This BitVector however allows you to count the set bits with the
488
+ * +#count+ method (or unset bits of flipped bit vectors) and also
489
+ * to quickly scan the set bits.
490
+ *
491
+ * == Boolean Operations
492
+ *
493
+ * BitVector handles four boolean operations;
494
+ *
495
+ * * +&+
496
+ * * +|+
497
+ * * +^+
498
+ * * +~+
499
+ *
500
+ * bv1 = BitVector.new
501
+ * bv2 = BitVector.new
502
+ * bv3 = BitVector.new
503
+ *
504
+ * bv4 = (bv1 & bv2) | ~bv3
505
+ *
506
+ * You can also do the operations in-place;
507
+ *
508
+ * * +and!+
509
+ * * +or!+
510
+ * * +xor!+
511
+ * * +not!+
512
+ *
513
+ * bv4.and!(bv5).not!
514
+ *
515
+ * == Set Bit Scanning
516
+ *
517
+ * Perhaps the most useful functionality in BitVector is the ability to
518
+ * quickly scan for set bits. To print all set bits;
519
+ *
520
+ * bv.each {|bit| puts bit }
521
+ *
522
+ * Alternatively you could use the lower level +next+ or
523
+ * +next_unset+ methods. Note that the +each+ method will
524
+ * automatically scan unset bits if the BitVector has been flipped (using
525
+ * +not+).
526
+ */
527
+ static void
528
+ Init_BitVector(void)
529
+ {
530
+ /* BitVector */
531
+ cBitVector = rb_define_class_under(mUtils, "BitVector", rb_cObject);
532
+ rb_define_alloc_func(cBitVector, frb_bv_alloc);
533
+
534
+ rb_define_method(cBitVector, "initialize", frb_bv_init, 0);
535
+ rb_define_method(cBitVector, "set", frb_bv_set_on, 1);
536
+ rb_define_method(cBitVector, "unset", frb_bv_set_off, 1);
537
+ rb_define_method(cBitVector, "[]=", frb_bv_set, 2);
538
+ rb_define_method(cBitVector, "get", frb_bv_get, 1);
539
+ rb_define_method(cBitVector, "[]", frb_bv_get, 1);
540
+ rb_define_method(cBitVector, "count", frb_bv_count, 0);
541
+ rb_define_method(cBitVector, "clear", frb_bv_clear, 0);
542
+ rb_define_method(cBitVector, "eql?", frb_bv_eql, 1);
543
+ rb_define_method(cBitVector, "==", frb_bv_eql, 1);
544
+ rb_define_method(cBitVector, "hash", frb_bv_hash, 0);
545
+ rb_define_method(cBitVector, "and!", frb_bv_and_x, 1);
546
+ rb_define_method(cBitVector, "and", frb_bv_and, 1);
547
+ rb_define_method(cBitVector, "&", frb_bv_and, 1);
548
+ rb_define_method(cBitVector, "or!", frb_bv_or_x, 1);
549
+ rb_define_method(cBitVector, "or", frb_bv_or, 1);
550
+ rb_define_method(cBitVector, "|", frb_bv_or, 1);
551
+ rb_define_method(cBitVector, "xor!", frb_bv_xor_x, 1);
552
+ rb_define_method(cBitVector, "xor", frb_bv_xor, 1);
553
+ rb_define_method(cBitVector, "^", frb_bv_xor, 1);
554
+ rb_define_method(cBitVector, "not!", frb_bv_not_x, 0);
555
+ rb_define_method(cBitVector, "not", frb_bv_not, 0);
556
+ rb_define_method(cBitVector, "~", frb_bv_not, 0);
557
+ rb_define_method(cBitVector, "reset_scan", frb_bv_reset_scan, 0);
558
+ rb_define_method(cBitVector, "next", frb_bv_next, 0);
559
+ rb_define_method(cBitVector, "next_unset", frb_bv_next_unset, 0);
560
+ rb_define_method(cBitVector, "next_from", frb_bv_next_from, 1);
561
+ rb_define_method(cBitVector, "next_unset_from", frb_bv_next_unset_from, 1);
562
+ rb_define_method(cBitVector, "each", frb_bv_each, 0);
563
+ rb_define_method(cBitVector, "to_a", frb_bv_to_a, 0);
564
+ }
565
+
566
+ /*******************
567
+ *** MultiMapper ***
568
+ *******************/
569
+ static VALUE cMultiMapper;
570
+
571
+ static void
572
+ frb_mulmap_free(void *p)
573
+ {
574
+ object_del(p);
575
+ frt_mulmap_destroy((FrtMultiMapper *)p);
576
+ }
577
+
578
+ static VALUE
579
+ frb_mulmap_alloc(VALUE klass)
580
+ {
581
+ FrtMultiMapper *mulmap = frt_mulmap_new();
582
+ VALUE rmulmap = Data_Wrap_Struct(klass, NULL, &frb_mulmap_free, mulmap);
583
+ object_add(mulmap, rmulmap);
584
+ return rmulmap;
585
+ }
586
+
587
+ /* XXX: Duplication from frb_add_mapping_i in r_analysis.c */
588
+ static void frb_mulmap_add_mapping_i(FrtMultiMapper *mulmap, VALUE from,
589
+ const char *to)
590
+ {
591
+ switch (TYPE(from)) {
592
+ case T_STRING:
593
+ frt_mulmap_add_mapping(mulmap, rs2s(from), to);
594
+ break;
595
+ case T_SYMBOL:
596
+ frt_mulmap_add_mapping(mulmap, rb_id2name(SYM2ID(from)), to);
597
+ break;
598
+ default:
599
+ rb_raise(rb_eArgError,
600
+ "cannot map from %s with MappingFilter",
601
+ rs2s(rb_obj_as_string(from)));
602
+ break;
603
+ }
604
+ }
605
+
606
+ /* XXX: Duplication from frb_add_mappings_i in r_analysis.c */
607
+ static int frb_mulmap_add_mappings_i(VALUE key, VALUE value, VALUE arg)
608
+ {
609
+ if (key == Qundef) {
610
+ return ST_CONTINUE;
611
+ } else {
612
+ FrtMultiMapper *mulmap = (FrtMultiMapper *)arg;
613
+ const char *to;
614
+ switch (TYPE(value)) {
615
+ case T_STRING:
616
+ to = rs2s(value);
617
+ break;
618
+ case T_SYMBOL:
619
+ to = rb_id2name(SYM2ID(value));
620
+ break;
621
+ default:
622
+ rb_raise(rb_eArgError,
623
+ "cannot map to %s with MultiMapper",
624
+ rs2s(rb_obj_as_string(key)));
625
+ break;
626
+ }
627
+ if (TYPE(key) == T_ARRAY) {
628
+ int i;
629
+ for (i = RARRAY_LEN(key) - 1; i >= 0; i--) {
630
+ frb_mulmap_add_mapping_i(mulmap, RARRAY_PTR(key)[i], to);
631
+ }
632
+ }
633
+ else {
634
+ frb_mulmap_add_mapping_i(mulmap, key, to);
635
+ }
636
+ }
637
+ return ST_CONTINUE;
638
+ }
639
+
640
+ /*
641
+ * call-seq:
642
+ * MultiMapper.new() -> new_multi_mapper
643
+ *
644
+ * Returns a new multi-mapper object and compiles it for optimization.
645
+ *
646
+ * Note that MultiMapper is immutable.
647
+ */
648
+ static VALUE
649
+ frb_mulmap_init(VALUE self, VALUE rmappings)
650
+ {
651
+ FrtMultiMapper *mulmap = DATA_PTR(self);
652
+ rb_hash_foreach(rmappings, frb_mulmap_add_mappings_i, (VALUE)mulmap);
653
+ frt_mulmap_compile(mulmap);
654
+
655
+ return self;
656
+ }
657
+
658
+ /*
659
+ * call-seq:
660
+ * multi_mapper.map(string) -> mapped_string
661
+ *
662
+ * Performs all the mappings on the string.
663
+ */
664
+ VALUE
665
+ frb_mulmap_map(VALUE self, VALUE rstring)
666
+ {
667
+ FrtMultiMapper *mulmap = DATA_PTR(self);
668
+ char *string = rs2s(rb_obj_as_string(rstring));
669
+ char *mapped_string = frt_mulmap_dynamic_map(mulmap, string);
670
+ VALUE rmapped_string = rb_str_new2(mapped_string);
671
+ free(mapped_string);
672
+ return rmapped_string;
673
+ }
674
+
675
+ /*
676
+ * Document-class: Ferret::Utils::MultiMapper
677
+ *
678
+ * == Summary
679
+ *
680
+ * A MultiMapper performs a list of mappings from one string to another. You
681
+ * could of course just use gsub to do this but when you are just mapping
682
+ * strings, this is much faster.
683
+ *
684
+ * Note that MultiMapper is immutable.
685
+ *
686
+ * == Example
687
+ *
688
+ * mapping = {
689
+ * ['à','á','â','ã','ä','å','ā','ă'] => 'a',
690
+ * 'æ' => 'ae',
691
+ * ['ď','đ'] => 'd',
692
+ * ['ç','ć','č','ĉ','ċ'] => 'c',
693
+ * ['è','é','ê','ë','ē','ę','ě','ĕ','ė',] => 'e',
694
+ * ['ƒ'] => 'f',
695
+ * ['ĝ','ğ','ġ','ģ'] => 'g',
696
+ * ['ĥ','ħ'] => 'h',
697
+ * ['ì','ì','í','î','ï','ī','ĩ','ĭ'] => 'i',
698
+ * ['į','ı','ij','ĵ'] => 'j',
699
+ * ['ķ','ĸ'] => 'k',
700
+ * ['ł','ľ','ĺ','ļ','ŀ'] => 'l',
701
+ * ['ñ','ń','ň','ņ','ʼn','ŋ'] => 'n',
702
+ * ['ò','ó','ô','õ','ö','ø','ō','ő','ŏ','ŏ'] => 'o',
703
+ * ['œ'] => 'oek',
704
+ * ['ą'] => 'q',
705
+ * ['ŕ','ř','ŗ'] => 'r',
706
+ * ['ś','š','ş','ŝ','ș'] => 's',
707
+ * ['ť','ţ','ŧ','ț'] => 't',
708
+ * ['ù','ú','û','ü','ū','ů','ű','ŭ','ũ','ų'] => 'u',
709
+ * ['ŵ'] => 'w',
710
+ * ['ý','ÿ','ŷ'] => 'y',
711
+ * ['ž','ż','ź'] => 'z'
+ * }
712
+ * mapper = MultiMapper.new(mapping)
713
+ * mapped_string = mapper.map(string)
714
+ */
715
+ static void
716
+ Init_MultiMapper(void)
717
+ {
718
+ /* MultiMapper */
719
+ cMultiMapper = rb_define_class_under(mUtils, "MultiMapper", rb_cObject);
720
+ rb_define_alloc_func(cMultiMapper, frb_mulmap_alloc);
721
+
722
+ rb_define_method(cMultiMapper, "initialize", frb_mulmap_init, 1);
723
+ rb_define_method(cMultiMapper, "map", frb_mulmap_map, 1);
724
+ }
725
+
726
+ /*********************
727
+ *** PriorityQueue ***
728
+ *********************/
729
+ typedef struct PriQ
730
+ {
731
+ int size;
732
+ int capa;
733
+ int mem_capa;
734
+ VALUE *heap;
735
+ VALUE proc;
736
+ } PriQ;
737
+
738
+ #define PQ_START_CAPA 32
739
+
740
+ static bool frb_pq_lt(VALUE proc, VALUE v1, VALUE v2)
741
+ {
742
+ if (proc == Qnil) {
743
+ return RTEST(rb_funcall(v1, id_lt, 1, v2));
744
+ }
745
+ else {
746
+ return RTEST(rb_funcall(proc, id_call, 2, v1, v2));
747
+ }
748
+ }
749
+
750
+ static void frb_pq_up(PriQ *pq)
751
+ {
752
+ VALUE *heap = pq->heap;
753
+ VALUE node;
754
+ int i = pq->size;
755
+ int j = i >> 1;
756
+
757
+ node = heap[i];
758
+
759
+ while ((j > 0) && frb_pq_lt(pq->proc, node, heap[j])) {
760
+ heap[i] = heap[j];
761
+ i = j;
762
+ j = j >> 1;
763
+ }
764
+ heap[i] = node;
765
+ }
766
+
767
+ static void frb_pq_down(PriQ *pq)
768
+ {
769
+ register int i = 1;
770
+ register int j = 2; /* i << 1; */
771
+ register int k = 3; /* j + 1; */
772
+ register int size = pq->size;
773
+ VALUE *heap = pq->heap;
774
+ VALUE node = heap[i]; /* save top node */
775
+
776
+ if ((k <= size) && (frb_pq_lt(pq->proc, heap[k], heap[j]))) {
777
+ j = k;
778
+ }
779
+
780
+ while ((j <= size) && frb_pq_lt(pq->proc, heap[j], node)) {
781
+ heap[i] = heap[j]; /* shift up child */
782
+ i = j;
783
+ j = i << 1;
784
+ k = j + 1;
785
+ if ((k <= size) && frb_pq_lt(pq->proc, heap[k], heap[j])) {
786
+ j = k;
787
+ }
788
+ }
789
+ heap[i] = node;
790
+ }
791
+
792
+ static void frb_pq_push(PriQ *pq, VALUE elem)
793
+ {
794
+ pq->size++;
795
+ if (pq->size >= pq->mem_capa) {
796
+ pq->mem_capa <<= 1;
797
+ FRT_REALLOC_N(pq->heap, VALUE, pq->mem_capa);
798
+ }
799
+ pq->heap[pq->size] = elem;
800
+ frb_pq_up(pq);
801
+ }
802
+
803
+ static VALUE cPriorityQueue;
804
+
805
+ static void
806
+ frb_pq_mark(void *p)
807
+ {
808
+ PriQ *pq = (PriQ *)p;
809
+ int i;
810
+ for (i = pq->size; i > 0; i--) {
811
+ rb_gc_mark_maybe(pq->heap[i]);
812
+ }
813
+ }
814
+
815
+ static void frb_pq_free(PriQ *pq)
816
+ {
817
+ free(pq->heap);
818
+ free(pq);
819
+ }
820
+
821
+ static VALUE
822
+ frb_pq_alloc(VALUE klass)
823
+ {
824
+ PriQ *pq = FRT_ALLOC_AND_ZERO(PriQ);
825
+ pq->capa = PQ_START_CAPA;
826
+ pq->mem_capa = PQ_START_CAPA;
827
+ pq->heap = FRT_ALLOC_N(VALUE, PQ_START_CAPA);
828
+ pq->proc = Qnil;
829
+ return Data_Wrap_Struct(klass, &frb_pq_mark, &frb_pq_free, pq);
830
+ }
831
+
832
+ #define GET_PQ(pq, self) Data_Get_Struct(self, PriQ, pq)
833
+ /*
834
+ * call-seq:
835
+ * PriorityQueue.new(capacity = 32) -> new_pq
836
+ * PriorityQueue.new({:capacity => 32,
837
+ * :less_than_proc => lambda{|a, b| a < b}) -> new_pq
838
+ * PriorityQueue.new({:capacity => 32}) {|a, b| a < b} -> new_pq
839
+ *
840
+ * Returns a new empty priority queue object with an optional capacity.
841
+ * Once the capacity is filled, the lowest valued elements will be
842
+ * automatically popped off the top of the queue as more elements are
843
+ * inserted into the queue.
844
+ */
845
+ static VALUE
846
+ frb_pq_init(int argc, VALUE *argv, VALUE self)
847
+ {
848
+ if (argc >= 1) {
849
+ PriQ *pq;
850
+ VALUE options = argv[0];
851
+ VALUE param;
852
+ int capa = PQ_START_CAPA;
853
+ GET_PQ(pq, self);
854
+ switch (TYPE(options)) {
855
+ case T_FIXNUM:
856
+ capa = FIX2INT(options);
857
+ break;
858
+ case T_HASH:
859
+ if (!NIL_P(param = rb_hash_aref(options,
860
+ ID2SYM(id_capacity)))) {
861
+ capa = FIX2INT(param);
862
+ }
863
+ if (!NIL_P(param = rb_hash_aref(options,
864
+ ID2SYM(id_less_than)))) {
865
+ pq->proc = param;
866
+ }
867
+ break;
868
+ default:
869
+ rb_raise(rb_eArgError,
870
+ "PriorityQueue#initialize only takes a Hash or "
871
+ "an integer");
872
+
873
+ break;
874
+ }
875
+ if (capa < 0) {
876
+ rb_raise(rb_eIndexError,
877
+ "PriorityQueue must have a capacity > 0. %d < 0",
878
+ capa);
879
+ }
880
+ pq->capa = capa;
881
+ if (rb_block_given_p()) {
882
+ pq->proc = rb_block_proc();
883
+ }
884
+ if (argc > 1) {
885
+ rb_raise(rb_eArgError,
886
+ "PriorityQueue#initialize only takes one parameter");
887
+ }
888
+ }
889
+
890
+ return self;
891
+ }
892
+
893
+ /*
894
+ * call-seq:
895
+ * pq.clone -> frt_pq_clone
896
+ *
897
+ * Returns a shallow clone of the priority queue. That is only the priority
898
+ * queue is cloned, its contents are not cloned.
899
+ */
900
+ static VALUE
901
+ frb_pq_clone(VALUE self)
902
+ {
903
+ PriQ *pq, *new_pq = ALLOC(PriQ);
904
+ GET_PQ(pq, self);
905
+ memcpy(new_pq, pq, sizeof(PriQ));
906
+ new_pq->heap = FRT_ALLOC_N(VALUE, new_pq->mem_capa);
907
+ memcpy(new_pq->heap, pq->heap, sizeof(VALUE) * (new_pq->size + 1));
908
+
909
+ return Data_Wrap_Struct(cPriorityQueue, &frb_pq_mark, &frb_pq_free, new_pq);
910
+ }
911
+
912
+ /*
913
+ * call-seq:
914
+ * pq.clear -> self
915
+ *
916
+ * Clears all elements from the priority queue. The size will be reset to 0.
917
+ */
918
+ static VALUE
919
+ frb_pq_clear(VALUE self)
920
+ {
921
+ PriQ *pq;
922
+ GET_PQ(pq, self);
923
+ pq->size = 0;
924
+ return self;
925
+ }
926
+
927
+ /*
928
+ * call-seq:
929
+ * pq.insert(elem) -> self
930
+ * pq << elem -> self
931
+ *
932
+ * Insert an element into a queue. It will be inserted into the correct
933
+ * position in the queue according to its priority.
934
+ */
935
+ static VALUE
936
+ frb_pq_insert(VALUE self, VALUE elem)
937
+ {
938
+ PriQ *pq;
939
+ GET_PQ(pq, self);
940
+ if (pq->size < pq->capa) {
941
+ frb_pq_push(pq, elem);
942
+ }
943
+ else if (pq->size > 0 && frb_pq_lt(pq->proc, pq->heap[1], elem)) {
944
+ pq->heap[1] = elem;
945
+ frb_pq_down(pq);
946
+ }
947
+ /* else ignore the element */
948
+ return self;
949
+ }
950
+
951
+ /*
952
+ * call-seq:
953
+ * pq.adjust -> self
954
+ *
955
+ * Sometimes you modify the top element in the priority queue so that its
956
+ * priority changes. When you do this you need to reorder the queue and you
957
+ * do this by calling the adjust method.
958
+ */
959
+ static VALUE
960
+ frb_pq_adjust(VALUE self)
961
+ {
962
+ PriQ *pq;
963
+ GET_PQ(pq, self);
964
+ frb_pq_down(pq);
965
+ return self;
966
+ }
967
+
968
+ /*
969
+ * call-seq:
970
+ * pq.top -> elem
971
+ *
972
+ * Returns the top element in the queue but does not remove it from the
973
+ * queue.
974
+ */
975
+ static VALUE
976
+ frb_pq_top(VALUE self)
977
+ {
978
+ PriQ *pq;
979
+ GET_PQ(pq, self);
980
+ return (pq->size > 0) ? pq->heap[1] : Qnil;
981
+ }
982
+
983
+ /*
984
+ * call-seq:
985
+ * pq.pop -> elem
986
+ *
987
+ * Returns the top element in the queue removing it from the queue.
988
+ */
989
+ static VALUE
990
+ frb_pq_pop(VALUE self)
991
+ {
992
+ PriQ *pq;
993
+ GET_PQ(pq, self);
994
+ if (pq->size > 0) {
995
+ VALUE result = pq->heap[1]; /* save first value */
996
+ pq->heap[1] = pq->heap[pq->size]; /* move last to first */
997
+ pq->heap[pq->size] = Qnil;
998
+ pq->size--;
999
+ frb_pq_down(pq); /* adjust heap */
1000
+ return result;
1001
+ }
1002
+ else {
1003
+ return Qnil;
1004
+ }
1005
+ }
1006
+
1007
+ /*
1008
+ * call-seq:
1009
+ * pq.size -> integer
1010
+ *
1011
+ * Returns the size of the queue, ie. the number of elements currently stored
1012
+ * in the queue. The _size_ of a PriorityQueue can never be greater than
1013
+ * its _capacity_
1014
+ */
1015
+ static VALUE
1016
+ frb_pq_size(VALUE self)
1017
+ {
1018
+ PriQ *pq;
1019
+ GET_PQ(pq, self);
1020
+ return INT2FIX(pq->size);
1021
+ }
1022
+
1023
+ /*
1024
+ * call-seq:
1025
+ * pq.capacity -> integer
1026
+ *
1027
+ * Returns the capacity of the queue, ie. the number of elements that can be
1028
+ * stored in a Priority queue before they start to drop off the end. The
1029
+ * _size_ of a PriorityQueue can never be greater than its
1030
+ * _capacity_
1031
+ */
1032
+ static VALUE
1033
+ frb_pq_capa(VALUE self)
1034
+ {
1035
+ PriQ *pq;
1036
+ GET_PQ(pq, self);
1037
+ return INT2FIX(pq->capa);
1038
+ }
1039
+
1040
+ /*
1041
+ * Document-class: Ferret::Utils::PriorityQueue
1042
+ *
1043
+ * == Summary
1044
+ *
1045
+ * A PriorityQueue is a very useful data structure and one that needs a fast
1046
+ * implementation. Hence this priority queue is implemented in C. It is
1047
+ * pretty easy to use; basically you just insert elements into the queue and
1048
+ * pop them off.
1049
+ *
1050
+ * The elements are sorted with the lowest valued elements on the top of
1051
+ * the heap, ie the first to be popped off. Elements are ordered using the
1052
+ * less_than '<' method. To change the order of the queue you can either
1053
+ * reimplement the '<' method or pass a block when you initialize the queue.
1054
+ *
1055
+ * You can also set the capacity of the PriorityQueue. Once you hit the
1056
+ * capacity, the lowest valued elements are automatically popped off the top
1057
+ * of the queue as more elements are added.
1058
+ *
1059
+ * == Example
1060
+ *
1061
+ * Here is a toy example that sorts strings by their length and has a capacity
1062
+ * of 5;
1063
+ *
1064
+ * q = PriorityQueue.new(5) {|a, b| a.size < b.size}
1065
+ * q << "x"
1066
+ * q << "xxxxx"
1067
+ * q << "xxx"
1068
+ * q << "xxxx"
1069
+ * q << "xxxxxx"
1070
+ * q << "xx" # hit capacity so "x" will be popped off the top
1071
+ *
1072
+ * puts q.size #=> 5
1073
+ * word = q.pop #=> "xx"
1074
+ * q.top << "yyyy" # "xxxyyyy" will still be at the top of the queue
1075
+ * q.adjust # move "xxxyyyy" to its correct location in queue
1076
+ * word = q.pop #=> "xxxx"
1077
+ * word = q.pop #=> "xxxxx"
1078
+ * word = q.pop #=> "xxxxxx"
1079
+ * word = q.pop #=> "xxxyyyy"
1080
+ * word = q.pop #=> nil
1081
+ */
1082
+ static void
1083
+ Init_PriorityQueue(void)
1084
+ {
1085
+ /* PriorityQueue */
1086
+ cPriorityQueue = rb_define_class_under(mUtils, "PriorityQueue", rb_cObject);
1087
+ rb_define_alloc_func(cPriorityQueue, frb_pq_alloc);
1088
+
1089
+ rb_define_method(cPriorityQueue, "initialize", frb_pq_init, -1);
1090
+ rb_define_method(cPriorityQueue, "clone", frb_pq_clone, 0);
1091
+ rb_define_method(cPriorityQueue, "clear", frb_pq_clear, 0);
1092
+ rb_define_method(cPriorityQueue, "insert", frb_pq_insert, 1);
1093
+ rb_define_method(cPriorityQueue, "<<", frb_pq_insert, 1);
1094
+ rb_define_method(cPriorityQueue, "top", frb_pq_top, 0);
1095
+ rb_define_method(cPriorityQueue, "pop", frb_pq_pop, 0);
1096
+ rb_define_method(cPriorityQueue, "size", frb_pq_size, 0);
1097
+ rb_define_method(cPriorityQueue, "capacity", frb_pq_capa, 0);
1098
+ rb_define_method(cPriorityQueue, "adjust", frb_pq_adjust, 0);
1099
+ }
1100
+
1101
+ /* rdoc hack
1102
+ extern VALUE mFerret = rb_define_module("Ferret");
1103
+ */
1104
+
1105
+ /*
1106
+ * Document-module: Ferret::Utils
1107
+ *
1108
+ * The Utils module contains a number of helper classes and modules that are
1109
+ * useful when indexing with Ferret. They are;
1110
+ *
1111
+ * * BitVector
1112
+ * * MultiMapper
1113
+ * * PriorityQueue
1114
+ * * => more to come
1115
+ *
1116
+ * These helper classes could also be quite useful outside of Ferret and may
1117
+ * one day find themselves in their own separate library.
1118
+ */
1119
+ void
1120
+ Init_Utils(void)
1121
+ {
1122
+ mUtils = rb_define_module_under(mFerret, "Utils");
1123
+
1124
+ Init_BitVector();
1125
+ Init_MultiMapper();
1126
+ Init_PriorityQueue();
1127
+ }