sdsykes-ferret 0.11.6.19

Sign up to get free protection for your applications and to get access to all the features.
Files changed (195) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +102 -0
  4. data/Rakefile +338 -0
  5. data/TODO +17 -0
  6. data/TUTORIAL +231 -0
  7. data/bin/ferret-browser +79 -0
  8. data/ext/analysis.c +1555 -0
  9. data/ext/analysis.h +219 -0
  10. data/ext/api.c +69 -0
  11. data/ext/api.h +27 -0
  12. data/ext/array.c +123 -0
  13. data/ext/array.h +53 -0
  14. data/ext/bitvector.c +540 -0
  15. data/ext/bitvector.h +272 -0
  16. data/ext/compound_io.c +383 -0
  17. data/ext/config.h +42 -0
  18. data/ext/document.c +156 -0
  19. data/ext/document.h +53 -0
  20. data/ext/except.c +120 -0
  21. data/ext/except.h +168 -0
  22. data/ext/extconf.rb +14 -0
  23. data/ext/ferret.c +402 -0
  24. data/ext/ferret.h +91 -0
  25. data/ext/filter.c +156 -0
  26. data/ext/fs_store.c +483 -0
  27. data/ext/global.c +418 -0
  28. data/ext/global.h +117 -0
  29. data/ext/hash.c +567 -0
  30. data/ext/hash.h +473 -0
  31. data/ext/hashset.c +170 -0
  32. data/ext/hashset.h +187 -0
  33. data/ext/header.h +58 -0
  34. data/ext/helper.c +62 -0
  35. data/ext/helper.h +13 -0
  36. data/ext/inc/lang.h +48 -0
  37. data/ext/inc/threading.h +31 -0
  38. data/ext/index.c +6425 -0
  39. data/ext/index.h +961 -0
  40. data/ext/lang.h +66 -0
  41. data/ext/libstemmer.c +92 -0
  42. data/ext/libstemmer.h +79 -0
  43. data/ext/mempool.c +87 -0
  44. data/ext/mempool.h +35 -0
  45. data/ext/modules.h +162 -0
  46. data/ext/multimapper.c +310 -0
  47. data/ext/multimapper.h +51 -0
  48. data/ext/posh.c +1006 -0
  49. data/ext/posh.h +1007 -0
  50. data/ext/priorityqueue.c +151 -0
  51. data/ext/priorityqueue.h +143 -0
  52. data/ext/q_boolean.c +1608 -0
  53. data/ext/q_const_score.c +161 -0
  54. data/ext/q_filtered_query.c +209 -0
  55. data/ext/q_fuzzy.c +268 -0
  56. data/ext/q_match_all.c +148 -0
  57. data/ext/q_multi_term.c +677 -0
  58. data/ext/q_parser.c +2825 -0
  59. data/ext/q_phrase.c +1126 -0
  60. data/ext/q_prefix.c +100 -0
  61. data/ext/q_range.c +350 -0
  62. data/ext/q_span.c +2402 -0
  63. data/ext/q_term.c +337 -0
  64. data/ext/q_wildcard.c +171 -0
  65. data/ext/r_analysis.c +2575 -0
  66. data/ext/r_index.c +3472 -0
  67. data/ext/r_qparser.c +585 -0
  68. data/ext/r_search.c +4105 -0
  69. data/ext/r_store.c +513 -0
  70. data/ext/r_utils.c +963 -0
  71. data/ext/ram_store.c +471 -0
  72. data/ext/search.c +1741 -0
  73. data/ext/search.h +885 -0
  74. data/ext/similarity.c +150 -0
  75. data/ext/similarity.h +82 -0
  76. data/ext/sort.c +983 -0
  77. data/ext/stem_ISO_8859_1_danish.c +338 -0
  78. data/ext/stem_ISO_8859_1_danish.h +16 -0
  79. data/ext/stem_ISO_8859_1_dutch.c +635 -0
  80. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  81. data/ext/stem_ISO_8859_1_english.c +1156 -0
  82. data/ext/stem_ISO_8859_1_english.h +16 -0
  83. data/ext/stem_ISO_8859_1_finnish.c +792 -0
  84. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  85. data/ext/stem_ISO_8859_1_french.c +1276 -0
  86. data/ext/stem_ISO_8859_1_french.h +16 -0
  87. data/ext/stem_ISO_8859_1_german.c +512 -0
  88. data/ext/stem_ISO_8859_1_german.h +16 -0
  89. data/ext/stem_ISO_8859_1_italian.c +1091 -0
  90. data/ext/stem_ISO_8859_1_italian.h +16 -0
  91. data/ext/stem_ISO_8859_1_norwegian.c +296 -0
  92. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  93. data/ext/stem_ISO_8859_1_porter.c +776 -0
  94. data/ext/stem_ISO_8859_1_porter.h +16 -0
  95. data/ext/stem_ISO_8859_1_portuguese.c +1035 -0
  96. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  97. data/ext/stem_ISO_8859_1_spanish.c +1119 -0
  98. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  99. data/ext/stem_ISO_8859_1_swedish.c +307 -0
  100. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  101. data/ext/stem_KOI8_R_russian.c +701 -0
  102. data/ext/stem_KOI8_R_russian.h +16 -0
  103. data/ext/stem_UTF_8_danish.c +344 -0
  104. data/ext/stem_UTF_8_danish.h +16 -0
  105. data/ext/stem_UTF_8_dutch.c +653 -0
  106. data/ext/stem_UTF_8_dutch.h +16 -0
  107. data/ext/stem_UTF_8_english.c +1176 -0
  108. data/ext/stem_UTF_8_english.h +16 -0
  109. data/ext/stem_UTF_8_finnish.c +808 -0
  110. data/ext/stem_UTF_8_finnish.h +16 -0
  111. data/ext/stem_UTF_8_french.c +1296 -0
  112. data/ext/stem_UTF_8_french.h +16 -0
  113. data/ext/stem_UTF_8_german.c +526 -0
  114. data/ext/stem_UTF_8_german.h +16 -0
  115. data/ext/stem_UTF_8_italian.c +1113 -0
  116. data/ext/stem_UTF_8_italian.h +16 -0
  117. data/ext/stem_UTF_8_norwegian.c +302 -0
  118. data/ext/stem_UTF_8_norwegian.h +16 -0
  119. data/ext/stem_UTF_8_porter.c +794 -0
  120. data/ext/stem_UTF_8_porter.h +16 -0
  121. data/ext/stem_UTF_8_portuguese.c +1055 -0
  122. data/ext/stem_UTF_8_portuguese.h +16 -0
  123. data/ext/stem_UTF_8_russian.c +709 -0
  124. data/ext/stem_UTF_8_russian.h +16 -0
  125. data/ext/stem_UTF_8_spanish.c +1137 -0
  126. data/ext/stem_UTF_8_spanish.h +16 -0
  127. data/ext/stem_UTF_8_swedish.c +313 -0
  128. data/ext/stem_UTF_8_swedish.h +16 -0
  129. data/ext/stopwords.c +401 -0
  130. data/ext/store.c +692 -0
  131. data/ext/store.h +777 -0
  132. data/ext/term_vectors.c +352 -0
  133. data/ext/threading.h +31 -0
  134. data/ext/utilities.c +446 -0
  135. data/ext/win32.h +54 -0
  136. data/lib/ferret.rb +29 -0
  137. data/lib/ferret/browser.rb +246 -0
  138. data/lib/ferret/browser/s/global.js +192 -0
  139. data/lib/ferret/browser/s/style.css +148 -0
  140. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  141. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  142. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  143. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  144. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  145. data/lib/ferret/browser/views/layout.rhtml +22 -0
  146. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  147. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  148. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  149. data/lib/ferret/browser/webrick.rb +14 -0
  150. data/lib/ferret/document.rb +130 -0
  151. data/lib/ferret/field_infos.rb +44 -0
  152. data/lib/ferret/index.rb +786 -0
  153. data/lib/ferret/number_tools.rb +157 -0
  154. data/lib/ferret_version.rb +3 -0
  155. data/setup.rb +1555 -0
  156. data/test/test_all.rb +5 -0
  157. data/test/test_helper.rb +24 -0
  158. data/test/threading/number_to_spoken.rb +132 -0
  159. data/test/threading/thread_safety_index_test.rb +79 -0
  160. data/test/threading/thread_safety_read_write_test.rb +76 -0
  161. data/test/threading/thread_safety_test.rb +133 -0
  162. data/test/unit/analysis/tc_analyzer.rb +548 -0
  163. data/test/unit/analysis/tc_token_stream.rb +646 -0
  164. data/test/unit/index/tc_index.rb +762 -0
  165. data/test/unit/index/tc_index_reader.rb +699 -0
  166. data/test/unit/index/tc_index_writer.rb +437 -0
  167. data/test/unit/index/th_doc.rb +315 -0
  168. data/test/unit/largefile/tc_largefile.rb +46 -0
  169. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  170. data/test/unit/search/tc_filter.rb +135 -0
  171. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  172. data/test/unit/search/tc_index_searcher.rb +61 -0
  173. data/test/unit/search/tc_multi_searcher.rb +128 -0
  174. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  175. data/test/unit/search/tc_search_and_sort.rb +179 -0
  176. data/test/unit/search/tc_sort.rb +49 -0
  177. data/test/unit/search/tc_sort_field.rb +27 -0
  178. data/test/unit/search/tc_spans.rb +190 -0
  179. data/test/unit/search/tm_searcher.rb +384 -0
  180. data/test/unit/store/tc_fs_store.rb +77 -0
  181. data/test/unit/store/tc_ram_store.rb +35 -0
  182. data/test/unit/store/tm_store.rb +34 -0
  183. data/test/unit/store/tm_store_lock.rb +68 -0
  184. data/test/unit/tc_document.rb +81 -0
  185. data/test/unit/ts_analysis.rb +2 -0
  186. data/test/unit/ts_index.rb +2 -0
  187. data/test/unit/ts_largefile.rb +4 -0
  188. data/test/unit/ts_query_parser.rb +2 -0
  189. data/test/unit/ts_search.rb +2 -0
  190. data/test/unit/ts_store.rb +2 -0
  191. data/test/unit/ts_utils.rb +2 -0
  192. data/test/unit/utils/tc_bit_vector.rb +295 -0
  193. data/test/unit/utils/tc_number_tools.rb +117 -0
  194. data/test/unit/utils/tc_priority_queue.rb +106 -0
  195. metadata +285 -0
@@ -0,0 +1,540 @@
1
+ #include "bitvector.h"
2
+ #include <string.h>
3
+
4
+ BitVector *bv_new_capa(int capa)
5
+ {
6
+ BitVector *bv = ALLOC(BitVector);
7
+
8
+ /* The capacity passed by the user is number of bits allowed, however we
9
+ * store capacity as the number of words (U32) allocated. */
10
+ bv->capa = (capa >> 5) + 1;
11
+ bv->bits = ALLOC_AND_ZERO_N(f_u32, bv->capa);
12
+
13
+ bv->size = 0;
14
+ bv->count = 0;
15
+ bv->curr_bit = -1;
16
+ bv->extends_as_ones = 0;
17
+ bv->ref_cnt = 1;
18
+ return bv;
19
+ }
20
+
21
+ BitVector *bv_new()
22
+ {
23
+ return bv_new_capa(BV_INIT_CAPA);
24
+ }
25
+
26
+ void bv_destroy(BitVector * bv)
27
+ {
28
+ if (--(bv->ref_cnt) == 0) {
29
+ free(bv->bits);
30
+ free(bv);
31
+ }
32
+ }
33
+
34
+ void bv_set(BitVector * bv, int bit)
35
+ {
36
+ f_u32 *word_p;
37
+ int word = bit >> 5;
38
+ f_u32 bitmask = 1 << (bit & 31);
39
+
40
+ /* Check to see if we need to grow the BitVector */
41
+ if (bit >= bv->size) {
42
+ bv->size = bit + 1; /* size is max range of bits set */
43
+ if (word >= bv->capa) {
44
+ int capa = bv->capa << 1;
45
+ while (capa <= word) {
46
+ capa <<= 1;
47
+ }
48
+ REALLOC_N(bv->bits, f_u32, capa);
49
+ memset(bv->bits + bv->capa, (bv->extends_as_ones ? 0xFF : 0),
50
+ sizeof(f_u32) * (capa - bv->capa));
51
+ bv->capa = capa;
52
+ }
53
+ }
54
+
55
+ /* Set the required bit */
56
+ word_p = &(bv->bits[word]);
57
+ if ((bitmask & *word_p) == 0) {
58
+ bv->count++; /* update count */
59
+ *word_p |= bitmask;
60
+ }
61
+ }
62
+
63
+ /*
64
+ * This method relies on the fact that enough space has been set for the bits
65
+ * to be set. You need to create the BitVector using bv_new_capa(capa) with
66
+ * a capacity larger than any bit being set.
67
+ */
68
+ void bv_set_fast(BitVector * bv, int bit)
69
+ {
70
+ bv->count++;
71
+ bv->size = bit;
72
+ bv->bits[bit >> 5] |= 1 << (bit & 31);
73
+ }
74
+
75
+ int bv_get(BitVector * bv, int bit)
76
+ {
77
+ /* out of range so return 0 because it can't have been set */
78
+ if (bit >= bv->size) {
79
+ return bv->extends_as_ones;
80
+ }
81
+ return (bv->bits[bit >> 5] >> (bit & 31)) & 0x01;
82
+ }
83
+
84
+ void bv_clear(BitVector * bv)
85
+ {
86
+ memset(bv->bits, 0, bv->capa * sizeof(f_u32));
87
+ bv->extends_as_ones = 0;
88
+ bv->count = 0;
89
+ bv->size = 0;
90
+ }
91
+
92
+ /*
93
+ * FIXME: if the top set bit is unset, size is not adjusted. This will not
94
+ * cause any bugs in this code but could cause problems if users are relying
95
+ * on the fact that size is accurate.
96
+ */
97
+ void bv_unset(BitVector * bv, int bit)
98
+ {
99
+ f_u32 *word_p;
100
+ f_u32 bitmask;
101
+ int word = bit >> 5;
102
+
103
+ if (bit >= bv->size) {
104
+ bv->size = bit + 1; /* size is max range of bits set */
105
+ if (word >= bv->capa) {
106
+ int capa = bv->capa << 1;
107
+
108
+ while (capa <= word) {
109
+ capa <<= 1;
110
+ }
111
+ REALLOC_N(bv->bits, f_u32, capa);
112
+ memset(bv->bits + bv->capa, (bv->extends_as_ones ? 0xFF : 0),
113
+ sizeof(f_u32) * (capa - bv->capa));
114
+ bv->capa = capa;
115
+ }
116
+ }
117
+
118
+ word_p = &(bv->bits[word]);
119
+ bitmask = 1 << (bit & 31);
120
+ if ((bitmask & *word_p) > 0) {
121
+ bv->count--; /* update count */
122
+ *word_p &= ~bitmask;
123
+ }
124
+ }
125
+
126
+ /* Table of bits per char. This table is used by the bv_recount method to
127
+ * optimize the counting of bits */
128
+ static const uchar BYTE_COUNTS[] = {
129
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
130
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
131
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
132
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
133
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
134
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
135
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
136
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
137
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
138
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
139
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
140
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
141
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
142
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
143
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
144
+ 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
145
+ };
146
+
147
+ int bv_recount(BitVector * bv)
148
+ {
149
+ /* if the vector has been modified */
150
+ int i, c = 0;
151
+ uchar *bytes = (uchar *)bv->bits; /* count by character */
152
+ const int num_bytes = (((bv->size >> 5) + 1) << 2);
153
+ if (bv->extends_as_ones) {
154
+ for (i = 0; i < num_bytes; i++) {
155
+ c += BYTE_COUNTS[~(bytes[i]) & 0xFF]; /* sum bits per char */
156
+ }
157
+ }
158
+ else {
159
+ for (i = 0; i < num_bytes; i++) {
160
+ c += BYTE_COUNTS[bytes[i]]; /* sum bits per char */
161
+ }
162
+ }
163
+ bv->count = c;
164
+ return c;
165
+ }
166
+
167
+ void bv_scan_reset(BitVector * bv)
168
+ {
169
+ bv->curr_bit = -1;
170
+ }
171
+
172
/*
 * Trailing-zero lookup table: NUM_TRAILING_ZEROS[b] is the index of the
 * lowest set bit in the byte value b, with 8 for b == 0 (no bit set).
 * Used by the bit scanning code to locate the next set bit a byte at a time.
 * Each row below covers 16 consecutive byte values.
 */
const int NUM_TRAILING_ZEROS[] = {
    /* 0x00 */ 8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    /* 0x10 */ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    /* 0x20 */ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    /* 0x30 */ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    /* 0x40 */ 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    /* 0x50 */ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    /* 0x60 */ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    /* 0x70 */ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    /* 0x80 */ 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    /* 0x90 */ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    /* 0xA0 */ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    /* 0xB0 */ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    /* 0xC0 */ 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    /* 0xD0 */ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    /* 0xE0 */ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    /* 0xF0 */ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};
192
+
193
+ /*
194
+ * This method is highly optimized, hence the loop unrolling
195
+ */
196
+ static INLINE int bv_get_1_offset(f_u32 word)
197
+ {
198
+ if (word & 0xff) {
199
+ return NUM_TRAILING_ZEROS[word & 0xff];
200
+ }
201
+ else {
202
+ word >>= 8;
203
+ if (word & 0xff) {
204
+ return NUM_TRAILING_ZEROS[word & 0xff] + 8;
205
+ }
206
+ else {
207
+ word >>= 8;
208
+ if (word & 0xff) {
209
+ return NUM_TRAILING_ZEROS[word & 0xff] + 16;
210
+ }
211
+ else {
212
+ word >>= 8;
213
+ return NUM_TRAILING_ZEROS[word & 0xff] + 24;
214
+ }
215
+ }
216
+ }
217
+ }
218
+ /*
219
+ * second fastest;
220
+ *
221
+ * while ((inc = NUM_TRAILING_ZEROS[word & 0xff]) == 8) {
222
+ * word >>= 8;
223
+ * bit_pos += 8;
224
+ * }
225
+ *
226
+ * third fastest;
227
+ *
228
+ * bit_pos += inc;
229
+ * if ((word & 0xffff) == 0) {
230
+ * bit_pos += 16;
231
+ * word >>= 16;
232
+ * }
233
+ * if ((word & 0xff) == 0) {
234
+ * bit_pos += 8;
235
+ * word >>= 8;
236
+ * }
237
+ * bit_pos += NUM_TRAILING_ZEROS[word & 0xff];
238
+ */
239
+
240
+ int bv_scan_next_from(BitVector * bv, register const int from)
241
+ {
242
+ register const f_u32 *const bits = bv->bits;
243
+ register const int word_size = (bv->size >> 5) + 1;
244
+ register int word_pos = from >> 5;
245
+ register int bit_pos = (from & 31);
246
+ register f_u32 word = bits[word_pos] >> bit_pos;
247
+
248
+ if (from >= bv->size) {
249
+ return -1;
250
+ }
251
+ if (word == 0) {
252
+ bit_pos = 0;
253
+ do {
254
+ word_pos++;
255
+ if (word_pos >= word_size) {
256
+ return -1;
257
+ }
258
+ } while (bits[word_pos] == 0);
259
+ word = bits[word_pos];
260
+ }
261
+
262
+ /* check the word a byte at a time as the NUM_TRAILING_ZEROS table would
263
+ * be too large for 32-bit integer or even a 16-bit integer */
264
+ bit_pos += bv_get_1_offset(word);
265
+
266
+ return bv->curr_bit = ((word_pos << 5) + bit_pos);
267
+ }
268
+
269
+ int bv_scan_next(BitVector * bv)
270
+ {
271
+ return bv_scan_next_from(bv, bv->curr_bit + 1);
272
+ }
273
+
274
+ int bv_scan_next_unset_from(BitVector * bv, register const int from)
275
+ {
276
+ register const f_u32 *const bits = bv->bits;
277
+ register const int word_size = (bv->size >> 5) + 1;
278
+ register int word_pos = from >> 5;
279
+ register int bit_pos = (from & 31);
280
+ register f_u32 word = ~(~(bits[word_pos]) >> bit_pos);
281
+
282
+ if (from >= bv->size) {
283
+ return -1;
284
+ }
285
+ if (word == 0xFFFFFFFF) {
286
+ bit_pos = 0;
287
+ do {
288
+ word_pos++;
289
+ if (word_pos >= word_size) {
290
+ return -1;
291
+ }
292
+ } while (bits[word_pos] == 0xFFFFFFFF);
293
+ word = bits[word_pos];
294
+ }
295
+
296
+ bit_pos += bv_get_1_offset(~word);
297
+
298
+ return bv->curr_bit = ((word_pos << 5) + bit_pos);
299
+ }
300
+
301
+ int bv_scan_next_unset(BitVector * bv)
302
+ {
303
+ return bv_scan_next_unset_from(bv, bv->curr_bit + 1);
304
+ }
305
+
306
+ int bv_eq(BitVector *bv1, BitVector *bv2)
307
+ {
308
+ if (bv1 == bv2) {
309
+ return true;
310
+ }
311
+ else if (bv1->extends_as_ones != bv2->extends_as_ones) {
312
+ return false;
313
+ }
314
+ else {
315
+ f_u32 *bits = bv1->bits;
316
+ f_u32 *bits2 = bv2->bits;
317
+ int min_size = min2(bv1->size, bv2->size);
318
+ int word_size = (min_size >> 5) + 1;
319
+ int ext_word_size = 0;
320
+
321
+ int i;
322
+
323
+ for (i = 0; i < word_size; i++) {
324
+ if (bits[i] != bits2[i]) {
325
+ return false;
326
+ }
327
+ }
328
+ if (bv1->size > min_size) {
329
+ bits = bv1->bits;
330
+ ext_word_size = (bv1->size >> 5) + 1;
331
+ }
332
+ else if (bv2->size > min_size) {
333
+ bits = bv2->bits;
334
+ ext_word_size = (bv2->size >> 5) + 1;
335
+ }
336
+ if (ext_word_size) {
337
+ const f_u32 expected = (bv1->extends_as_ones ? 0xFFFFFFFF : 0);
338
+ for (i = word_size; i < ext_word_size; i++) {
339
+ if (bits[i] != expected) {
340
+ return false;
341
+ }
342
+ }
343
+ }
344
+ }
345
+ return true;
346
+ }
347
+
348
+ unsigned long bv_hash(BitVector *bv)
349
+ {
350
+ unsigned long hash = 0;
351
+ const f_u32 empty_word = bv->extends_as_ones ? 0xFFFFFFFF : 0;
352
+ int i;
353
+ for (i = (bv->size >> 5); i >= 0; i--) {
354
+ const f_u32 word = bv->bits[i];
355
+ if (word != empty_word) {
356
+ hash = (hash << 1) ^ word;
357
+ }
358
+ }
359
+ hash = (hash << 1) | bv->extends_as_ones;
360
+ return hash;
361
+ }
362
+
363
+ static INLINE void bv_recapa(BitVector *bv, int new_capa)
364
+ {
365
+ if (bv->capa < new_capa) {
366
+ REALLOC_N(bv->bits, f_u32, new_capa);
367
+ memset(bv->bits + bv->capa, (bv->extends_as_ones ? 0xFF : 0),
368
+ sizeof(f_u32) * (new_capa - bv->capa));
369
+ bv->capa = new_capa;
370
+ }
371
+ }
372
+
373
+ static BitVector *bv_and_i(BitVector *bv, BitVector *bv1, BitVector *bv2)
374
+ {
375
+ int i;
376
+ int size;
377
+ int word_size;
378
+ int capa = 4;
379
+
380
+ if (bv1->extends_as_ones && bv2->extends_as_ones) {
381
+ size = max2(bv1->size, bv2->size);
382
+ bv->extends_as_ones = true;
383
+ }
384
+ else if (bv1->extends_as_ones || bv2->extends_as_ones) {
385
+ size = max2(bv1->size, bv2->size);
386
+ bv->extends_as_ones = false;
387
+ }
388
+ else {
389
+ size = min2(bv1->size, bv2->size);
390
+ bv->extends_as_ones = false;
391
+ }
392
+
393
+ word_size = (size >> 5) + 1;
394
+ while (capa < word_size) {
395
+ capa <<= 1;
396
+ }
397
+ bv_recapa(bv1, capa);
398
+ bv_recapa(bv2, capa);
399
+ REALLOC_N(bv->bits, f_u32, capa);
400
+ bv->capa = capa;
401
+ bv->size = size;
402
+
403
+ memset(bv->bits + word_size, (bv->extends_as_ones ? 0xFF : 0),
404
+ sizeof(f_u32) * (capa - word_size));
405
+
406
+ for (i = 0; i < word_size; i++) {
407
+ bv->bits[i] = bv1->bits[i] & bv2->bits[i];
408
+ }
409
+
410
+ bv_recount(bv);
411
+ return bv;
412
+ }
413
+
414
+ BitVector *bv_and(BitVector *bv1, BitVector *bv2)
415
+ {
416
+ return bv_and_i(bv_new(), bv1, bv2);
417
+ }
418
+
419
+ BitVector *bv_and_x(BitVector *bv1, BitVector *bv2)
420
+ {
421
+ return bv_and_i(bv1, bv1, bv2);
422
+ }
423
+
424
+ static BitVector *bv_or_i(BitVector *bv, BitVector *bv1, BitVector *bv2)
425
+ {
426
+ int i;
427
+ int max_size = max2(bv1->size, bv2->size);
428
+ int word_size = (max_size >> 5) + 1;
429
+ int capa = 4;
430
+ while (capa < word_size) {
431
+ capa <<= 1;
432
+ }
433
+ REALLOC_N(bv->bits, f_u32, capa);
434
+ bv->capa = capa;
435
+ bv->size = max_size;
436
+
437
+ bv_recapa(bv1, capa);
438
+ bv_recapa(bv2, capa);
439
+
440
+ if (bv1->extends_as_ones || bv2->extends_as_ones) {
441
+ bv->extends_as_ones = true;
442
+ }
443
+ else {
444
+ bv->extends_as_ones = false;
445
+ }
446
+
447
+ memset(bv->bits + word_size, (bv->extends_as_ones ? 0xFF : 0),
448
+ sizeof(f_u32) * (capa - word_size));
449
+
450
+ for (i = 0; i < word_size; i++) {
451
+ bv->bits[i] = bv1->bits[i] | bv2->bits[i];
452
+ }
453
+ bv_recount(bv);
454
+ return bv;
455
+ }
456
+
457
+ BitVector *bv_or(BitVector *bv1, BitVector *bv2)
458
+ {
459
+ return bv_or_i(bv_new(), bv1, bv2);
460
+ }
461
+
462
+ BitVector *bv_or_x(BitVector *bv1, BitVector *bv2)
463
+ {
464
+ return bv_or_i(bv1, bv1, bv2);
465
+ }
466
+
467
+ static BitVector *bv_xor_i(BitVector *bv, BitVector *bv1, BitVector *bv2)
468
+ {
469
+ int i;
470
+ int max_size = max2(bv1->size, bv2->size);
471
+ int word_size = (max_size >> 5) + 1;
472
+ int capa = 4;
473
+ while (capa < word_size) {
474
+ capa <<= 1;
475
+ }
476
+ REALLOC_N(bv->bits, f_u32, capa);
477
+ bv->capa = capa;
478
+ bv->size = max_size;
479
+
480
+ bv_recapa(bv1, capa);
481
+ bv_recapa(bv2, capa);
482
+
483
+ if (bv1->extends_as_ones != bv2->extends_as_ones) {
484
+ bv->extends_as_ones = true;
485
+ }
486
+ else {
487
+ bv->extends_as_ones = false;
488
+ }
489
+
490
+ memset(bv->bits + word_size, (bv->extends_as_ones ? 0xFF : 0),
491
+ sizeof(f_u32) * (capa - word_size));
492
+
493
+ for (i = 0; i < word_size; i++) {
494
+ bv->bits[i] = bv1->bits[i] ^ bv2->bits[i];
495
+ }
496
+ bv_recount(bv);
497
+ return bv;
498
+ }
499
+
500
+ BitVector *bv_xor(BitVector *bv1, BitVector *bv2)
501
+ {
502
+ return bv_xor_i(bv_new(), bv1, bv2);
503
+ }
504
+
505
+ BitVector *bv_xor_x(BitVector *bv1, BitVector *bv2)
506
+ {
507
+ return bv_xor_i(bv1, bv1, bv2);
508
+ }
509
+
510
+ static BitVector *bv_not_i(BitVector *bv, BitVector *bv1)
511
+ {
512
+ int i;
513
+ int word_size = (bv1->size >> 5) + 1;
514
+ int capa = 4;
515
+ while (capa < word_size) {
516
+ capa <<= 1;
517
+ }
518
+ REALLOC_N(bv->bits, f_u32, capa);
519
+ bv->capa = capa;
520
+ bv->size = bv1->size;
521
+ bv->extends_as_ones = 1 - bv1->extends_as_ones;
522
+ memset(bv->bits + word_size, (bv->extends_as_ones ? 0xFF : 0),
523
+ sizeof(f_u32) * (capa - word_size));
524
+
525
+ for (i = 0; i < word_size; i++) {
526
+ bv->bits[i] = ~(bv1->bits[i]);
527
+ }
528
+ bv_recount(bv);
529
+ return bv;
530
+ }
531
+
532
+ BitVector *bv_not(BitVector *bv1)
533
+ {
534
+ return bv_not_i(bv_new(), bv1);
535
+ }
536
+
537
+ BitVector *bv_not_x(BitVector *bv1)
538
+ {
539
+ return bv_not_i(bv1, bv1);
540
+ }