jk-ferret 0.11.8.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (228) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +90 -0
  4. data/RELEASE_CHANGES +137 -0
  5. data/RELEASE_NOTES +60 -0
  6. data/Rakefile +443 -0
  7. data/TODO +109 -0
  8. data/TUTORIAL +231 -0
  9. data/bin/ferret-browser +79 -0
  10. data/ext/BZLIB_blocksort.c +1094 -0
  11. data/ext/BZLIB_bzlib.c +1578 -0
  12. data/ext/BZLIB_compress.c +672 -0
  13. data/ext/BZLIB_crctable.c +104 -0
  14. data/ext/BZLIB_decompress.c +626 -0
  15. data/ext/BZLIB_huffman.c +205 -0
  16. data/ext/BZLIB_randtable.c +84 -0
  17. data/ext/STEMMER_api.c +66 -0
  18. data/ext/STEMMER_libstemmer.c +93 -0
  19. data/ext/STEMMER_stem_ISO_8859_1_danish.c +337 -0
  20. data/ext/STEMMER_stem_ISO_8859_1_dutch.c +624 -0
  21. data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
  22. data/ext/STEMMER_stem_ISO_8859_1_finnish.c +762 -0
  23. data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
  24. data/ext/STEMMER_stem_ISO_8859_1_german.c +503 -0
  25. data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
  26. data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
  27. data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
  28. data/ext/STEMMER_stem_ISO_8859_1_porter.c +749 -0
  29. data/ext/STEMMER_stem_ISO_8859_1_portuguese.c +1017 -0
  30. data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
  31. data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
  32. data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
  33. data/ext/STEMMER_stem_KOI8_R_russian.c +700 -0
  34. data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
  35. data/ext/STEMMER_stem_UTF_8_dutch.c +634 -0
  36. data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
  37. data/ext/STEMMER_stem_UTF_8_finnish.c +768 -0
  38. data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
  39. data/ext/STEMMER_stem_UTF_8_german.c +509 -0
  40. data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
  41. data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
  42. data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
  43. data/ext/STEMMER_stem_UTF_8_porter.c +755 -0
  44. data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
  45. data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
  46. data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
  47. data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
  48. data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
  49. data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
  50. data/ext/STEMMER_utilities.c +478 -0
  51. data/ext/analysis.c +1710 -0
  52. data/ext/analysis.h +266 -0
  53. data/ext/api.h +26 -0
  54. data/ext/array.c +125 -0
  55. data/ext/array.h +62 -0
  56. data/ext/bitvector.c +96 -0
  57. data/ext/bitvector.h +594 -0
  58. data/ext/bzlib.h +282 -0
  59. data/ext/bzlib_private.h +503 -0
  60. data/ext/compound_io.c +384 -0
  61. data/ext/config.h +52 -0
  62. data/ext/document.c +159 -0
  63. data/ext/document.h +63 -0
  64. data/ext/except.c +102 -0
  65. data/ext/except.h +176 -0
  66. data/ext/extconf.rb +15 -0
  67. data/ext/ferret.c +416 -0
  68. data/ext/ferret.h +94 -0
  69. data/ext/field_index.c +262 -0
  70. data/ext/field_index.h +52 -0
  71. data/ext/filter.c +157 -0
  72. data/ext/fs_store.c +493 -0
  73. data/ext/global.c +458 -0
  74. data/ext/global.h +302 -0
  75. data/ext/hash.c +524 -0
  76. data/ext/hash.h +515 -0
  77. data/ext/hashset.c +192 -0
  78. data/ext/hashset.h +215 -0
  79. data/ext/header.h +58 -0
  80. data/ext/helper.c +63 -0
  81. data/ext/helper.h +21 -0
  82. data/ext/index.c +6804 -0
  83. data/ext/index.h +935 -0
  84. data/ext/internal.h +1019 -0
  85. data/ext/lang.c +10 -0
  86. data/ext/lang.h +68 -0
  87. data/ext/libstemmer.h +79 -0
  88. data/ext/mempool.c +88 -0
  89. data/ext/mempool.h +43 -0
  90. data/ext/modules.h +190 -0
  91. data/ext/multimapper.c +351 -0
  92. data/ext/multimapper.h +60 -0
  93. data/ext/posh.c +1006 -0
  94. data/ext/posh.h +973 -0
  95. data/ext/priorityqueue.c +149 -0
  96. data/ext/priorityqueue.h +155 -0
  97. data/ext/q_boolean.c +1621 -0
  98. data/ext/q_const_score.c +162 -0
  99. data/ext/q_filtered_query.c +212 -0
  100. data/ext/q_fuzzy.c +280 -0
  101. data/ext/q_match_all.c +149 -0
  102. data/ext/q_multi_term.c +673 -0
  103. data/ext/q_parser.c +3103 -0
  104. data/ext/q_phrase.c +1206 -0
  105. data/ext/q_prefix.c +98 -0
  106. data/ext/q_range.c +682 -0
  107. data/ext/q_span.c +2390 -0
  108. data/ext/q_term.c +337 -0
  109. data/ext/q_wildcard.c +167 -0
  110. data/ext/r_analysis.c +2626 -0
  111. data/ext/r_index.c +3468 -0
  112. data/ext/r_qparser.c +635 -0
  113. data/ext/r_search.c +4490 -0
  114. data/ext/r_store.c +513 -0
  115. data/ext/r_utils.c +1131 -0
  116. data/ext/ram_store.c +476 -0
  117. data/ext/scanner.c +895 -0
  118. data/ext/scanner.h +36 -0
  119. data/ext/scanner_mb.c +6701 -0
  120. data/ext/scanner_utf8.c +4415 -0
  121. data/ext/search.c +1864 -0
  122. data/ext/search.h +953 -0
  123. data/ext/similarity.c +151 -0
  124. data/ext/similarity.h +89 -0
  125. data/ext/sort.c +786 -0
  126. data/ext/stem_ISO_8859_1_danish.h +16 -0
  127. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  128. data/ext/stem_ISO_8859_1_english.h +16 -0
  129. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  130. data/ext/stem_ISO_8859_1_french.h +16 -0
  131. data/ext/stem_ISO_8859_1_german.h +16 -0
  132. data/ext/stem_ISO_8859_1_hungarian.h +16 -0
  133. data/ext/stem_ISO_8859_1_italian.h +16 -0
  134. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  135. data/ext/stem_ISO_8859_1_porter.h +16 -0
  136. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  137. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  138. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  139. data/ext/stem_ISO_8859_2_romanian.h +16 -0
  140. data/ext/stem_KOI8_R_russian.h +16 -0
  141. data/ext/stem_UTF_8_danish.h +16 -0
  142. data/ext/stem_UTF_8_dutch.h +16 -0
  143. data/ext/stem_UTF_8_english.h +16 -0
  144. data/ext/stem_UTF_8_finnish.h +16 -0
  145. data/ext/stem_UTF_8_french.h +16 -0
  146. data/ext/stem_UTF_8_german.h +16 -0
  147. data/ext/stem_UTF_8_hungarian.h +16 -0
  148. data/ext/stem_UTF_8_italian.h +16 -0
  149. data/ext/stem_UTF_8_norwegian.h +16 -0
  150. data/ext/stem_UTF_8_porter.h +16 -0
  151. data/ext/stem_UTF_8_portuguese.h +16 -0
  152. data/ext/stem_UTF_8_romanian.h +16 -0
  153. data/ext/stem_UTF_8_russian.h +16 -0
  154. data/ext/stem_UTF_8_spanish.h +16 -0
  155. data/ext/stem_UTF_8_swedish.h +16 -0
  156. data/ext/stem_UTF_8_turkish.h +16 -0
  157. data/ext/stopwords.c +410 -0
  158. data/ext/store.c +698 -0
  159. data/ext/store.h +799 -0
  160. data/ext/symbol.c +10 -0
  161. data/ext/symbol.h +23 -0
  162. data/ext/term_vectors.c +73 -0
  163. data/ext/threading.h +31 -0
  164. data/ext/win32.h +62 -0
  165. data/lib/ferret.rb +30 -0
  166. data/lib/ferret/browser.rb +246 -0
  167. data/lib/ferret/browser/s/global.js +192 -0
  168. data/lib/ferret/browser/s/style.css +148 -0
  169. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  170. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  171. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  172. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  173. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  174. data/lib/ferret/browser/views/layout.rhtml +22 -0
  175. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  176. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  177. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  178. data/lib/ferret/browser/webrick.rb +14 -0
  179. data/lib/ferret/document.rb +130 -0
  180. data/lib/ferret/field_infos.rb +44 -0
  181. data/lib/ferret/field_symbol.rb +87 -0
  182. data/lib/ferret/index.rb +973 -0
  183. data/lib/ferret/number_tools.rb +157 -0
  184. data/lib/ferret/version.rb +3 -0
  185. data/setup.rb +1555 -0
  186. data/test/long_running/largefile/tc_largefile.rb +46 -0
  187. data/test/test_all.rb +5 -0
  188. data/test/test_helper.rb +29 -0
  189. data/test/test_installed.rb +1 -0
  190. data/test/threading/number_to_spoken.rb +132 -0
  191. data/test/threading/thread_safety_index_test.rb +88 -0
  192. data/test/threading/thread_safety_read_write_test.rb +73 -0
  193. data/test/threading/thread_safety_test.rb +133 -0
  194. data/test/unit/analysis/tc_analyzer.rb +550 -0
  195. data/test/unit/analysis/tc_token_stream.rb +653 -0
  196. data/test/unit/index/tc_index.rb +867 -0
  197. data/test/unit/index/tc_index_reader.rb +699 -0
  198. data/test/unit/index/tc_index_writer.rb +447 -0
  199. data/test/unit/index/th_doc.rb +332 -0
  200. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  201. data/test/unit/search/tc_filter.rb +156 -0
  202. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  203. data/test/unit/search/tc_index_searcher.rb +67 -0
  204. data/test/unit/search/tc_multi_searcher.rb +128 -0
  205. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  206. data/test/unit/search/tc_search_and_sort.rb +179 -0
  207. data/test/unit/search/tc_sort.rb +49 -0
  208. data/test/unit/search/tc_sort_field.rb +27 -0
  209. data/test/unit/search/tc_spans.rb +190 -0
  210. data/test/unit/search/tm_searcher.rb +436 -0
  211. data/test/unit/store/tc_fs_store.rb +115 -0
  212. data/test/unit/store/tc_ram_store.rb +35 -0
  213. data/test/unit/store/tm_store.rb +34 -0
  214. data/test/unit/store/tm_store_lock.rb +68 -0
  215. data/test/unit/tc_document.rb +81 -0
  216. data/test/unit/tc_field_symbol.rb +26 -0
  217. data/test/unit/ts_analysis.rb +2 -0
  218. data/test/unit/ts_index.rb +2 -0
  219. data/test/unit/ts_largefile.rb +4 -0
  220. data/test/unit/ts_query_parser.rb +2 -0
  221. data/test/unit/ts_search.rb +2 -0
  222. data/test/unit/ts_store.rb +2 -0
  223. data/test/unit/ts_utils.rb +2 -0
  224. data/test/unit/utils/tc_bit_vector.rb +295 -0
  225. data/test/unit/utils/tc_number_tools.rb +117 -0
  226. data/test/unit/utils/tc_priority_queue.rb +106 -0
  227. data/test/utils/content_generator.rb +226 -0
  228. metadata +319 -0
data/ext/bitvector.c ADDED
@@ -0,0 +1,96 @@
1
+ #include "bitvector.h"
2
+ #include "internal.h"
3
+ #include <string.h>
4
+
5
+ BitVector *bv_new_capa(int capa)
6
+ {
7
+ BitVector *bv = ALLOC_AND_ZERO(BitVector);
8
+
9
+ /* The capacity passed by the user is number of bits allowed, however we
10
+ * store capacity as the number of words (U32) allocated. */
11
+ bv->capa = max2(TO_WORD(capa), 4);
12
+ bv->bits = ALLOC_AND_ZERO_N(u32, bv->capa);
13
+ bv->curr_bit = -1;
14
+ bv->ref_cnt = 1;
15
+ return bv;
16
+ }
17
+
18
+ BitVector *bv_new()
19
+ {
20
+ return bv_new_capa(BV_INIT_CAPA);
21
+ }
22
+
23
+ void bv_destroy(BitVector *bv)
24
+ {
25
+ if (--(bv->ref_cnt) == 0) {
26
+ free(bv->bits);
27
+ free(bv);
28
+ }
29
+ }
30
+
31
+ void bv_clear(BitVector *bv)
32
+ {
33
+ memset(bv->bits, 0, bv->capa * sizeof(u32));
34
+ bv->extends_as_ones = 0;
35
+ bv->count = 0;
36
+ bv->size = 0;
37
+ }
38
+
39
+ void bv_scan_reset(BitVector *bv)
40
+ {
41
+ bv->curr_bit = -1;
42
+ }
43
+
44
+ int bv_eq(BitVector *bv1, BitVector *bv2)
45
+ {
46
+ if (bv1 == bv2) {
47
+ return true;
48
+ }
49
+
50
+ if (bv1->extends_as_ones != bv2->extends_as_ones) {
51
+ return false;
52
+ }
53
+
54
+ u32 *bits = bv1->bits;
55
+ u32 *bits2 = bv2->bits;
56
+ int min_size = min2(bv1->size, bv2->size);
57
+ int word_size = TO_WORD(min_size);
58
+ int ext_word_size = 0;
59
+ int i;
60
+
61
+ for (i = 0; i < word_size; i++) {
62
+ if (bits[i] != bits2[i]) {
63
+ return false;
64
+ }
65
+ }
66
+ if (bv1->size > min_size) {
67
+ bits = bv1->bits;
68
+ ext_word_size = TO_WORD(bv1->size);
69
+ }
70
+ else if (bv2->size > min_size) {
71
+ bits = bv2->bits;
72
+ ext_word_size = TO_WORD(bv2->size);
73
+ }
74
+ if (ext_word_size) {
75
+ const u32 expected = (bv1->extends_as_ones ? 0xFFFFFFFF : 0);
76
+ for (i = word_size; i < ext_word_size; i++) {
77
+ if (bits[i] != expected) {
78
+ return false;
79
+ }
80
+ }
81
+ }
82
+ return true;
83
+ }
84
+
85
+ unsigned long bv_hash(BitVector *bv)
86
+ {
87
+ unsigned long hash = 0;
88
+ const u32 empty_word = bv->extends_as_ones ? 0xFFFFFFFF : 0;
89
+ int i;
90
+ for (i = TO_WORD(bv->size) - 1; i >= 0; i--) {
91
+ const u32 word = bv->bits[i];
92
+ if (word != empty_word)
93
+ hash = (hash << 1) ^ word;
94
+ }
95
+ return (hash << 1) | bv->extends_as_ones;
96
+ }
data/ext/bitvector.h ADDED
@@ -0,0 +1,594 @@
1
+ #ifndef FRT_BIT_VECTOR_H
2
+ #define FRT_BIT_VECTOR_H
3
+
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ #include "global.h"
9
+
10
+ #define FRT_BV_INIT_CAPA 256
11
+
12
+ typedef struct FrtBitVector
13
+ {
14
+ /** The bits are held in an array of 32-bit integers */
15
+ frt_u32 *bits;
16
+
17
+ /** size is equal to 1 + the highest order bit set */
18
+ int size;
19
+
20
+ /** capa is the number of words (U32) allocated for the bits */
21
+ int capa;
22
+
23
+ /** count is the running count of bits set. This is kept up to
24
+ * date by frt_bv_set and frt_bv_unset. You can reset this value
25
+ * by calling frt_bv_recount */
26
+ int count;
27
+
28
+ /** curr_bit is used by scan_next to record the previously scanned bit */
29
+ int curr_bit;
30
+
31
+ bool extends_as_ones : 1;
32
+ int ref_cnt;
33
+ } FrtBitVector;
34
+
35
+ /**
36
+ * Create a new FrtBitVector with a capacity of
37
+ * +FRT_BV_INIT_CAPA+. Note that the FrtBitVector is growable and will
38
+ * adjust its capacity when you use frt_bv_set.
39
+ *
40
+ * @return FrtBitVector with a capacity of +FRT_BV_INIT_CAPA+.
41
+ */
42
+ extern FRT_ATTR_MALLOC
43
+ FrtBitVector *frt_bv_new();
44
+
45
+ /**
46
+ * Create a new FrtBitVector with a capacity of +capa+. Note that the
47
+ * FrtBitVector is growable and will adjust its capacity when you use
48
+ * frt_bv_set.
49
+ *
50
+ * @param capa the initial capacity of the FrtBitVector
51
+ * @return FrtBitVector with a capacity of +capa+.
52
+ */
53
+ extern FRT_ATTR_MALLOC
54
+ FrtBitVector *frt_bv_new_capa(int capa);
55
+
56
+ /**
57
+ * Destroy a FrtBitVector, freeing all memory allocated to that
58
+ * FrtBitVector
59
+ *
60
+ * @param bv FrtBitVector to destroy
61
+ */
62
+ extern void frt_bv_destroy(FrtBitVector *bv);
63
+
64
+ /**
65
+ * Set the bit at position +index+ with +value+. If +index+ is outside
66
+ * of the range of the FrtBitVector, that is >= FrtBitVector.size,
67
+ * FrtBitVector.size will be set to +index+ + 1. If it is greater than
68
+ * the capacity of the FrtBitVector, the capacity will be expanded to
69
+ * accomodate.
70
+ *
71
+ * @param bv the FrtBitVector to set the bit in
72
+ * @param index the index of the bit to set
73
+ * @param value the boolean value
74
+ */
75
+
76
+ /*
77
+ * FIXME: if the top set bit is unset, size is not adjusted. This will not
78
+ * cause any bugs in this code but could cause problems if users are relying
79
+ * on the fact that size is accurate.
80
+ */
81
+ static FRT_ATTR_ALWAYS_INLINE
82
+ void frt_bv_set_value(FrtBitVector *bv, int bit, bool value)
83
+ {
84
+ frt_u32 *word_p;
85
+ int word = bit >> 5;
86
+ frt_u32 bitmask = 1 << (bit & 31);
87
+
88
+ /* Check to see if we need to grow the BitVector */
89
+ if (unlikely(bit >= bv->size)) {
90
+ bv->size = bit + 1; /* size is max range of bits set */
91
+ if (word >= bv->capa) {
92
+ int capa = bv->capa << 1;
93
+ while (capa <= word) {
94
+ capa <<= 1;
95
+ }
96
+ FRT_REALLOC_N(bv->bits, frt_u32, capa);
97
+ memset(bv->bits + bv->capa, (bv->extends_as_ones ? 0xFF : 0),
98
+ sizeof(frt_u32) * (capa - bv->capa));
99
+ bv->capa = capa;
100
+ }
101
+ }
102
+
103
+ /* Set the required bit */
104
+ word_p = &(bv->bits[word]);
105
+ if ((!!(bitmask & *word_p)) != value) {
106
+ if (value) {
107
+ bv->count++;
108
+ *word_p |= bitmask;
109
+ }
110
+ else {
111
+ bv->count--;
112
+ *word_p &= ~bitmask;
113
+ }
114
+ }
115
+ }
116
+
117
+ /**
118
+ * Set the bit at position +index+. If +index+ is outside of the range
119
+ * of the FrtBitVector, that is >= FrtBitVector.size,
120
+ * FrtBitVector.size will be set to +index+ + 1. If it is greater than
121
+ * the capacity of the FrtBitVector, the capacity will be expanded to
122
+ * accomodate.
123
+ *
124
+ * @param bv the FrtBitVector to set the bit in
125
+ * @param index the index of the bit to set
126
+ */
127
+ static FRT_ATTR_ALWAYS_INLINE
128
+ void frt_bv_set(FrtBitVector *bv, int bit)
129
+ {
130
+ frt_bv_set_value(bv, bit, 1);
131
+ }
132
+
133
+ /**
134
+ * Unsafely set the bit at position +index+. If you choose to use this
135
+ * function you must create the FrtBitVector with a large enough
136
+ * capacity to accomodate all of the frt_bv_set_fast operations. You
137
+ * must also set bits in order and only one time per bit. Otherwise,
138
+ * use the safe frt_bv_set function.
139
+ *
140
+ * So this is ok;
141
+ * <pre>
142
+ * FrtBitVector *bv = frt_bv_new_capa(1000);
143
+ * frt_bv_set_fast(bv, 900);
144
+ * frt_bv_set_fast(bv, 920);
145
+ * frt_bv_set_fast(bv, 999);
146
+ * </pre>
147
+ *
148
+ * While these are not ok;
149
+ * <pre>
150
+ * FrtBitVector *bv = frt_bv_new_capa(90);
151
+ * frt_bv_set_fast(bv, 80);
152
+ * frt_bv_set_fast(bv, 79); // <= Bad: Out of Order
153
+ * frt_bv_set_fast(bv, 80); // <= Bad: Already set
154
+ * frt_bv_set_fast(bv, 90); // <= Bad: Out of Range. index must be < capa
155
+ * </pre>
156
+ *
157
+ * @param bv the FrtBitVector to set the bit in
158
+ * @param index the index of the bit to set
159
+ */
160
+ static FRT_ATTR_ALWAYS_INLINE
161
+ void frt_bv_set_fast(FrtBitVector *bv, int bit)
162
+ {
163
+ bv->count++;
164
+ bv->size = bit + 1;
165
+ bv->bits[bit >> 5] |= (1 << (bit & 31));
166
+ }
167
+
168
+ /**
169
+ * Return 1 if the bit at +index+ was set or 0 otherwise. If +index+
170
+ * is out of range, that is greater then the BitVectors capacity, it
171
+ * will also return 0.
172
+ *
173
+ * @param bv the FrtBitVector to check in
174
+ * @param index the index of the bit to check
175
+ * @return 1 if the bit was set, 0 otherwise
176
+ */
177
+ static FRT_ATTR_ALWAYS_INLINE
178
+ int frt_bv_get(FrtBitVector *bv, int bit)
179
+ {
180
+ /* out of range so return 0 because it can't have been set */
181
+ if (unlikely(bit >= bv->size)) {
182
+ return bv->extends_as_ones;
183
+ }
184
+ return (bv->bits[bit >> 5] >> (bit & 31)) & 0x01;
185
+ }
186
+
187
+ /**
188
+ * Unset the bit at position +index+. If the +index+ was out of range,
189
+ * that is greater than the BitVectors capacity then do
190
+ * nothing. (frt_bv_get will return 0 in this case anyway).
191
+ *
192
+ * @param bv the FrtBitVector to unset the bit in
193
+ * @param index the index of the bit to unset
194
+ */
195
+ static FRT_ATTR_ALWAYS_INLINE
196
+ void frt_bv_unset(FrtBitVector *bv, int bit)
197
+ {
198
+ frt_bv_set_value(bv, bit, 0);
199
+ }
200
+
201
+ /**
202
+ * Clear all set bits. This function will set all set bits to 0.
203
+ *
204
+ * @param bv the FrtBitVector to clear
205
+ */
206
+ extern void frt_bv_clear(FrtBitVector *bv);
207
+
208
+ /**
209
+ * Resets the set bit count by running through the whole FrtBitVector
210
+ * and counting all set bits. A running count of the bits is kept by
211
+ * frt_bv_set, *frt_bv_get and frt_bv_set_fast so this function is
212
+ * only necessary if the count could have been corrupted somehow or if
213
+ * the FrtBitVector has been constructed in a different way (for
214
+ * example being read from the file_system).
215
+ *
216
+ * @param bv the FrtBitVector to count the bits in
217
+ * @return the number of set bits in the FrtBitVector. FrtBitVector.count is also
218
+ * set
219
+ */
220
+ static FRT_ATTR_ALWAYS_INLINE
221
+ int frt_bv_recount(FrtBitVector *bv)
222
+ {
223
+ unsigned int extra = ((bv->size & 31) >> 3) + 1;
224
+ unsigned int len = bv->size >> 5;
225
+ unsigned int idx, count = 0;
226
+
227
+ if (bv->extends_as_ones) {
228
+ for (idx = 0; idx < len; ++idx) {
229
+ count += frt_count_zeros(bv->bits[idx]);
230
+ }
231
+ switch (extra) {
232
+ case 4: count += frt_count_zeros(bv->bits[idx] | 0x00ffffff);
233
+ case 3: count += frt_count_zeros(bv->bits[idx] | 0xff00ffff);
234
+ case 2: count += frt_count_zeros(bv->bits[idx] | 0xffff00ff);
235
+ case 1: count += frt_count_zeros(bv->bits[idx] | 0xffffff00);
236
+ }
237
+ }
238
+ else {
239
+ for (idx = 0; idx < len; ++idx) {
240
+ count += frt_count_ones(bv->bits[idx]);
241
+ }
242
+ switch (extra) {
243
+ case 4: count += frt_count_ones(bv->bits[idx] & 0xff000000);
244
+ case 3: count += frt_count_ones(bv->bits[idx] & 0x00ff0000);
245
+ case 2: count += frt_count_ones(bv->bits[idx] & 0x0000ff00);
246
+ case 1: count += frt_count_ones(bv->bits[idx] & 0x000000ff);
247
+ }
248
+ }
249
+ return bv->count = count;
250
+ }
251
+
252
+ /**
253
+ * Reset the FrtBitVector for scanning. This function should be called
254
+ * before using frt_bv_scan_next to scan through all set bits in the
255
+ * FrtBitVector. This is not necessary when using
256
+ * frt_bv_scan_next_from.
257
+ *
258
+ * @param bv the FrtBitVector to reset for scanning
259
+ */
260
+ extern void frt_bv_scan_reset(FrtBitVector *bv);
261
+
262
+ /**
263
+ * Scan the FrtBitVector for the next set bit after +from+. If no more
264
+ * bits are set then return -1, otherwise return the index of teh next
265
+ * set bit.
266
+ *
267
+ * @param bv the FrtBitVector to scan
268
+ * @return the next set bit's index or -1 if no more bits are set
269
+ */
270
+ static FRT_ATTR_ALWAYS_INLINE
271
+ int frt_bv_scan_next_from(FrtBitVector *bv, const int bit)
272
+ {
273
+ frt_u32 pos = bit >> 5;
274
+ frt_u32 word = bv->bits[pos];
275
+
276
+ if (bit >= bv->size)
277
+ return -1;
278
+
279
+ /* Keep only the bits above this position */
280
+ word &= ~0 << (bit & 31);
281
+ if (word) {
282
+ goto done;
283
+ }
284
+ else {
285
+ frt_u32 word_size = FRT_TO_WORD(bv->size);
286
+ for (pos++; pos < word_size; ++pos)
287
+ {
288
+ if ( (word = bv->bits[pos]) )
289
+ goto done;
290
+ }
291
+ }
292
+ return -1;
293
+ done:
294
+ return bv->curr_bit = (pos << 5) + frt_count_trailing_zeros(word);
295
+ }
296
+
297
+ /**
298
+ * Scan the FrtBitVector for the next set bit. Before using this
299
+ * function you should reset the FrtBitVector for scanning using
300
+ * +frt_bv_scan_reset+. You can the repeatedly call frt_bv_scan_next
301
+ * to get each set bit until it finally returns -1.
302
+ *
303
+ * @param bv the FrtBitVector to scan
304
+ * @return the next set bits index or -1 if no more bits are set
305
+ */
306
+ static FRT_ATTR_ALWAYS_INLINE
307
+ int frt_bv_scan_next(FrtBitVector *bv)
308
+ {
309
+ return frt_bv_scan_next_from(bv, bv->curr_bit + 1);
310
+ }
311
+
312
+ /**
313
+ * Scan the FrtBitVector for the next unset bit after +from+. If no
314
+ * more bits are unset then return -1, otherwise return the index of
315
+ * teh next unset bit.
316
+ *
317
+ * @param bv the FrtBitVector to scan
318
+ * @return the next unset bit's index or -1 if no more bits are unset
319
+ */
320
+ static FRT_ATTR_ALWAYS_INLINE
321
+ int frt_bv_scan_next_unset_from(FrtBitVector *bv, const int bit)
322
+ {
323
+ frt_u32 pos = bit >> 5;
324
+ frt_u32 word = bv->bits[pos];
325
+
326
+ if (bit >= bv->size)
327
+ return -1;
328
+
329
+ /* Set all of the bits below this position */
330
+ word |= (1 << (bit & 31)) - 1;
331
+ if (~word) {
332
+ goto done;
333
+ }
334
+ else {
335
+ frt_u32 word_size = FRT_TO_WORD(bv->size);
336
+ for (pos++; pos < word_size; ++pos)
337
+ {
338
+ if ( ~(word = bv->bits[pos]) )
339
+ goto done;
340
+ }
341
+ }
342
+ return -1;
343
+ done:
344
+ return bv->curr_bit = (pos << 5) + frt_count_trailing_ones(word);
345
+ }
346
+
347
+ /**
348
+ * Scan the FrtBitVector for the next unset bit. Before using this
349
+ * function you should reset the FrtBitVector for scanning using
350
+ * +frt_bv_scan_reset+. You can the repeated call frt_bv_scan_next to
351
+ * get each unset bit until it finally returns -1.
352
+ *
353
+ * @param bv the FrtBitVector to scan
354
+ * @return the next unset bits index or -1 if no more bits are unset
355
+ */
356
+ static FRT_ATTR_ALWAYS_INLINE
357
+ int frt_bv_scan_next_unset(FrtBitVector *bv)
358
+ {
359
+ return frt_bv_scan_next_unset_from(bv, bv->curr_bit + 1);
360
+ }
361
+
362
+ /**
363
+ * Check whether the two BitVectors have the same bits set.
364
+ *
365
+ * @param bv1 first FrtBitVector to compare
366
+ * @param bv2 second BitVectors to compare
367
+ * @return true if bv1 == bv2
368
+ */
369
+ extern int frt_bv_eq(FrtBitVector *bv1, FrtBitVector *bv2);
370
+
371
+ /**
372
+ * Determines a hash value for the FrtBitVector
373
+ *
374
+ * @param bv the FrtBitVector to hash
375
+ * @return A hash value for the FrtBitVector
376
+ */
377
+ extern unsigned long frt_bv_hash(FrtBitVector *bv);
378
+
379
+ static FRT_ATTR_ALWAYS_INLINE
380
+ void frt_bv_capa(FrtBitVector *bv, int capa, int size)
381
+ {
382
+ int word_size = FRT_TO_WORD(size);
383
+ if (bv->capa < capa)
384
+ {
385
+ FRT_REALLOC_N(bv->bits, frt_u32, capa);
386
+ bv->capa = capa;
387
+ memset(bv->bits + word_size, (bv->extends_as_ones ? 0xFF : 0),
388
+ sizeof(frt_u32) * (capa - word_size));
389
+ }
390
+ bv->size = size;
391
+ }
392
+
393
/*
 * Extension step of AND: fill dest[i..max) for the words that only the
 * longer operand (src) has. When the shorter operand extends as ones the
 * result is src itself, otherwise it is all zeros.
 *
 * memmove is used rather than memcpy because dest and src are the same
 * array when the operation is performed in place.
 */
#define frt_bv_and_ext(dest, src, extends_as_ones, i, max) do {              \
    if (extends_as_ones)                                                     \
        memmove(&(dest)[(i)], &(src)[(i)],                                   \
                sizeof(*(dest)) * ((max) - (i)));                            \
    else                                                                     \
        memset(&(dest)[(i)], 0x00, sizeof(*(dest)) * ((max) - (i)));         \
} while(0)
398
+
399
/*
 * Extension step of OR: fill dest[i..max) for the words that only the
 * longer operand (src) has. When the shorter operand extends as ones the
 * result is all ones, otherwise it is src itself.
 *
 * memmove is used rather than memcpy because dest and src are the same
 * array when the operation is performed in place.
 */
#define frt_bv_or_ext(dest, src, extends_as_ones, i, max) do {               \
    if (extends_as_ones)                                                     \
        memset(&(dest)[(i)], 0xFF, sizeof(*(dest)) * ((max) - (i)));         \
    else                                                                     \
        memmove(&(dest)[(i)], &(src)[(i)],                                   \
                sizeof(*(dest)) * ((max) - (i)));                            \
} while(0)
404
+
405
/*
 * Extension step of XOR: for the words that only the longer operand (src)
 * has, dest receives src unchanged when the shorter operand extends as
 * zeros and src inverted when it extends as ones. The index variable +i+ is
 * advanced to +max+ as a side effect.
 */
#define frt_bv_xor_ext(dest, src, extends_as_ones, i, max) do {              \
    const frt_u32 flip_mask_ = (extends_as_ones ? 0xffffffff : 0);           \
    while (i < max) {                                                        \
        dest[i] = src[i] ^ flip_mask_;                                       \
        ++i;                                                                 \
    }                                                                        \
} while(0)
410
+
411
/*
 * Generic body of the binary bitwise operations: bv = a op b.
 *
 * The words both operands have in common are combined with +op+; the words
 * that only the longer operand has are produced by +ext_cb+, which is told
 * how the SHORTER operand extends. The result's extends_as_ones flag is the
 * same operator applied to the operands' flags, and the bit count is
 * recomputed at the end.
 *
 * bv may be the same object as a or b (the in-place variants pass bv == a).
 * The operands' extends_as_ones flags and sizes are therefore captured
 * before anything is written to bv; otherwise the extension step would see
 * the result's flag instead of the operand's.
 */
#define FRT_BV_OP(bv, a, b, op, ext_cb) do {                                 \
    int i;                                                                   \
    const bool a_ext_ = (a)->extends_as_ones;                                \
    const bool b_ext_ = (b)->extends_as_ones;                                \
    const int a_wsz = FRT_TO_WORD((a)->size);                                \
    const int b_wsz = FRT_TO_WORD((b)->size);                                \
    const int max_size = frt_max2((a)->size, (b)->size);                     \
    const int min_size = frt_min2((a)->size, (b)->size);                     \
    const int max_word_size = FRT_TO_WORD(max_size);                         \
    const int min_word_size = FRT_TO_WORD(min_size);                         \
    const int capa = frt_max2(frt_round2(max_word_size), 4);                 \
                                                                             \
    (bv)->extends_as_ones = (a_ext_ op b_ext_);                              \
    frt_bv_capa((bv), capa, max_size);                                       \
                                                                             \
    for (i = 0; i < min_word_size; ++i)                                      \
        (bv)->bits[i] = (a)->bits[i] op (b)->bits[i];                        \
                                                                             \
    if (a_wsz != b_wsz) {                                                    \
        /* default: a is the longer operand, b the shorter one */            \
        frt_u32 *bits = (a)->bits;                                           \
        bool extends_as_ones = b_ext_;                                       \
        if (a_wsz < b_wsz) {                                                 \
            bits = (b)->bits;                                                \
            extends_as_ones = a_ext_;                                        \
        }                                                                    \
        ext_cb((bv)->bits, bits, extends_as_ones, i, max_word_size);         \
    }                                                                        \
    frt_bv_recount(bv);                                                      \
} while(0)
438
+
439
+ static FRT_ATTR_ALWAYS_INLINE
440
+ FrtBitVector *frt_bv_and_i(FrtBitVector *bv,
441
+ FrtBitVector *a, FrtBitVector *b)
442
+ {
443
+ FRT_BV_OP(bv, a, b, &, frt_bv_and_ext);
444
+ return bv;
445
+ }
446
+
447
+ static FRT_ATTR_ALWAYS_INLINE
448
+ FrtBitVector *frt_bv_or_i(FrtBitVector *bv,
449
+ FrtBitVector *a, FrtBitVector *b)
450
+ {
451
+ FRT_BV_OP(bv, a, b, |, frt_bv_or_ext);
452
+ return bv;
453
+ }
454
+
455
+ static FRT_ATTR_ALWAYS_INLINE
456
+ FrtBitVector *frt_bv_xor_i(FrtBitVector *bv,
457
+ FrtBitVector *a, FrtBitVector *b)
458
+ {
459
+ FRT_BV_OP(bv, a, b, ^, frt_bv_xor_ext);
460
+ return bv;
461
+ }
462
+
463
+ static FRT_ATTR_ALWAYS_INLINE
464
+ FrtBitVector *frt_bv_not_i(FrtBitVector *bv, FrtBitVector *bv1)
465
+ {
466
+ int i;
467
+ int word_size = FRT_TO_WORD(bv1->size);
468
+ int capa = frt_max2(frt_round2(word_size), 4);
469
+
470
+ bv->extends_as_ones = !bv1->extends_as_ones;
471
+ frt_bv_capa(bv, capa, bv1->size);
472
+
473
+ for (i = 0; i < word_size; i++)
474
+ bv->bits[i] = ~(bv1->bits[i]);
475
+
476
+ memset(bv->bits + word_size, (bv->extends_as_ones ? 0xFF : 0),
477
+ sizeof(frt_u32) * (bv->capa - word_size));
478
+
479
+ frt_bv_recount(bv);
480
+ return bv;
481
+ }
482
+
483
+ /**
484
+ * ANDs two BitVectors (+bv1+ and +bv2+) together and return the resultant
485
+ * FrtBitVector
486
+ *
487
+ * @param bv1 first FrtBitVector to AND
488
+ * @param bv2 second FrtBitVector to AND
489
+ * @return A FrtBitVector with all bits set that are set in both bv1 and bv2
490
+ */
491
+ static FRT_ATTR_ALWAYS_INLINE
492
+ FrtBitVector *frt_bv_and(FrtBitVector *bv1, FrtBitVector *bv2)
493
+ {
494
+ return frt_bv_and_i(frt_bv_new(), bv1, bv2);
495
+ }
496
+
497
+ /**
498
+ * ORs two BitVectors (+bv1+ and +bv2+) together and return the resultant
499
+ * FrtBitVector
500
+ *
501
+ * @param bv1 first FrtBitVector to OR
502
+ * @param bv2 second FrtBitVector to OR
503
+ * @return A FrtBitVector with all bits set that are set in both bv1 and bv2
504
+ */
505
+ static FRT_ATTR_ALWAYS_INLINE
506
+ FrtBitVector *frt_bv_or(FrtBitVector *bv1, FrtBitVector *bv2)
507
+ {
508
+ return frt_bv_or_i(frt_bv_new(), bv1, bv2);
509
+ }
510
+
511
+
512
+ /**
513
+ * XORs two BitVectors (+bv1+ and +bv2+) together and return the resultant
514
+ * FrtBitVector
515
+ *
516
+ * @param bv1 first FrtBitVector to XOR
517
+ * @param bv2 second FrtBitVector to XOR
518
+ * @return A FrtBitVector with all bits set that are equal in bv1 and bv2
519
+ */
520
+ static FRT_ATTR_ALWAYS_INLINE
521
+ FrtBitVector *frt_bv_xor(FrtBitVector *bv1, FrtBitVector *bv2)
522
+ {
523
+ return frt_bv_xor_i(frt_bv_new(), bv1, bv2);
524
+ }
525
+
526
+ /**
527
+ * Returns FrtBitVector with all of +bv+'s bits flipped
528
+ *
529
+ * @param bv FrtBitVector to flip
530
+ * @return A FrtBitVector with all bits set that are set in both bv1 and bv2
531
+ */
532
+ static FRT_ATTR_ALWAYS_INLINE
533
+ FrtBitVector *frt_bv_not(FrtBitVector *bv)
534
+ {
535
+ return frt_bv_not_i(frt_bv_new(), bv);
536
+ }
537
+
538
+ /**
539
+ * ANDs two BitVectors together +bv1+ and +bv2+ in place of +bv1+
540
+ *
541
+ * @param bv1 first FrtBitVector to AND
542
+ * @param bv2 second FrtBitVector to AND
543
+ * @return A FrtBitVector
544
+ * @return bv1 with all bits set that where set in both bv1 and bv2
545
+ */
546
+ static FRT_ATTR_ALWAYS_INLINE
547
+ FrtBitVector *frt_bv_and_x(FrtBitVector *bv1, FrtBitVector *bv2)
548
+ {
549
+ return frt_bv_and_i(bv1, bv1, bv2);
550
+ }
551
+
552
+ /**
553
+ * ORs two BitVectors together
554
+ *
555
+ * @param bv1 first FrtBitVector to OR
556
+ * @param bv2 second FrtBitVector to OR
557
+ * @return bv1
558
+ */
559
+ static FRT_ATTR_ALWAYS_INLINE
560
+ FrtBitVector *frt_bv_or_x(FrtBitVector *bv1, FrtBitVector *bv2)
561
+ {
562
+ return frt_bv_or_i(bv1, bv1, bv2);
563
+ }
564
+
565
+ /**
566
+ * XORs two BitVectors together +bv1+ and +bv2+ in place of +bv1+
567
+ *
568
+ * @param bv1 first FrtBitVector to XOR
569
+ * @param bv2 second FrtBitVector to XOR
570
+ * @return bv1
571
+ */
572
+ static FRT_ATTR_ALWAYS_INLINE
573
+ FrtBitVector *frt_bv_xor_x(FrtBitVector *bv1, FrtBitVector *bv2)
574
+ {
575
+ return frt_bv_xor_i(bv1, bv1, bv2);
576
+ }
577
+
578
+ /**
579
+ * Flips all bits in the FrtBitVector +bv+
580
+ *
581
+ * @param bv FrtBitVector to flip
582
+ * @return A +bv+ with all it's bits flipped
583
+ */
584
+ static FRT_ATTR_ALWAYS_INLINE
585
+ FrtBitVector *frt_bv_not_x(FrtBitVector *bv)
586
+ {
587
+ return frt_bv_not_i(bv, bv);
588
+ }
589
+
590
+ #ifdef __cplusplus
591
+ } // extern "C"
592
+ #endif
593
+
594
+ #endif