ferret 0.11.6 → 0.11.8.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (185) hide show
  1. data/README +10 -22
  2. data/RELEASE_CHANGES +137 -0
  3. data/RELEASE_NOTES +60 -0
  4. data/Rakefile +379 -274
  5. data/TODO +100 -8
  6. data/bin/ferret-browser +0 -0
  7. data/ext/BZLIB_blocksort.c +1094 -0
  8. data/ext/BZLIB_bzlib.c +1578 -0
  9. data/ext/BZLIB_compress.c +672 -0
  10. data/ext/BZLIB_crctable.c +104 -0
  11. data/ext/BZLIB_decompress.c +626 -0
  12. data/ext/BZLIB_huffman.c +205 -0
  13. data/ext/BZLIB_randtable.c +84 -0
  14. data/ext/{api.c → STEMMER_api.c} +7 -10
  15. data/ext/{libstemmer.c → STEMMER_libstemmer.c} +3 -2
  16. data/ext/{stem_ISO_8859_1_danish.c → STEMMER_stem_ISO_8859_1_danish.c} +123 -124
  17. data/ext/{stem_ISO_8859_1_dutch.c → STEMMER_stem_ISO_8859_1_dutch.c} +177 -188
  18. data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
  19. data/ext/{stem_ISO_8859_1_finnish.c → STEMMER_stem_ISO_8859_1_finnish.c} +276 -306
  20. data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
  21. data/ext/{stem_ISO_8859_1_german.c → STEMMER_stem_ISO_8859_1_german.c} +161 -170
  22. data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
  23. data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
  24. data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
  25. data/ext/{stem_ISO_8859_1_porter.c → STEMMER_stem_ISO_8859_1_porter.c} +263 -290
  26. data/ext/{stem_ISO_8859_1_portuguese.c → STEMMER_stem_ISO_8859_1_portuguese.c} +362 -380
  27. data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
  28. data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
  29. data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
  30. data/ext/{stem_KOI8_R_russian.c → STEMMER_stem_KOI8_R_russian.c} +244 -245
  31. data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
  32. data/ext/{stem_UTF_8_dutch.c → STEMMER_stem_UTF_8_dutch.c} +192 -211
  33. data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
  34. data/ext/{stem_UTF_8_finnish.c → STEMMER_stem_UTF_8_finnish.c} +284 -324
  35. data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
  36. data/ext/{stem_UTF_8_german.c → STEMMER_stem_UTF_8_german.c} +170 -187
  37. data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
  38. data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
  39. data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
  40. data/ext/{stem_UTF_8_porter.c → STEMMER_stem_UTF_8_porter.c} +271 -310
  41. data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
  42. data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
  43. data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
  44. data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
  45. data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
  46. data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
  47. data/ext/{utilities.c → STEMMER_utilities.c} +100 -68
  48. data/ext/analysis.c +276 -121
  49. data/ext/analysis.h +190 -143
  50. data/ext/api.h +3 -4
  51. data/ext/array.c +5 -3
  52. data/ext/array.h +52 -43
  53. data/ext/bitvector.c +38 -482
  54. data/ext/bitvector.h +446 -124
  55. data/ext/bzlib.h +282 -0
  56. data/ext/bzlib_private.h +503 -0
  57. data/ext/compound_io.c +23 -22
  58. data/ext/config.h +21 -11
  59. data/ext/document.c +43 -40
  60. data/ext/document.h +31 -21
  61. data/ext/except.c +20 -38
  62. data/ext/except.h +89 -76
  63. data/ext/extconf.rb +3 -2
  64. data/ext/ferret.c +49 -35
  65. data/ext/ferret.h +14 -11
  66. data/ext/field_index.c +262 -0
  67. data/ext/field_index.h +52 -0
  68. data/ext/filter.c +11 -10
  69. data/ext/fs_store.c +65 -47
  70. data/ext/global.c +245 -165
  71. data/ext/global.h +252 -54
  72. data/ext/hash.c +200 -243
  73. data/ext/hash.h +205 -163
  74. data/ext/hashset.c +118 -96
  75. data/ext/hashset.h +110 -82
  76. data/ext/header.h +19 -19
  77. data/ext/helper.c +11 -10
  78. data/ext/helper.h +14 -6
  79. data/ext/index.c +745 -366
  80. data/ext/index.h +503 -529
  81. data/ext/internal.h +1020 -0
  82. data/ext/lang.c +10 -0
  83. data/ext/lang.h +35 -15
  84. data/ext/mempool.c +5 -4
  85. data/ext/mempool.h +30 -22
  86. data/ext/modules.h +35 -7
  87. data/ext/multimapper.c +43 -2
  88. data/ext/multimapper.h +32 -23
  89. data/ext/posh.c +0 -0
  90. data/ext/posh.h +4 -38
  91. data/ext/priorityqueue.c +10 -12
  92. data/ext/priorityqueue.h +33 -21
  93. data/ext/q_boolean.c +22 -9
  94. data/ext/q_const_score.c +3 -2
  95. data/ext/q_filtered_query.c +15 -12
  96. data/ext/q_fuzzy.c +147 -135
  97. data/ext/q_match_all.c +3 -2
  98. data/ext/q_multi_term.c +28 -32
  99. data/ext/q_parser.c +451 -173
  100. data/ext/q_phrase.c +158 -79
  101. data/ext/q_prefix.c +16 -18
  102. data/ext/q_range.c +363 -31
  103. data/ext/q_span.c +130 -141
  104. data/ext/q_term.c +21 -21
  105. data/ext/q_wildcard.c +19 -23
  106. data/ext/r_analysis.c +369 -242
  107. data/ext/r_index.c +421 -434
  108. data/ext/r_qparser.c +142 -92
  109. data/ext/r_search.c +790 -407
  110. data/ext/r_store.c +44 -44
  111. data/ext/r_utils.c +264 -96
  112. data/ext/ram_store.c +29 -23
  113. data/ext/scanner.c +895 -0
  114. data/ext/scanner.h +36 -0
  115. data/ext/scanner_mb.c +6701 -0
  116. data/ext/scanner_utf8.c +4415 -0
  117. data/ext/search.c +210 -87
  118. data/ext/search.h +556 -488
  119. data/ext/similarity.c +17 -16
  120. data/ext/similarity.h +51 -44
  121. data/ext/sort.c +157 -354
  122. data/ext/stem_ISO_8859_1_hungarian.h +16 -0
  123. data/ext/stem_ISO_8859_2_romanian.h +16 -0
  124. data/ext/stem_UTF_8_hungarian.h +16 -0
  125. data/ext/stem_UTF_8_romanian.h +16 -0
  126. data/ext/stem_UTF_8_turkish.h +16 -0
  127. data/ext/stopwords.c +287 -278
  128. data/ext/store.c +57 -51
  129. data/ext/store.h +308 -286
  130. data/ext/symbol.c +10 -0
  131. data/ext/symbol.h +23 -0
  132. data/ext/term_vectors.c +14 -293
  133. data/ext/threading.h +22 -22
  134. data/ext/win32.h +12 -4
  135. data/lib/ferret.rb +2 -1
  136. data/lib/ferret/browser.rb +1 -1
  137. data/lib/ferret/field_symbol.rb +94 -0
  138. data/lib/ferret/index.rb +221 -34
  139. data/lib/ferret/number_tools.rb +6 -6
  140. data/lib/ferret/version.rb +3 -0
  141. data/test/{unit → long_running}/largefile/tc_largefile.rb +1 -1
  142. data/test/test_helper.rb +7 -2
  143. data/test/test_installed.rb +1 -0
  144. data/test/threading/thread_safety_index_test.rb +10 -1
  145. data/test/threading/thread_safety_read_write_test.rb +4 -7
  146. data/test/threading/thread_safety_test.rb +0 -0
  147. data/test/unit/analysis/tc_analyzer.rb +29 -27
  148. data/test/unit/analysis/tc_token_stream.rb +23 -16
  149. data/test/unit/index/tc_index.rb +116 -11
  150. data/test/unit/index/tc_index_reader.rb +27 -27
  151. data/test/unit/index/tc_index_writer.rb +10 -0
  152. data/test/unit/index/th_doc.rb +38 -21
  153. data/test/unit/search/tc_filter.rb +31 -10
  154. data/test/unit/search/tc_index_searcher.rb +6 -0
  155. data/test/unit/search/tm_searcher.rb +53 -1
  156. data/test/unit/store/tc_fs_store.rb +40 -2
  157. data/test/unit/store/tc_ram_store.rb +0 -0
  158. data/test/unit/store/tm_store.rb +0 -0
  159. data/test/unit/store/tm_store_lock.rb +7 -6
  160. data/test/unit/tc_field_symbol.rb +26 -0
  161. data/test/unit/ts_analysis.rb +0 -0
  162. data/test/unit/ts_index.rb +0 -0
  163. data/test/unit/ts_store.rb +0 -0
  164. data/test/unit/ts_utils.rb +0 -0
  165. data/test/unit/utils/tc_number_tools.rb +0 -0
  166. data/test/utils/content_generator.rb +226 -0
  167. metadata +262 -221
  168. data/ext/inc/lang.h +0 -48
  169. data/ext/inc/threading.h +0 -31
  170. data/ext/stem_ISO_8859_1_english.c +0 -1156
  171. data/ext/stem_ISO_8859_1_french.c +0 -1276
  172. data/ext/stem_ISO_8859_1_italian.c +0 -1091
  173. data/ext/stem_ISO_8859_1_norwegian.c +0 -296
  174. data/ext/stem_ISO_8859_1_spanish.c +0 -1119
  175. data/ext/stem_ISO_8859_1_swedish.c +0 -307
  176. data/ext/stem_UTF_8_danish.c +0 -344
  177. data/ext/stem_UTF_8_english.c +0 -1176
  178. data/ext/stem_UTF_8_french.c +0 -1296
  179. data/ext/stem_UTF_8_italian.c +0 -1113
  180. data/ext/stem_UTF_8_norwegian.c +0 -302
  181. data/ext/stem_UTF_8_portuguese.c +0 -1055
  182. data/ext/stem_UTF_8_russian.c +0 -709
  183. data/ext/stem_UTF_8_spanish.c +0 -1137
  184. data/ext/stem_UTF_8_swedish.c +0 -313
  185. data/lib/ferret_version.rb +0 -3
@@ -1,53 +1,62 @@
1
1
  #ifndef FRT_ARRAY_H
2
2
  #define FRT_ARRAY_H
3
+
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
3
8
  #include "global.h"
4
9
 
5
10
  #if defined POSH_OS_SOLARIS || defined POSH_OS_SUNOS
6
- # define ARY_META_CNT 4
11
+ # define FRT_ARY_META_CNT 4
7
12
  #else
8
- # define ARY_META_CNT 3
13
+ # define FRT_ARY_META_CNT 3
9
14
  #endif
10
15
 
11
- #define ARY_INIT_CAPA 8
12
- #define ary_size(ary) ary_sz(ary)
13
- #define ary_sz(ary) (((int *)ary)[-1])
14
- #define ary_capa(ary) (((int *)ary)[-2])
15
- #define ary_type_size(ary) (((int *)ary)[-3])
16
- #define ary_start(ary) ((void **)&(((int *)ary)[-ARY_META_CNT]))
17
- #define ary_free(ary) free(ary_start(ary))
18
-
19
- #define ary_new_type_capa(type, init_capa)\
20
- (type *)ary_new_i(sizeof(type), init_capa)
21
- #define ary_new_type(type) (type *)ary_new_i(sizeof(type), 0)
22
- #define ary_new_capa(init_capa) ary_new_i(sizeof(void *), init_capa)
23
- #define ary_new() ary_new_i(sizeof(void *), 0)
24
- #define ary_resize(ary, size) ary_resize_i(((void ***)(void *)&ary), size)
25
- #define ary_set(ary, i, val) ary_set_i(((void ***)(void *)&ary), i, val)
26
- #define ary_get(ary, i) ary_get_i(((void **)ary), i)
27
- #define ary_push(ary, val) ary_push_i(((void ***)(void *)&ary), val)
28
- #define ary_pop(ary) ary_pop_i(((void **)ary))
29
- #define ary_unshift(ary, val) ary_unshift_i(((void ***)(void *)&ary), val)
30
- #define ary_shift(ary) ary_shift_i(((void **)ary))
31
- #define ary_remove(ary, i) ary_remove_i(((void **)ary), i)
32
- #define ary_delete(ary, i, f) ary_delete_i(((void **)ary), i, (free_ft)f)
33
- #define ary_destroy(ary, f) ary_destroy_i(((void **)ary), (free_ft)f)
34
- #define ary_rsz(ary, size) ary_resize(ary, size)
35
- #define ary_grow(ary) ary_resize(ary, ary_sz(ary))
36
- #define ary_last(ary) ary[ary_sz(ary) - 1]
37
- #define ary_sort(ary, cmp) qsort(ary, ary_size(ary), ary_type_size(ary), cmp)
38
- #define ary_each_rev(ary, i) for (i = ary_size(ary) - 1; i >= 0; i--)
39
- #define ary_each(ary, i) for (i = 0; i < ary_size(ary); i++)
40
-
41
- extern void ary_resize_i(void ***ary, int size);
42
- extern void **ary_new_i(int type_size, int init_capa);
43
- extern void ary_set_i(void ***ary, int index, void *value);
44
- extern void *ary_get_i(void **ary, int index);
45
- extern void ary_push_i(void ***ary, void *value);
46
- extern void *ary_pop_i(void **ary);
47
- extern void ary_unshift_i(void ***ary, void *value);
48
- extern void *ary_shift_i(void **ary);
49
- extern void *ary_remove_i(void **ary, int index);
50
- extern void ary_delete_i(void **ary, int index, void (*free_elem)(void *p));
51
- extern void ary_destroy_i(void **ary, void (*free_elem)(void *p));
16
+ #define FRT_ARY_INIT_CAPA 8
17
+ #define frt_ary_size(ary) frt_ary_sz(ary)
18
+ #define frt_ary_sz(ary) (((int *)ary)[-1])
19
+ #define frt_ary_capa(ary) (((int *)ary)[-2])
20
+ #define frt_ary_type_size(ary) (((int *)ary)[-3])
21
+ #define frt_ary_start(ary) ((void **)&(((int *)ary)[-FRT_ARY_META_CNT]))
22
+ #define frt_ary_free(ary) free(frt_ary_start(ary))
23
+
24
+ #define frt_ary_new_type_capa(type, init_capa)\
25
+ (type *)frt_ary_new_i(sizeof(type), init_capa)
26
+ #define frt_ary_new_type(type) (type *)frt_ary_new_i(sizeof(type), 0)
27
+ #define frt_ary_new_capa(init_capa) frt_ary_new_i(sizeof(void *), init_capa)
28
+ #define frt_ary_new() frt_ary_new_i(sizeof(void *), 0)
29
+ #define frt_ary_resize(ary, size) frt_ary_resize_i(((void ***)(void *)&ary), size)
30
+ #define frt_ary_set(ary, i, val) frt_ary_set_i(((void ***)(void *)&ary), i, val)
31
+ #define frt_ary_get(ary, i) frt_ary_get_i(((void **)ary), i)
32
+ #define frt_ary_push(ary, val) frt_ary_push_i(((void ***)(void *)&ary), val)
33
+ #define frt_ary_pop(ary) frt_ary_pop_i(((void **)ary))
34
+ #define frt_ary_unshift(ary, val) frt_ary_unshift_i(((void ***)(void *)&ary), val)
35
+ #define frt_ary_shift(ary) frt_ary_shift_i(((void **)ary))
36
+ #define frt_ary_remove(ary, i) frt_ary_remove_i(((void **)ary), i)
37
+ #define frt_ary_delete(ary, i, f) frt_ary_delete_i(((void **)ary), i, (free_ft)f)
38
+ #define frt_ary_destroy(ary, f) frt_ary_destroy_i(((void **)ary), (free_ft)f)
39
+ #define frt_ary_rsz(ary, size) frt_ary_resize(ary, size)
40
+ #define frt_ary_grow(ary) frt_ary_resize(ary, frt_ary_sz(ary))
41
+ #define frt_ary_last(ary) ary[frt_ary_sz(ary) - 1]
42
+ #define frt_ary_sort(ary, cmp) qsort(ary, frt_ary_size(ary), frt_ary_type_size(ary), cmp)
43
+ #define frt_ary_each_rev(ary, i) for (i = frt_ary_size(ary) - 1; i >= 0; i--)
44
+ #define frt_ary_each(ary, i) for (i = 0; i < frt_ary_size(ary); i++)
45
+
46
+ extern void frt_ary_resize_i(void ***ary, int size);
47
+ extern void **frt_ary_new_i(int type_size, int init_capa);
48
+ extern void frt_ary_set_i(void ***ary, int index, void *value);
49
+ extern void *frt_ary_get_i(void **ary, int index);
50
+ extern void frt_ary_push_i(void ***ary, void *value);
51
+ extern void *frt_ary_pop_i(void **ary);
52
+ extern void frt_ary_unshift_i(void ***ary, void *value);
53
+ extern void *frt_ary_shift_i(void **ary);
54
+ extern void *frt_ary_remove_i(void **ary, int index);
55
+ extern void frt_ary_delete_i(void **ary, int index, frt_free_ft p);
56
+ extern void frt_ary_destroy_i(void **ary, frt_free_ft p);
57
+
58
+ #ifdef __cplusplus
59
+ } // extern "C"
60
+ #endif
52
61
 
53
62
  #endif
@@ -1,19 +1,16 @@
1
1
  #include "bitvector.h"
2
+ #include "internal.h"
2
3
  #include <string.h>
3
4
 
4
5
  BitVector *bv_new_capa(int capa)
5
6
  {
6
- BitVector *bv = ALLOC(BitVector);
7
+ BitVector *bv = ALLOC_AND_ZERO(BitVector);
7
8
 
8
9
  /* The capacity passed by the user is number of bits allowed, however we
9
10
  * store capacity as the number of words (U32) allocated. */
10
- bv->capa = (capa >> 5) + 1;
11
- bv->bits = ALLOC_AND_ZERO_N(f_u32, bv->capa);
12
-
13
- bv->size = 0;
14
- bv->count = 0;
11
+ bv->capa = max2(TO_WORD(capa), 4);
12
+ bv->bits = ALLOC_AND_ZERO_N(u32, bv->capa);
15
13
  bv->curr_bit = -1;
16
- bv->extends_as_ones = 0;
17
14
  bv->ref_cnt = 1;
18
15
  return bv;
19
16
  }
@@ -23,7 +20,7 @@ BitVector *bv_new()
23
20
  return bv_new_capa(BV_INIT_CAPA);
24
21
  }
25
22
 
26
- void bv_destroy(BitVector * bv)
23
+ void bv_destroy(BitVector *bv)
27
24
  {
28
25
  if (--(bv->ref_cnt) == 0) {
29
26
  free(bv->bits);
@@ -31,314 +28,54 @@ void bv_destroy(BitVector * bv)
31
28
  }
32
29
  }
33
30
 
34
- void bv_set(BitVector * bv, int bit)
35
- {
36
- f_u32 *word_p;
37
- int word = bit >> 5;
38
- f_u32 bitmask = 1 << (bit & 31);
39
-
40
- /* Check to see if we need to grow the BitVector */
41
- if (bit >= bv->size) {
42
- bv->size = bit + 1; /* size is max range of bits set */
43
- if (word >= bv->capa) {
44
- int capa = bv->capa << 1;
45
- while (capa <= word) {
46
- capa <<= 1;
47
- }
48
- REALLOC_N(bv->bits, f_u32, capa);
49
- memset(bv->bits + bv->capa, (bv->extends_as_ones ? 0xFF : 0),
50
- sizeof(f_u32) * (capa - bv->capa));
51
- bv->capa = capa;
52
- }
53
- }
54
-
55
- /* Set the required bit */
56
- word_p = &(bv->bits[word]);
57
- if ((bitmask & *word_p) == 0) {
58
- bv->count++; /* update count */
59
- *word_p |= bitmask;
60
- }
61
- }
62
-
63
- /*
64
- * This method relies on the fact that enough space has been set for the bits
65
- * to be set. You need to create the BitVector using bv_new_capa(capa) with
66
- * a capacity larger than any bit being set.
67
- */
68
- void bv_set_fast(BitVector * bv, int bit)
31
+ void bv_clear(BitVector *bv)
69
32
  {
70
- bv->count++;
71
- bv->size = bit;
72
- bv->bits[bit >> 5] |= 1 << (bit & 31);
73
- }
74
-
75
- int bv_get(BitVector * bv, int bit)
76
- {
77
- /* out of range so return 0 because it can't have been set */
78
- if (bit >= bv->size) {
79
- return bv->extends_as_ones;
80
- }
81
- return (bv->bits[bit >> 5] >> (bit & 31)) & 0x01;
82
- }
83
-
84
- void bv_clear(BitVector * bv)
85
- {
86
- memset(bv->bits, 0, bv->capa * sizeof(f_u32));
33
+ memset(bv->bits, 0, bv->capa * sizeof(u32));
87
34
  bv->extends_as_ones = 0;
88
35
  bv->count = 0;
89
36
  bv->size = 0;
90
37
  }
91
38
 
92
- /*
93
- * FIXME: if the top set bit is unset, size is not adjusted. This will not
94
- * cause any bugs in this code but could cause problems if users are relying
95
- * on the fact that size is accurate.
96
- */
97
- void bv_unset(BitVector * bv, int bit)
98
- {
99
- f_u32 *word_p;
100
- f_u32 bitmask;
101
- int word = bit >> 5;
102
-
103
- if (bit >= bv->size) {
104
- bv->size = bit + 1; /* size is max range of bits set */
105
- if (word >= bv->capa) {
106
- int capa = bv->capa << 1;
107
-
108
- while (capa <= word) {
109
- capa <<= 1;
110
- }
111
- REALLOC_N(bv->bits, f_u32, capa);
112
- memset(bv->bits + bv->capa, (bv->extends_as_ones ? 0xFF : 0),
113
- sizeof(f_u32) * (capa - bv->capa));
114
- bv->capa = capa;
115
- }
116
- }
117
-
118
- word_p = &(bv->bits[word]);
119
- bitmask = 1 << (bit & 31);
120
- if ((bitmask & *word_p) > 0) {
121
- bv->count--; /* update count */
122
- *word_p &= ~bitmask;
123
- }
124
- }
125
-
126
- /* Table of bits per char. This table is used by the bv_recount method to
127
- * optimize the counting of bits */
128
- static const uchar BYTE_COUNTS[] = {
129
- 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
130
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
131
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
132
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
133
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
134
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
135
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
136
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
137
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
138
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
139
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
140
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
141
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
142
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
143
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
144
- 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
145
- };
146
-
147
- int bv_recount(BitVector * bv)
148
- {
149
- /* if the vector has been modified */
150
- int i, c = 0;
151
- uchar *bytes = (uchar *)bv->bits; /* count by character */
152
- const int num_bytes = (((bv->size >> 5) + 1) << 2);
153
- if (bv->extends_as_ones) {
154
- for (i = 0; i < num_bytes; i++) {
155
- c += BYTE_COUNTS[~(bytes[i]) & 0xFF]; /* sum bits per char */
156
- }
157
- }
158
- else {
159
- for (i = 0; i < num_bytes; i++) {
160
- c += BYTE_COUNTS[bytes[i]]; /* sum bits per char */
161
- }
162
- }
163
- bv->count = c;
164
- return c;
165
- }
166
-
167
- void bv_scan_reset(BitVector * bv)
39
+ void bv_scan_reset(BitVector *bv)
168
40
  {
169
41
  bv->curr_bit = -1;
170
42
  }
171
43
 
172
- /* Table showing the number of trailing 0s in a char. This is used to optimize
173
- * the bv_scan_next method. */
174
- const int NUM_TRAILING_ZEROS[] = {
175
- 8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
176
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
177
- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
178
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
179
- 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
180
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
181
- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
182
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
183
- 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
184
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
185
- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
186
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
187
- 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
188
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
189
- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
190
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
191
- };
192
-
193
- /*
194
- * This method is highly optimized, hence the loop unrolling
195
- */
196
- static INLINE int bv_get_1_offset(f_u32 word)
197
- {
198
- if (word & 0xff) {
199
- return NUM_TRAILING_ZEROS[word & 0xff];
200
- }
201
- else {
202
- word >>= 8;
203
- if (word & 0xff) {
204
- return NUM_TRAILING_ZEROS[word & 0xff] + 8;
205
- }
206
- else {
207
- word >>= 8;
208
- if (word & 0xff) {
209
- return NUM_TRAILING_ZEROS[word & 0xff] + 16;
210
- }
211
- else {
212
- word >>= 8;
213
- return NUM_TRAILING_ZEROS[word & 0xff] + 24;
214
- }
215
- }
216
- }
217
- }
218
- /*
219
- * second fastest;
220
- *
221
- * while ((inc = NUM_TRAILING_ZEROS[word & 0xff]) == 8) {
222
- * word >>= 8;
223
- * bit_pos += 8;
224
- * }
225
- *
226
- * third fastest;
227
- *
228
- * bit_pos += inc;
229
- * if ((word & 0xffff) == 0) {
230
- * bit_pos += 16;
231
- * word >>= 16;
232
- * }
233
- * if ((word & 0xff) == 0) {
234
- * bit_pos += 8;
235
- * word >>= 8;
236
- * }
237
- * bit_pos += NUM_TRAILING_ZEROS[word & 0xff];
238
- */
239
-
240
- int bv_scan_next_from(BitVector * bv, register const int from)
241
- {
242
- register const f_u32 *const bits = bv->bits;
243
- register const int word_size = (bv->size >> 5) + 1;
244
- register int word_pos = from >> 5;
245
- register int bit_pos = (from & 31);
246
- register f_u32 word = bits[word_pos] >> bit_pos;
247
-
248
- if (from >= bv->size) {
249
- return -1;
250
- }
251
- if (word == 0) {
252
- bit_pos = 0;
253
- do {
254
- word_pos++;
255
- if (word_pos >= word_size) {
256
- return -1;
257
- }
258
- } while (bits[word_pos] == 0);
259
- word = bits[word_pos];
260
- }
261
-
262
- /* check the word a byte at a time as the NUM_TRAILING_ZEROS table would
263
- * be too large for 32-bit integer or even a 16-bit integer */
264
- bit_pos += bv_get_1_offset(word);
265
-
266
- return bv->curr_bit = ((word_pos << 5) + bit_pos);
267
- }
268
-
269
- int bv_scan_next(BitVector * bv)
270
- {
271
- return bv_scan_next_from(bv, bv->curr_bit + 1);
272
- }
273
-
274
- int bv_scan_next_unset_from(BitVector * bv, register const int from)
275
- {
276
- register const f_u32 *const bits = bv->bits;
277
- register const int word_size = (bv->size >> 5) + 1;
278
- register int word_pos = from >> 5;
279
- register int bit_pos = (from & 31);
280
- register f_u32 word = ~(~(bits[word_pos]) >> bit_pos);
281
-
282
- if (from >= bv->size) {
283
- return -1;
284
- }
285
- if (word == 0xFFFFFFFF) {
286
- bit_pos = 0;
287
- do {
288
- word_pos++;
289
- if (word_pos >= word_size) {
290
- return -1;
291
- }
292
- } while (bits[word_pos] == 0xFFFFFFFF);
293
- word = bits[word_pos];
294
- }
295
-
296
- bit_pos += bv_get_1_offset(~word);
297
-
298
- return bv->curr_bit = ((word_pos << 5) + bit_pos);
299
- }
300
-
301
- int bv_scan_next_unset(BitVector * bv)
302
- {
303
- return bv_scan_next_unset_from(bv, bv->curr_bit + 1);
304
- }
305
-
306
44
  int bv_eq(BitVector *bv1, BitVector *bv2)
307
45
  {
308
46
  if (bv1 == bv2) {
309
47
  return true;
310
48
  }
311
- else if (bv1->extends_as_ones != bv2->extends_as_ones) {
49
+
50
+ if (bv1->extends_as_ones != bv2->extends_as_ones) {
312
51
  return false;
313
52
  }
314
- else {
315
- f_u32 *bits = bv1->bits;
316
- f_u32 *bits2 = bv2->bits;
317
- int min_size = min2(bv1->size, bv2->size);
318
- int word_size = (min_size >> 5) + 1;
319
- int ext_word_size = 0;
320
53
 
321
- int i;
54
+ u32 *bits = bv1->bits;
55
+ u32 *bits2 = bv2->bits;
56
+ int min_size = min2(bv1->size, bv2->size);
57
+ int word_size = TO_WORD(min_size);
58
+ int ext_word_size = 0;
59
+ int i;
322
60
 
323
- for (i = 0; i < word_size; i++) {
324
- if (bits[i] != bits2[i]) {
325
- return false;
326
- }
327
- }
328
- if (bv1->size > min_size) {
329
- bits = bv1->bits;
330
- ext_word_size = (bv1->size >> 5) + 1;
331
- }
332
- else if (bv2->size > min_size) {
333
- bits = bv2->bits;
334
- ext_word_size = (bv2->size >> 5) + 1;
61
+ for (i = 0; i < word_size; i++) {
62
+ if (bits[i] != bits2[i]) {
63
+ return false;
335
64
  }
336
- if (ext_word_size) {
337
- const f_u32 expected = (bv1->extends_as_ones ? 0xFFFFFFFF : 0);
338
- for (i = word_size; i < ext_word_size; i++) {
339
- if (bits[i] != expected) {
340
- return false;
341
- }
65
+ }
66
+ if (bv1->size > min_size) {
67
+ bits = bv1->bits;
68
+ ext_word_size = TO_WORD(bv1->size);
69
+ }
70
+ else if (bv2->size > min_size) {
71
+ bits = bv2->bits;
72
+ ext_word_size = TO_WORD(bv2->size);
73
+ }
74
+ if (ext_word_size) {
75
+ const u32 expected = (bv1->extends_as_ones ? 0xFFFFFFFF : 0);
76
+ for (i = word_size; i < ext_word_size; i++) {
77
+ if (bits[i] != expected) {
78
+ return false;
342
79
  }
343
80
  }
344
81
  }
@@ -348,193 +85,12 @@ int bv_eq(BitVector *bv1, BitVector *bv2)
348
85
  unsigned long bv_hash(BitVector *bv)
349
86
  {
350
87
  unsigned long hash = 0;
351
- const f_u32 empty_word = bv->extends_as_ones ? 0xFFFFFFFF : 0;
88
+ const u32 empty_word = bv->extends_as_ones ? 0xFFFFFFFF : 0;
352
89
  int i;
353
- for (i = (bv->size >> 5); i >= 0; i--) {
354
- const f_u32 word = bv->bits[i];
355
- if (word != empty_word) {
90
+ for (i = TO_WORD(bv->size) - 1; i >= 0; i--) {
91
+ const u32 word = bv->bits[i];
92
+ if (word != empty_word)
356
93
  hash = (hash << 1) ^ word;
357
- }
358
94
  }
359
- hash = (hash << 1) | bv->extends_as_ones;
360
- return hash;
361
- }
362
-
363
- static INLINE void bv_recapa(BitVector *bv, int new_capa)
364
- {
365
- if (bv->capa < new_capa) {
366
- REALLOC_N(bv->bits, f_u32, new_capa);
367
- memset(bv->bits + bv->capa, (bv->extends_as_ones ? 0xFF : 0),
368
- sizeof(f_u32) * (new_capa - bv->capa));
369
- bv->capa = new_capa;
370
- }
371
- }
372
-
373
- static BitVector *bv_and_i(BitVector *bv, BitVector *bv1, BitVector *bv2)
374
- {
375
- int i;
376
- int size;
377
- int word_size;
378
- int capa = 4;
379
-
380
- if (bv1->extends_as_ones && bv2->extends_as_ones) {
381
- size = max2(bv1->size, bv2->size);
382
- bv->extends_as_ones = true;
383
- }
384
- else if (bv1->extends_as_ones || bv2->extends_as_ones) {
385
- size = max2(bv1->size, bv2->size);
386
- bv->extends_as_ones = false;
387
- }
388
- else {
389
- size = min2(bv1->size, bv2->size);
390
- bv->extends_as_ones = false;
391
- }
392
-
393
- word_size = (size >> 5) + 1;
394
- while (capa < word_size) {
395
- capa <<= 1;
396
- }
397
- bv_recapa(bv1, capa);
398
- bv_recapa(bv2, capa);
399
- REALLOC_N(bv->bits, f_u32, capa);
400
- bv->capa = capa;
401
- bv->size = size;
402
-
403
- memset(bv->bits + word_size, (bv->extends_as_ones ? 0xFF : 0),
404
- sizeof(f_u32) * (capa - word_size));
405
-
406
- for (i = 0; i < word_size; i++) {
407
- bv->bits[i] = bv1->bits[i] & bv2->bits[i];
408
- }
409
-
410
- bv_recount(bv);
411
- return bv;
412
- }
413
-
414
- BitVector *bv_and(BitVector *bv1, BitVector *bv2)
415
- {
416
- return bv_and_i(bv_new(), bv1, bv2);
417
- }
418
-
419
- BitVector *bv_and_x(BitVector *bv1, BitVector *bv2)
420
- {
421
- return bv_and_i(bv1, bv1, bv2);
422
- }
423
-
424
- static BitVector *bv_or_i(BitVector *bv, BitVector *bv1, BitVector *bv2)
425
- {
426
- int i;
427
- int max_size = max2(bv1->size, bv2->size);
428
- int word_size = (max_size >> 5) + 1;
429
- int capa = 4;
430
- while (capa < word_size) {
431
- capa <<= 1;
432
- }
433
- REALLOC_N(bv->bits, f_u32, capa);
434
- bv->capa = capa;
435
- bv->size = max_size;
436
-
437
- bv_recapa(bv1, capa);
438
- bv_recapa(bv2, capa);
439
-
440
- if (bv1->extends_as_ones || bv2->extends_as_ones) {
441
- bv->extends_as_ones = true;
442
- }
443
- else {
444
- bv->extends_as_ones = false;
445
- }
446
-
447
- memset(bv->bits + word_size, (bv->extends_as_ones ? 0xFF : 0),
448
- sizeof(f_u32) * (capa - word_size));
449
-
450
- for (i = 0; i < word_size; i++) {
451
- bv->bits[i] = bv1->bits[i] | bv2->bits[i];
452
- }
453
- bv_recount(bv);
454
- return bv;
455
- }
456
-
457
- BitVector *bv_or(BitVector *bv1, BitVector *bv2)
458
- {
459
- return bv_or_i(bv_new(), bv1, bv2);
460
- }
461
-
462
- BitVector *bv_or_x(BitVector *bv1, BitVector *bv2)
463
- {
464
- return bv_or_i(bv1, bv1, bv2);
465
- }
466
-
467
- static BitVector *bv_xor_i(BitVector *bv, BitVector *bv1, BitVector *bv2)
468
- {
469
- int i;
470
- int max_size = max2(bv1->size, bv2->size);
471
- int word_size = (max_size >> 5) + 1;
472
- int capa = 4;
473
- while (capa < word_size) {
474
- capa <<= 1;
475
- }
476
- REALLOC_N(bv->bits, f_u32, capa);
477
- bv->capa = capa;
478
- bv->size = max_size;
479
-
480
- bv_recapa(bv1, capa);
481
- bv_recapa(bv2, capa);
482
-
483
- if (bv1->extends_as_ones != bv2->extends_as_ones) {
484
- bv->extends_as_ones = true;
485
- }
486
- else {
487
- bv->extends_as_ones = false;
488
- }
489
-
490
- memset(bv->bits + word_size, (bv->extends_as_ones ? 0xFF : 0),
491
- sizeof(f_u32) * (capa - word_size));
492
-
493
- for (i = 0; i < word_size; i++) {
494
- bv->bits[i] = bv1->bits[i] ^ bv2->bits[i];
495
- }
496
- bv_recount(bv);
497
- return bv;
498
- }
499
-
500
- BitVector *bv_xor(BitVector *bv1, BitVector *bv2)
501
- {
502
- return bv_xor_i(bv_new(), bv1, bv2);
503
- }
504
-
505
- BitVector *bv_xor_x(BitVector *bv1, BitVector *bv2)
506
- {
507
- return bv_xor_i(bv1, bv1, bv2);
508
- }
509
-
510
- static BitVector *bv_not_i(BitVector *bv, BitVector *bv1)
511
- {
512
- int i;
513
- int word_size = (bv1->size >> 5) + 1;
514
- int capa = 4;
515
- while (capa < word_size) {
516
- capa <<= 1;
517
- }
518
- REALLOC_N(bv->bits, f_u32, capa);
519
- bv->capa = capa;
520
- bv->size = bv1->size;
521
- bv->extends_as_ones = 1 - bv1->extends_as_ones;
522
- memset(bv->bits + word_size, (bv->extends_as_ones ? 0xFF : 0),
523
- sizeof(f_u32) * (capa - word_size));
524
-
525
- for (i = 0; i < word_size; i++) {
526
- bv->bits[i] = ~(bv1->bits[i]);
527
- }
528
- bv_recount(bv);
529
- return bv;
530
- }
531
-
532
- BitVector *bv_not(BitVector *bv1)
533
- {
534
- return bv_not_i(bv_new(), bv1);
535
- }
536
-
537
- BitVector *bv_not_x(BitVector *bv1)
538
- {
539
- return bv_not_i(bv1, bv1);
95
+ return (hash << 1) | bv->extends_as_ones;
540
96
  }