jk-ferret 0.11.8.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (228) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +90 -0
  4. data/RELEASE_CHANGES +137 -0
  5. data/RELEASE_NOTES +60 -0
  6. data/Rakefile +443 -0
  7. data/TODO +109 -0
  8. data/TUTORIAL +231 -0
  9. data/bin/ferret-browser +79 -0
  10. data/ext/BZLIB_blocksort.c +1094 -0
  11. data/ext/BZLIB_bzlib.c +1578 -0
  12. data/ext/BZLIB_compress.c +672 -0
  13. data/ext/BZLIB_crctable.c +104 -0
  14. data/ext/BZLIB_decompress.c +626 -0
  15. data/ext/BZLIB_huffman.c +205 -0
  16. data/ext/BZLIB_randtable.c +84 -0
  17. data/ext/STEMMER_api.c +66 -0
  18. data/ext/STEMMER_libstemmer.c +93 -0
  19. data/ext/STEMMER_stem_ISO_8859_1_danish.c +337 -0
  20. data/ext/STEMMER_stem_ISO_8859_1_dutch.c +624 -0
  21. data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
  22. data/ext/STEMMER_stem_ISO_8859_1_finnish.c +762 -0
  23. data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
  24. data/ext/STEMMER_stem_ISO_8859_1_german.c +503 -0
  25. data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
  26. data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
  27. data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
  28. data/ext/STEMMER_stem_ISO_8859_1_porter.c +749 -0
  29. data/ext/STEMMER_stem_ISO_8859_1_portuguese.c +1017 -0
  30. data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
  31. data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
  32. data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
  33. data/ext/STEMMER_stem_KOI8_R_russian.c +700 -0
  34. data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
  35. data/ext/STEMMER_stem_UTF_8_dutch.c +634 -0
  36. data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
  37. data/ext/STEMMER_stem_UTF_8_finnish.c +768 -0
  38. data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
  39. data/ext/STEMMER_stem_UTF_8_german.c +509 -0
  40. data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
  41. data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
  42. data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
  43. data/ext/STEMMER_stem_UTF_8_porter.c +755 -0
  44. data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
  45. data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
  46. data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
  47. data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
  48. data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
  49. data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
  50. data/ext/STEMMER_utilities.c +478 -0
  51. data/ext/analysis.c +1710 -0
  52. data/ext/analysis.h +266 -0
  53. data/ext/api.h +26 -0
  54. data/ext/array.c +125 -0
  55. data/ext/array.h +62 -0
  56. data/ext/bitvector.c +96 -0
  57. data/ext/bitvector.h +594 -0
  58. data/ext/bzlib.h +282 -0
  59. data/ext/bzlib_private.h +503 -0
  60. data/ext/compound_io.c +384 -0
  61. data/ext/config.h +52 -0
  62. data/ext/document.c +159 -0
  63. data/ext/document.h +63 -0
  64. data/ext/except.c +102 -0
  65. data/ext/except.h +176 -0
  66. data/ext/extconf.rb +15 -0
  67. data/ext/ferret.c +416 -0
  68. data/ext/ferret.h +94 -0
  69. data/ext/field_index.c +262 -0
  70. data/ext/field_index.h +52 -0
  71. data/ext/filter.c +157 -0
  72. data/ext/fs_store.c +493 -0
  73. data/ext/global.c +458 -0
  74. data/ext/global.h +302 -0
  75. data/ext/hash.c +524 -0
  76. data/ext/hash.h +515 -0
  77. data/ext/hashset.c +192 -0
  78. data/ext/hashset.h +215 -0
  79. data/ext/header.h +58 -0
  80. data/ext/helper.c +63 -0
  81. data/ext/helper.h +21 -0
  82. data/ext/index.c +6804 -0
  83. data/ext/index.h +935 -0
  84. data/ext/internal.h +1019 -0
  85. data/ext/lang.c +10 -0
  86. data/ext/lang.h +68 -0
  87. data/ext/libstemmer.h +79 -0
  88. data/ext/mempool.c +88 -0
  89. data/ext/mempool.h +43 -0
  90. data/ext/modules.h +190 -0
  91. data/ext/multimapper.c +351 -0
  92. data/ext/multimapper.h +60 -0
  93. data/ext/posh.c +1006 -0
  94. data/ext/posh.h +973 -0
  95. data/ext/priorityqueue.c +149 -0
  96. data/ext/priorityqueue.h +155 -0
  97. data/ext/q_boolean.c +1621 -0
  98. data/ext/q_const_score.c +162 -0
  99. data/ext/q_filtered_query.c +212 -0
  100. data/ext/q_fuzzy.c +280 -0
  101. data/ext/q_match_all.c +149 -0
  102. data/ext/q_multi_term.c +673 -0
  103. data/ext/q_parser.c +3103 -0
  104. data/ext/q_phrase.c +1206 -0
  105. data/ext/q_prefix.c +98 -0
  106. data/ext/q_range.c +682 -0
  107. data/ext/q_span.c +2390 -0
  108. data/ext/q_term.c +337 -0
  109. data/ext/q_wildcard.c +167 -0
  110. data/ext/r_analysis.c +2626 -0
  111. data/ext/r_index.c +3468 -0
  112. data/ext/r_qparser.c +635 -0
  113. data/ext/r_search.c +4490 -0
  114. data/ext/r_store.c +513 -0
  115. data/ext/r_utils.c +1131 -0
  116. data/ext/ram_store.c +476 -0
  117. data/ext/scanner.c +895 -0
  118. data/ext/scanner.h +36 -0
  119. data/ext/scanner_mb.c +6701 -0
  120. data/ext/scanner_utf8.c +4415 -0
  121. data/ext/search.c +1864 -0
  122. data/ext/search.h +953 -0
  123. data/ext/similarity.c +151 -0
  124. data/ext/similarity.h +89 -0
  125. data/ext/sort.c +786 -0
  126. data/ext/stem_ISO_8859_1_danish.h +16 -0
  127. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  128. data/ext/stem_ISO_8859_1_english.h +16 -0
  129. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  130. data/ext/stem_ISO_8859_1_french.h +16 -0
  131. data/ext/stem_ISO_8859_1_german.h +16 -0
  132. data/ext/stem_ISO_8859_1_hungarian.h +16 -0
  133. data/ext/stem_ISO_8859_1_italian.h +16 -0
  134. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  135. data/ext/stem_ISO_8859_1_porter.h +16 -0
  136. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  137. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  138. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  139. data/ext/stem_ISO_8859_2_romanian.h +16 -0
  140. data/ext/stem_KOI8_R_russian.h +16 -0
  141. data/ext/stem_UTF_8_danish.h +16 -0
  142. data/ext/stem_UTF_8_dutch.h +16 -0
  143. data/ext/stem_UTF_8_english.h +16 -0
  144. data/ext/stem_UTF_8_finnish.h +16 -0
  145. data/ext/stem_UTF_8_french.h +16 -0
  146. data/ext/stem_UTF_8_german.h +16 -0
  147. data/ext/stem_UTF_8_hungarian.h +16 -0
  148. data/ext/stem_UTF_8_italian.h +16 -0
  149. data/ext/stem_UTF_8_norwegian.h +16 -0
  150. data/ext/stem_UTF_8_porter.h +16 -0
  151. data/ext/stem_UTF_8_portuguese.h +16 -0
  152. data/ext/stem_UTF_8_romanian.h +16 -0
  153. data/ext/stem_UTF_8_russian.h +16 -0
  154. data/ext/stem_UTF_8_spanish.h +16 -0
  155. data/ext/stem_UTF_8_swedish.h +16 -0
  156. data/ext/stem_UTF_8_turkish.h +16 -0
  157. data/ext/stopwords.c +410 -0
  158. data/ext/store.c +698 -0
  159. data/ext/store.h +799 -0
  160. data/ext/symbol.c +10 -0
  161. data/ext/symbol.h +23 -0
  162. data/ext/term_vectors.c +73 -0
  163. data/ext/threading.h +31 -0
  164. data/ext/win32.h +62 -0
  165. data/lib/ferret.rb +30 -0
  166. data/lib/ferret/browser.rb +246 -0
  167. data/lib/ferret/browser/s/global.js +192 -0
  168. data/lib/ferret/browser/s/style.css +148 -0
  169. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  170. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  171. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  172. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  173. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  174. data/lib/ferret/browser/views/layout.rhtml +22 -0
  175. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  176. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  177. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  178. data/lib/ferret/browser/webrick.rb +14 -0
  179. data/lib/ferret/document.rb +130 -0
  180. data/lib/ferret/field_infos.rb +44 -0
  181. data/lib/ferret/field_symbol.rb +87 -0
  182. data/lib/ferret/index.rb +973 -0
  183. data/lib/ferret/number_tools.rb +157 -0
  184. data/lib/ferret/version.rb +3 -0
  185. data/setup.rb +1555 -0
  186. data/test/long_running/largefile/tc_largefile.rb +46 -0
  187. data/test/test_all.rb +5 -0
  188. data/test/test_helper.rb +29 -0
  189. data/test/test_installed.rb +1 -0
  190. data/test/threading/number_to_spoken.rb +132 -0
  191. data/test/threading/thread_safety_index_test.rb +88 -0
  192. data/test/threading/thread_safety_read_write_test.rb +73 -0
  193. data/test/threading/thread_safety_test.rb +133 -0
  194. data/test/unit/analysis/tc_analyzer.rb +550 -0
  195. data/test/unit/analysis/tc_token_stream.rb +653 -0
  196. data/test/unit/index/tc_index.rb +867 -0
  197. data/test/unit/index/tc_index_reader.rb +699 -0
  198. data/test/unit/index/tc_index_writer.rb +447 -0
  199. data/test/unit/index/th_doc.rb +332 -0
  200. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  201. data/test/unit/search/tc_filter.rb +156 -0
  202. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  203. data/test/unit/search/tc_index_searcher.rb +67 -0
  204. data/test/unit/search/tc_multi_searcher.rb +128 -0
  205. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  206. data/test/unit/search/tc_search_and_sort.rb +179 -0
  207. data/test/unit/search/tc_sort.rb +49 -0
  208. data/test/unit/search/tc_sort_field.rb +27 -0
  209. data/test/unit/search/tc_spans.rb +190 -0
  210. data/test/unit/search/tm_searcher.rb +436 -0
  211. data/test/unit/store/tc_fs_store.rb +115 -0
  212. data/test/unit/store/tc_ram_store.rb +35 -0
  213. data/test/unit/store/tm_store.rb +34 -0
  214. data/test/unit/store/tm_store_lock.rb +68 -0
  215. data/test/unit/tc_document.rb +81 -0
  216. data/test/unit/tc_field_symbol.rb +26 -0
  217. data/test/unit/ts_analysis.rb +2 -0
  218. data/test/unit/ts_index.rb +2 -0
  219. data/test/unit/ts_largefile.rb +4 -0
  220. data/test/unit/ts_query_parser.rb +2 -0
  221. data/test/unit/ts_search.rb +2 -0
  222. data/test/unit/ts_store.rb +2 -0
  223. data/test/unit/ts_utils.rb +2 -0
  224. data/test/unit/utils/tc_bit_vector.rb +295 -0
  225. data/test/unit/utils/tc_number_tools.rb +117 -0
  226. data/test/unit/utils/tc_priority_queue.rb +106 -0
  227. data/test/utils/content_generator.rb +226 -0
  228. metadata +319 -0
data/ext/global.h ADDED
@@ -0,0 +1,302 @@
1
+ #ifndef FRT_GLOBAL_H
2
+ #define FRT_GLOBAL_H
3
+
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ #include "config.h"
9
+ #include "except.h"
10
+ #include "lang.h"
11
+ #include <stdlib.h>
12
+ #include <stdio.h>
13
+ #include <stdarg.h>
14
+ #include <assert.h>
15
+ #include <string.h>
16
+
17
+ #define FRT_MAX_WORD_SIZE 255
18
+ #define FRT_MAX_FILE_PATH 1024
19
+ #define FRT_BUFFER_SIZE 1024
20
+
21
+ #if defined(__GNUC__) && !defined(__cplusplus)
22
+ # define FRT_INLINE
23
+ #else
24
+ # define FRT_INLINE
25
+ #endif
26
+
27
+ #if __GNUC__ >= 3
28
+ # define FRT_ATTR_ALWAYS_INLINE inline __attribute__ ((always_inline))
29
+ # define FRT_ATTR_MALLOC __attribute__ ((malloc))
30
+ # define FRT_ATTR_PURE __attribute__ ((pure))
31
+ # define FRT_ATTR_CONST __attribute__ ((const))
32
+ # define likely(x) __builtin_expect(!!(x), 1)
33
+ # define unlikely(x) __builtin_expect(!!(x), 0)
34
+ #else
35
+ # define FRT_ATTR_ALWAYS_INLINE
36
+ # define FRT_ATTR_MALLOC
37
+ # define FRT_ATTR_PURE
38
+ # define FRT_ATTR_CONST
39
+ # define likely(x) (x)
40
+ # define unlikely(x) (x)
41
+ #endif
42
+
43
+ #ifdef __cplusplus
44
+ #define FRT_EXTERNC extern "C"
45
+ #else
46
+ #define FRT_EXTERNC
47
+ #endif
48
+
49
+ typedef void (*frt_free_ft)(void *key);
50
+
51
+ #define FRT_NELEMS(array) ((int)(sizeof(array)/sizeof(array[0])))
52
+
53
+
54
+ #define FRT_ZEROSET(ptr, type) memset(ptr, 0, sizeof(type))
55
+ #define FRT_ZEROSET_N(ptr, type, n) memset(ptr, 0, sizeof(type)*(n))
56
+
57
+ #define FRT_ALLOC_AND_ZERO(type) (type*)frt_ecalloc(sizeof(type))
58
+ #define FRT_ALLOC_AND_ZERO_N(type,n) (type*)frt_ecalloc(sizeof(type)*(n))
59
+
60
+ #define FRT_REF(a) (a)->ref_cnt++
61
+ #define FRT_DEREF(a) (a)->ref_cnt--
62
+
63
+ #define FRT_NEXT_NUM(index, size) (((index) + 1) % (size))
64
+ #define FRT_PREV_NUM(index, size) (((index) + (size) - 1) % (size))
65
+
66
+ #define FRT_MIN(a, b) ((a) < (b) ? (a) : (b))
67
+ #define FRT_MAX(a, b) ((a) > (b) ? (a) : (b))
68
+
69
+ #define FRT_MIN3(a, b, c) ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c)))
70
+ #define FRT_MAX3(a, b, c) ((a) > (b) ? ((a) > (c) ? (a) : (c)) : ((b) > (c) ? (b) : (c)))
71
+
72
/* Absolute value of n. The argument is evaluated more than once, so it
 * must not have side effects. */
#define FRT_ABS(n) (((n) >= 0) ? (n) : -(n))
/* Computes ((n - 1) >> 5) + 1, i.e. n divided by 32 rounded up for n >= 1. */
#define FRT_TO_WORD(n) ((((n) - 1) >> 5) + 1)
74
+
75
/*
 * Grows the array (self)->ptr once (self)->len has reached (self)->capa.
 * The capacity doubles when it is already positive and starts at 4
 * otherwise; the buffer is then resized with frt_erealloc to hold capa
 * elements of +type+. +len+, +capa+ and +ptr+ are member names of *self.
 * +self+ is evaluated several times, so it must be free of side effects.
 */
#define FRT_RECAPA(self, len, capa, ptr, type) \
    do {\
        if ((self)->len >= (self)->capa) {\
            if ((self)->capa > 0) {\
                (self)->capa <<= 1;\
            } else {\
                (self)->capa = 4;\
            }\
            (self)->ptr = (type *)frt_erealloc((self)->ptr, sizeof(type) * (self)->capa);\
        }\
    } while (0)
86
+
87
+ #ifdef POSH_OS_WIN32
88
+ # define Jx fprintf(stderr,"%s, %d\n", __FILE__, __LINE__);
89
+ # define Xj fprintf(stdout,"%s, %d\n", __FILE__, __LINE__);
90
+ #else
91
+ # define Jx fprintf(stderr,"%s, %d: %s\n", __FILE__, __LINE__, __func__);
92
+ # define Xj fprintf(stdout,"%s, %d: %s\n", __FILE__, __LINE__, __func__);
93
+ #endif
94
+
95
+ extern unsigned int *frt_imalloc(unsigned int value);
96
+ extern unsigned long *frt_lmalloc(unsigned long value);
97
+ extern frt_u32 *frt_u32malloc(frt_u32 value);
98
+ extern frt_u64 *frt_u64malloc(frt_u64 value);
99
+
100
+ extern char *frt_estrdup(const char *s);
101
+ extern char *frt_estrcat(char *str, char *str_cat);
102
+ extern void frt_weprintf(const char *fmt, ...);
103
+ extern char *frt_epstrdup(const char *fmt, int len, ...);
104
+
105
+ extern const char *FRT_EMPTY_STRING;
106
+
107
+ extern int frt_scmp(const void *p1, const void *p2);
108
+ extern int frt_icmp(const void *p1, const void *p2);
109
+ extern int frt_icmp_risky(const void *p1, const void *p2);
110
+ extern void frt_strsort(char **string_array, int size);
111
+
112
+ extern int frt_min2(int a, int b);
113
+ extern int frt_min3(int a, int b, int c);
114
+ extern int frt_max2(int a, int b);
115
+ extern int frt_max3(int a, int b, int c);
116
+
117
+ extern char *frt_dbl_to_s(char *buf, double num);
118
+ extern char *frt_strfmt(const char *fmt, ...);
119
+ extern char *frt_vstrfmt(const char *fmt, va_list args);
120
+
121
+ extern char *frt_get_stacktrace();
122
+ extern void frt_print_stacktrace();
123
+
124
+ extern void frt_register_for_cleanup(void *p, frt_free_ft free_func);
125
+ extern void frt_do_clean_up();
126
+
127
+ /**
128
+ * A dummy function which can be passed to functions which expect a free
129
+ * function such as h_new() if you don't want the free functions to do anything.
130
+ * This function will do nothing.
131
+ *
132
+ * @param p the object which this function will be called on.
133
+ */
134
+ extern void frt_dummy_free(void *p);
135
+
136
+ /**
137
+ * Returns the count of leading [MSB] 0 bits in +word+.
138
+ */
139
+ static FRT_ATTR_ALWAYS_INLINE FRT_ATTR_CONST
140
+ int frt_count_leading_zeros(frt_u32 word)
141
+ {
142
+ #ifdef __GNUC__
143
+ if (word)
144
+ return __builtin_clz(word);
145
+ return 32;
146
+ #else
147
+ static const int count_leading_zeros[] = {
148
+ 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
149
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
150
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
151
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
152
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
153
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
154
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
155
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
156
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
157
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
158
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
159
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
160
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
161
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
162
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
163
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
164
+ };
165
+ if (word & 0xff) return count_leading_zeros[word & 0xff];
166
+ word >>= 8; if (word & 0xff) return count_leading_zeros[word & 0xff] + 8;
167
+ word >>= 8; if (word & 0xff) return count_leading_zeros[word & 0xff] + 16;
168
+ word >>= 8; return count_leading_zeros[word & 0xff] + 24;
169
+ #endif
170
+ }
171
+
172
+ static FRT_ATTR_ALWAYS_INLINE FRT_ATTR_CONST
173
+ int frt_count_leading_ones(frt_u32 word)
174
+ {
175
+ return frt_count_leading_zeros(~word);
176
+ }
177
+
178
+ /**
179
+ * Return the count of trailing [LSB] 0 bits in +word+.
180
+ */
181
+
182
+ static FRT_ATTR_ALWAYS_INLINE FRT_ATTR_CONST
183
+ int frt_count_trailing_zeros(frt_u32 word)
184
+ {
185
+ #ifdef __GNUC__
186
+ if (word)
187
+ return __builtin_ctz(word);
188
+ return 32;
189
+ #else
190
+ static const int count_trailing_zeros[] = {
191
+ 8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
192
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
193
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
194
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
195
+ 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
196
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
197
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
198
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
199
+ 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
200
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
201
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
202
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
203
+ 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
204
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
205
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
206
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
207
+ };
208
+ if (word & 0xff) return count_trailing_zeros[word & 0xff];
209
+ word >>= 8; if (word & 0xff) return count_trailing_zeros[word & 0xff] + 8;
210
+ word >>= 8; if (word & 0xff) return count_trailing_zeros[word & 0xff] + 16;
211
+ word >>= 8; return count_trailing_zeros[word & 0xff] + 24;
212
+ #endif
213
+ }
214
+
215
+ static FRT_ATTR_ALWAYS_INLINE FRT_ATTR_CONST
216
+ int frt_count_trailing_ones(frt_u32 word)
217
+ {
218
+ return frt_count_trailing_zeros(~word);
219
+ }
220
+
221
+ static FRT_ATTR_ALWAYS_INLINE FRT_ATTR_CONST
222
+ int frt_count_ones(frt_u32 word)
223
+ {
224
+ #ifdef __GNUC__
225
+ return __builtin_popcount(word);
226
+ #else
227
+ static const frt_uchar count_ones[] = {
228
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
229
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
230
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
231
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
232
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
233
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
234
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
235
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
236
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
237
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
238
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
239
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
240
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
241
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
242
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
243
+ 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
244
+ };
245
+ return count_ones[(word ) & 0xff]
246
+ + count_ones[(word >> 8 ) & 0xff]
247
+ + count_ones[(word >> 16) & 0xff]
248
+ + count_ones[(word >> 24) & 0xff];
249
+ #endif
250
+ }
251
+
252
+ static FRT_ATTR_ALWAYS_INLINE FRT_ATTR_CONST
253
+ int frt_count_zeros(frt_u32 word)
254
+ {
255
+ return frt_count_ones(~word);
256
+ }
257
+
258
+ /**
259
+ * Round up to the next power of 2
260
+ */
261
+ static FRT_ATTR_ALWAYS_INLINE FRT_ATTR_CONST
262
+ int frt_round2(frt_u32 word)
263
+ {
264
+ return 1 << (32 - frt_count_leading_zeros(word));
265
+ }
266
+
267
+ /**
268
+ * For coverage, we don't want FRT_XEXIT to actually exit on uncaught
269
+ * exceptions. +frt_x_abort_on_exception+ is +true+ by default, set it to
270
+ * +false+, and +frt_x_has_aborted+ will be set as appropriate. We also
271
+ * don't want spurious errors to be printed out to stderr, so we give
272
+ * the option to set where errors go to with +frt_x_exception_stream+.
273
+ */
274
+
275
+ extern bool frt_x_abort_on_exception;
276
+ extern bool frt_x_has_aborted;
277
+ extern FILE *frt_x_exception_stream;
278
+
279
+ /**
280
+ * The convenience macro +EXCEPTION_STREAM+ returns stderr when
281
+ * +frt_x_exception_stream+ isn't explicitly set.
282
+ */
283
+ #define EXCEPTION_STREAM (frt_x_exception_stream ? frt_x_exception_stream : stderr)
284
+
285
#ifdef DEBUG
extern bool frt_x_do_logging;
/* printf-style debug logging, emitted only while frt_x_do_logging is set.
 * Wrapped in do/while so it behaves as one statement inside if/else. */
#define xlog(...) \
    do { if (frt_x_do_logging) printf(__VA_ARGS__); } while (0)
#else
/* Logging is compiled out. The macro accepts (and discards) any argument
 * list so that call sites written for the DEBUG build still compile. */
#define xlog(...) ((void)0)
#endif
291
+
292
+ extern void frt_init(int arc, const char *const argv[]);
293
+ extern void frt_setprogname(const char *str);
294
+ extern const char *frt_progname();
295
+ extern void frt_micro_sleep(const int micro_seconds);
296
+ extern void frt_clean_up();
297
+
298
+ #ifdef __cplusplus
299
+ } // extern "C"
300
+ #endif
301
+
302
+ #endif
data/ext/hash.c ADDED
@@ -0,0 +1,524 @@
1
+ #include "hash.h"
2
+ #include "global.h"
3
+ #include <string.h>
4
+ #include "internal.h"
5
+
6
+ /****************************************************************************
7
+ *
8
+ * Hash
9
+ *
10
+ * This hash table is modeled after Python's dictobject and a description of
11
+ * the algorithm can be found in the file dictobject.c in Python's src
12
+ ****************************************************************************/
13
+
14
+ static char *dummy_key = "";
15
+ static char *dummy_int_key = "i";
16
+
17
+
18
+ #define PERTURB_SHIFT 5
19
+ #define MAX_FREE_HASH_TABLES 80
20
+
21
+ static Hash *free_hts[MAX_FREE_HASH_TABLES];
22
+ static int num_free_hts = 0;
23
+
24
/**
 * Hashes a NUL-terminated string by folding every byte (read as unsigned
 * char) into the running value as h = 37 * h + byte. The empty string
 * hashes to 0.
 *
 * @param str the string to hash
 * @return the hash value
 */
unsigned long str_hash(const char *const str)
{
    const unsigned char *cursor = (const unsigned char *)str;
    unsigned long hash = 0;

    while (*cursor != '\0') {
        hash = hash * 37 + *cursor;
        cursor++;
    }
    return hash;
}
35
+
36
/**
 * Hash function for pointer keys: the pointer value converted to
 * unsigned long is used directly as the hash.
 *
 * @param ptr the pointer to hash
 * @return the pointer value as an unsigned long
 */
unsigned long ptr_hash(const void *const ptr)
{
    const unsigned long address = (unsigned long)ptr;

    return address;
}
40
+
41
/**
 * Equality function for pointer keys: two keys are equal only when they
 * are the same pointer.
 *
 * @return 1 when q1 and q2 are the same pointer, otherwise 0
 */
int ptr_eq(const void *q1, const void *q2)
{
    if (q1 == q2) {
        return 1;
    }
    return 0;
}
45
+
46
/**
 * Equality function for string keys: compares the two NUL-terminated
 * strings with strcmp.
 *
 * @return 1 when the strings have identical contents, otherwise 0
 */
static int str_eq(const void *q1, const void *q2)
{
    const char *lhs = (const char *)q1;
    const char *rhs = (const char *)q2;

    return !strcmp(lhs, rhs);
}
50
+
51
+ typedef HashEntry *(*lookup_ft)(struct Hash *self, register const void *key);
52
+
53
+ /**
54
+ * Fast lookup function for resizing as we know there are no equal elements or
55
+ * deletes to worry about.
56
+ *
57
+ * @param self the Hash to do the fast lookup in
58
+ * @param hash the hash key we are looking for
59
+ */
60
+ static INLINE HashEntry *h_resize_lookup(Hash *self,
61
+ register const unsigned long hash)
62
+ {
63
+ register unsigned long perturb;
64
+ register int mask = self->mask;
65
+ register HashEntry *he0 = self->table;
66
+ register int i = hash & mask;
67
+ register HashEntry *he = &he0[i];
68
+
69
+ if (he->key == NULL) {
70
+ he->hash = hash;
71
+ return he;
72
+ }
73
+
74
+ for (perturb = hash;; perturb >>= PERTURB_SHIFT) {
75
+ i = (i << 2) + i + perturb + 1;
76
+ he = &he0[i & mask];
77
+ if (he->key == NULL) {
78
+ he->hash = hash;
79
+ return he;
80
+ }
81
+ }
82
+ }
83
+
84
+ static HashEntry *h_lookup_ptr(Hash *self, const void *key)
85
+ {
86
+ register const unsigned long hash = (long)key;
87
+ register unsigned long perturb;
88
+ register int mask = self->mask;
89
+ register HashEntry *he0 = self->table;
90
+ register int i = hash & mask;
91
+ register HashEntry *he = &he0[i];
92
+ register HashEntry *freeslot = NULL;
93
+
94
+ if (he->key == NULL || he->hash == hash) {
95
+ he->hash = hash;
96
+ return he;
97
+ }
98
+ if (he->key == dummy_key) {
99
+ freeslot = he;
100
+ }
101
+
102
+ for (perturb = hash;; perturb >>= PERTURB_SHIFT) {
103
+ i = (i << 2) + i + perturb + 1;
104
+ he = &he0[i & mask];
105
+ if (he->key == NULL) {
106
+ if (freeslot != NULL) {
107
+ he = freeslot;
108
+ }
109
+ he->hash = hash;
110
+ return he;
111
+ }
112
+ if (he->hash == hash) {
113
+ return he;
114
+ }
115
+ if (he->key == dummy_key && freeslot == NULL) {
116
+ freeslot = he;
117
+ }
118
+ }
119
+ }
120
+
121
+ HashEntry *h_lookup(Hash *self, register const void *key)
122
+ {
123
+ register const unsigned long hash = self->hash_i(key);
124
+ register unsigned long perturb;
125
+ register int mask = self->mask;
126
+ register HashEntry *he0 = self->table;
127
+ register int i = hash & mask;
128
+ register HashEntry *he = &he0[i];
129
+ register HashEntry *freeslot = NULL;
130
+ eq_ft eq = self->eq_i;
131
+
132
+ if (he->key == NULL || he->key == key) {
133
+ he->hash = hash;
134
+ return he;
135
+ }
136
+ if (he->key == dummy_key) {
137
+ freeslot = he;
138
+ }
139
+ else {
140
+ if ((he->hash == hash) && eq(he->key, key)) {
141
+ return he;
142
+ }
143
+ }
144
+
145
+ for (perturb = hash;; perturb >>= PERTURB_SHIFT) {
146
+ i = (i << 2) + i + perturb + 1;
147
+ he = &he0[i & mask];
148
+ if (he->key == NULL) {
149
+ if (freeslot != NULL) {
150
+ he = freeslot;
151
+ }
152
+ he->hash = hash;
153
+ return he;
154
+ }
155
+ if (he->key == key
156
+ || (he->hash == hash
157
+ && he->key != dummy_key && eq(he->key, key))) {
158
+ return he;
159
+ }
160
+ if (he->key == dummy_key && freeslot == NULL) {
161
+ freeslot = he;
162
+ }
163
+ }
164
+ }
165
+
166
+ Hash *h_new_str(free_ft free_key, free_ft free_value)
167
+ {
168
+ Hash *self;
169
+ if (num_free_hts > 0) {
170
+ self = free_hts[--num_free_hts];
171
+ }
172
+ else {
173
+ self = ALLOC(Hash);
174
+ }
175
+ self->fill = 0;
176
+ self->size = 0;
177
+ self->mask = HASH_MINSIZE - 1;
178
+ self->table = self->smalltable;
179
+ memset(self->smalltable, 0, sizeof(self->smalltable));
180
+ self->lookup_i = (lookup_ft)&h_lookup;
181
+ self->eq_i = str_eq;
182
+ self->hash_i = (hash_ft)str_hash;
183
+
184
+ self->free_key_i = free_key != NULL ? free_key : &dummy_free;
185
+ self->free_value_i = free_value != NULL ? free_value : &dummy_free;
186
+ self->ref_cnt = 1;
187
+ return self;
188
+ }
189
+
190
+ Hash *h_new_int(free_ft free_value)
191
+ {
192
+ Hash *self = h_new_str(NULL, free_value);
193
+
194
+ self->lookup_i = &h_lookup_ptr;
195
+ self->eq_i = NULL;
196
+ self->hash_i = NULL;
197
+
198
+ return self;
199
+ }
200
+
201
+ Hash *h_new(hash_ft hash, eq_ft eq, free_ft free_key, free_ft free_value)
202
+ {
203
+ Hash *self = h_new_str(free_key, free_value);
204
+
205
+ self->lookup_i = &h_lookup;
206
+ self->eq_i = eq;
207
+ self->hash_i = hash;
208
+
209
+ return self;
210
+ }
211
+
212
+ void h_clear(Hash *self)
213
+ {
214
+ int i;
215
+ HashEntry *he;
216
+ free_ft free_key = self->free_key_i;
217
+ free_ft free_value = self->free_value_i;
218
+
219
+ /* Clear all the hash values and keys as necessary */
220
+ if (free_key != dummy_free || free_value != dummy_free) {
221
+ for (i = 0; i <= self->mask; i++) {
222
+ he = &self->table[i];
223
+ if (he->key != NULL && he->key != dummy_key) {
224
+ free_value(he->value);
225
+ free_key(he->key);
226
+ }
227
+ he->key = NULL;
228
+ }
229
+ }
230
+ ZEROSET_N(self->table, HashEntry, self->mask + 1);
231
+ self->size = 0;
232
+ self->fill = 0;
233
+ }
234
+
235
+ void h_destroy(Hash *self)
236
+ {
237
+ if (--(self->ref_cnt) <= 0) {
238
+ h_clear(self);
239
+
240
+ /* if a new table was created, be sure to free it */
241
+ if (self->table != self->smalltable) {
242
+ free(self->table);
243
+ }
244
+
245
+ if (num_free_hts < MAX_FREE_HASH_TABLES) {
246
+ free_hts[num_free_hts++] = self;
247
+ }
248
+ else {
249
+ free(self);
250
+ }
251
+ }
252
+ }
253
+
254
+ void *h_get(Hash *self, const void *key)
255
+ {
256
+ /* Note: lookup_i will never return NULL. */
257
+ return self->lookup_i(self, key)->value;
258
+ }
259
+
260
+ int h_del(Hash *self, const void *key)
261
+ {
262
+ HashEntry *he = self->lookup_i(self, key);
263
+
264
+ if (he->key != NULL && he->key != dummy_key) {
265
+ self->free_key_i(he->key);
266
+ self->free_value_i(he->value);
267
+ he->key = dummy_key;
268
+ he->value = NULL;
269
+ self->size--;
270
+ return true;
271
+ }
272
+ else {
273
+ return false;
274
+ }
275
+ }
276
+
277
+ void *h_rem(Hash *self, const void *key, bool destroy_key)
278
+ {
279
+ void *val;
280
+ HashEntry *he = self->lookup_i(self, key);
281
+
282
+ if (he->key != NULL && he->key != dummy_key) {
283
+ if (destroy_key) {
284
+ self->free_key_i(he->key);
285
+ }
286
+
287
+ he->key = dummy_key;
288
+ val = he->value;
289
+ he->value = NULL;
290
+ self->size--;
291
+ return val;
292
+ }
293
+ else {
294
+ return NULL;
295
+ }
296
+ }
297
+
298
+ static int h_resize(Hash *self, int min_newsize)
299
+ {
300
+ HashEntry smallcopy[HASH_MINSIZE];
301
+ HashEntry *oldtable;
302
+ HashEntry *he_old, *he_new;
303
+ int newsize, num_active;
304
+
305
+ /* newsize will be a power of two */
306
+ for (newsize = HASH_MINSIZE; newsize < min_newsize; newsize <<= 1) {
307
+ }
308
+
309
+ oldtable = self->table;
310
+ if (newsize == HASH_MINSIZE) {
311
+ if (self->table == self->smalltable) {
312
+ /* need to copy the data out so we can rebuild the table into
313
+ * the same space */
314
+ memcpy(smallcopy, self->smalltable, sizeof(smallcopy));
315
+ oldtable = smallcopy;
316
+ }
317
+ else {
318
+ self->table = self->smalltable;
319
+ }
320
+ }
321
+ else {
322
+ self->table = ALLOC_N(HashEntry, newsize);
323
+ }
324
+ memset(self->table, 0, sizeof(HashEntry) * newsize);
325
+ self->fill = self->size;
326
+ self->mask = newsize - 1;
327
+
328
+ for (num_active = self->size, he_old = oldtable; num_active > 0; he_old++) {
329
+ if (he_old->key && he_old->key != dummy_key) { /* active entry */
330
+ /*he_new = self->lookup_i(self, he_old->key); */
331
+ he_new = h_resize_lookup(self, he_old->hash);
332
+ he_new->key = he_old->key;
333
+ he_new->value = he_old->value;
334
+ num_active--;
335
+ } /* else empty entry so nothing to do */
336
+ }
337
+ if (oldtable != smallcopy && oldtable != self->smalltable) {
338
+ free(oldtable);
339
+ }
340
+ return 0;
341
+ }
342
+
343
+ INLINE bool h_set_ext(Hash *self, const void *key, HashEntry **he)
344
+ {
345
+ *he = self->lookup_i(self, key);
346
+ if ((*he)->key == NULL) {
347
+ if (self->fill * 3 > self->mask * 2) {
348
+ h_resize(self, self->size * ((self->size > SLOW_DOWN) ? 4 : 2));
349
+ *he = self->lookup_i(self, key);
350
+ }
351
+ self->fill++;
352
+ self->size++;
353
+ return true;
354
+ }
355
+ else if ((*he)->key == dummy_key) {
356
+ self->size++;
357
+ return true;
358
+ }
359
+
360
+ return false;
361
+ }
362
+
363
+ HashKeyStatus h_set(Hash *self, const void *key, void *value)
364
+ {
365
+ HashKeyStatus ret_val = HASH_KEY_DOES_NOT_EXIST;
366
+ HashEntry *he;
367
+ if (!h_set_ext(self, key, &he)) {
368
+ if (he->key != key) {
369
+ self->free_key_i(he->key);
370
+ if (he->value != value) {
371
+ self->free_value_i(he->value);
372
+ }
373
+ ret_val = HASH_KEY_EQUAL;
374
+ }
375
+ else {
376
+ /* Only free old value if it isn't the new value */
377
+ if (he->value != value) {
378
+ self->free_value_i(he->value);
379
+ }
380
+ ret_val = HASH_KEY_SAME;
381
+ }
382
+ }
383
+ he->key = (void *)key;
384
+ he->value = value;
385
+
386
+ return ret_val;
387
+ }
388
+
389
+ int h_set_safe(Hash *self, const void *key, void *value)
390
+ {
391
+ HashEntry *he;
392
+ if (h_set_ext(self, key, &he)) {
393
+ he->key = (void *)key;
394
+ he->value = value;
395
+ return true;
396
+ }
397
+ else {
398
+ return false;
399
+ }
400
+ }
401
+
402
+ HashKeyStatus h_has_key(Hash *self, const void *key)
403
+ {
404
+ HashEntry *he = self->lookup_i(self, key);
405
+ if (he->key == NULL || he->key == dummy_key) {
406
+ return HASH_KEY_DOES_NOT_EXIST;
407
+ }
408
+ else if (he->key == key) {
409
+ return HASH_KEY_SAME;
410
+ }
411
+ return HASH_KEY_EQUAL;
412
+ }
413
+
414
+ INLINE void *h_get_int(Hash *self, const unsigned long key)
415
+ {
416
+ return h_get(self, (const void *)key);
417
+ }
418
+
419
+ INLINE int h_del_int(Hash *self, const unsigned long key)
420
+ {
421
+ return h_del(self, (const void *)key);
422
+ }
423
+
424
+ INLINE void *h_rem_int(Hash *self, const unsigned long key)
425
+ {
426
+ return h_rem(self, (const void *)key, false);
427
+ }
428
+
429
+ INLINE HashKeyStatus h_set_int(Hash *self,
430
+ const unsigned long key,
431
+ void *value)
432
+ {
433
+ HashKeyStatus ret_val = HASH_KEY_DOES_NOT_EXIST;
434
+ HashEntry *he;
435
+ if (!h_set_ext(self, (const void *)key, &he)) {
436
+ /* Only free old value if it isn't the new value */
437
+ if (he->value != value) {
438
+ self->free_value_i(he->value);
439
+ }
440
+ ret_val = HASH_KEY_EQUAL;
441
+ }
442
+ he->key = dummy_int_key;
443
+ he->value = value;
444
+
445
+ return ret_val;
446
+ }
447
+
448
+ INLINE int h_set_safe_int(Hash *self, const unsigned long key, void *value)
449
+ {
450
+ HashEntry *he;
451
+ if (h_set_ext(self, (const void *)key, &he)) {
452
+ he->key = dummy_int_key;
453
+ he->value = value;
454
+ return true;
455
+ }
456
+ return false;
457
+ }
458
+
459
+ INLINE int h_has_key_int(Hash *self, const unsigned long key)
460
+ {
461
+ return h_has_key(self, (const void *)key);
462
+ }
463
+
464
+ void h_each(Hash *self,
465
+ void (*each_kv) (void *key, void *value, void *arg), void *arg)
466
+ {
467
+ HashEntry *he;
468
+ int i = self->size;
469
+ for (he = self->table; i > 0; he++) {
470
+ if (he->key && he->key != dummy_key) { /* active entry */
471
+ each_kv(he->key, he->value, arg);
472
+ i--;
473
+ }
474
+ }
475
+ }
476
+
477
+ Hash *h_clone(Hash *self, h_clone_ft clone_key, h_clone_ft clone_value)
478
+ {
479
+ void *key, *value;
480
+ HashEntry *he;
481
+ int i = self->size;
482
+ Hash *ht_clone;
483
+
484
+ ht_clone = h_new(self->hash_i,
485
+ self->eq_i,
486
+ self->free_key_i,
487
+ self->free_value_i);
488
+
489
+ for (he = self->table; i > 0; he++) {
490
+ if (he->key && he->key != dummy_key) { /* active entry */
491
+ key = clone_key ? clone_key(he->key) : he->key;
492
+ value = clone_value ? clone_value(he->value) : he->value;
493
+ h_set(ht_clone, key, value);
494
+ i--;
495
+ }
496
+ }
497
+ return ht_clone;
498
+ }
499
+
500
+ void h_str_print_keys(Hash *self, FILE *out)
501
+ {
502
+ HashEntry *he;
503
+ int i = self->size;
504
+ char **keys = ALLOC_N(char *, self->size);
505
+ for (he = self->table; i > 0; he++) {
506
+ if (he->key && he->key != dummy_key) { /* active entry */
507
+ i--;
508
+ keys[i] = (char *)he->key;
509
+ }
510
+ }
511
+ strsort(keys, self->size);
512
+ fprintf(out, "keys:\n");
513
+ for (i = 0; i < self->size; i++) {
514
+ fprintf(out, "\t%s\n", keys[i]);
515
+ }
516
+ free(keys);
517
+ }
518
+
519
+ void hash_finalize()
520
+ {
521
+ while (num_free_hts > 0) {
522
+ free(free_hts[--num_free_hts]);
523
+ }
524
+ }