np-ferret 0.11.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (275) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +102 -0
  4. data/Rakefile +338 -0
  5. data/TODO +17 -0
  6. data/TUTORIAL +231 -0
  7. data/bin/ferret-browser +79 -0
  8. data/ext/Makefile +218 -0
  9. data/ext/analysis.c +1584 -0
  10. data/ext/analysis.h +219 -0
  11. data/ext/analysis.o +0 -0
  12. data/ext/api.c +69 -0
  13. data/ext/api.h +27 -0
  14. data/ext/api.o +0 -0
  15. data/ext/array.c +123 -0
  16. data/ext/array.h +53 -0
  17. data/ext/array.o +0 -0
  18. data/ext/bitvector.c +540 -0
  19. data/ext/bitvector.h +272 -0
  20. data/ext/bitvector.o +0 -0
  21. data/ext/compound_io.c +383 -0
  22. data/ext/compound_io.o +0 -0
  23. data/ext/config.h +42 -0
  24. data/ext/document.c +156 -0
  25. data/ext/document.h +53 -0
  26. data/ext/document.o +0 -0
  27. data/ext/except.c +120 -0
  28. data/ext/except.h +168 -0
  29. data/ext/except.o +0 -0
  30. data/ext/extconf.rb +14 -0
  31. data/ext/ferret.c +402 -0
  32. data/ext/ferret.h +91 -0
  33. data/ext/ferret.o +0 -0
  34. data/ext/ferret_ext.bundle +0 -0
  35. data/ext/filter.c +156 -0
  36. data/ext/filter.o +0 -0
  37. data/ext/fs_store.c +484 -0
  38. data/ext/fs_store.o +0 -0
  39. data/ext/global.c +418 -0
  40. data/ext/global.h +117 -0
  41. data/ext/global.o +0 -0
  42. data/ext/hash.c +598 -0
  43. data/ext/hash.h +475 -0
  44. data/ext/hash.o +0 -0
  45. data/ext/hashset.c +170 -0
  46. data/ext/hashset.h +187 -0
  47. data/ext/hashset.o +0 -0
  48. data/ext/header.h +58 -0
  49. data/ext/helper.c +62 -0
  50. data/ext/helper.h +13 -0
  51. data/ext/helper.o +0 -0
  52. data/ext/inc/lang.h +48 -0
  53. data/ext/inc/threading.h +31 -0
  54. data/ext/index.c +6510 -0
  55. data/ext/index.h +964 -0
  56. data/ext/index.o +0 -0
  57. data/ext/lang.h +66 -0
  58. data/ext/libstemmer.c +92 -0
  59. data/ext/libstemmer.h +79 -0
  60. data/ext/libstemmer.o +0 -0
  61. data/ext/mempool.c +87 -0
  62. data/ext/mempool.h +35 -0
  63. data/ext/mempool.o +0 -0
  64. data/ext/modules.h +162 -0
  65. data/ext/multimapper.c +310 -0
  66. data/ext/multimapper.h +51 -0
  67. data/ext/multimapper.o +0 -0
  68. data/ext/posh.c +1006 -0
  69. data/ext/posh.h +1007 -0
  70. data/ext/posh.o +0 -0
  71. data/ext/priorityqueue.c +151 -0
  72. data/ext/priorityqueue.h +143 -0
  73. data/ext/priorityqueue.o +0 -0
  74. data/ext/q_boolean.c +1608 -0
  75. data/ext/q_boolean.o +0 -0
  76. data/ext/q_const_score.c +165 -0
  77. data/ext/q_const_score.o +0 -0
  78. data/ext/q_filtered_query.c +209 -0
  79. data/ext/q_filtered_query.o +0 -0
  80. data/ext/q_fuzzy.c +335 -0
  81. data/ext/q_fuzzy.o +0 -0
  82. data/ext/q_match_all.c +148 -0
  83. data/ext/q_match_all.o +0 -0
  84. data/ext/q_multi_term.c +677 -0
  85. data/ext/q_multi_term.o +0 -0
  86. data/ext/q_parser.c +2825 -0
  87. data/ext/q_parser.o +0 -0
  88. data/ext/q_phrase.c +1126 -0
  89. data/ext/q_phrase.o +0 -0
  90. data/ext/q_prefix.c +100 -0
  91. data/ext/q_prefix.o +0 -0
  92. data/ext/q_range.c +356 -0
  93. data/ext/q_range.o +0 -0
  94. data/ext/q_span.c +2402 -0
  95. data/ext/q_span.o +0 -0
  96. data/ext/q_term.c +337 -0
  97. data/ext/q_term.o +0 -0
  98. data/ext/q_wildcard.c +171 -0
  99. data/ext/q_wildcard.o +0 -0
  100. data/ext/r_analysis.c +2636 -0
  101. data/ext/r_analysis.o +0 -0
  102. data/ext/r_index.c +3509 -0
  103. data/ext/r_index.o +0 -0
  104. data/ext/r_qparser.c +585 -0
  105. data/ext/r_qparser.o +0 -0
  106. data/ext/r_search.c +4240 -0
  107. data/ext/r_search.o +0 -0
  108. data/ext/r_store.c +513 -0
  109. data/ext/r_store.o +0 -0
  110. data/ext/r_utils.c +963 -0
  111. data/ext/r_utils.o +0 -0
  112. data/ext/ram_store.c +471 -0
  113. data/ext/ram_store.o +0 -0
  114. data/ext/search.c +1743 -0
  115. data/ext/search.h +885 -0
  116. data/ext/search.o +0 -0
  117. data/ext/similarity.c +150 -0
  118. data/ext/similarity.h +82 -0
  119. data/ext/similarity.o +0 -0
  120. data/ext/sort.c +985 -0
  121. data/ext/sort.o +0 -0
  122. data/ext/stem_ISO_8859_1_danish.c +338 -0
  123. data/ext/stem_ISO_8859_1_danish.h +16 -0
  124. data/ext/stem_ISO_8859_1_danish.o +0 -0
  125. data/ext/stem_ISO_8859_1_dutch.c +635 -0
  126. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  127. data/ext/stem_ISO_8859_1_dutch.o +0 -0
  128. data/ext/stem_ISO_8859_1_english.c +1156 -0
  129. data/ext/stem_ISO_8859_1_english.h +16 -0
  130. data/ext/stem_ISO_8859_1_english.o +0 -0
  131. data/ext/stem_ISO_8859_1_finnish.c +792 -0
  132. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  133. data/ext/stem_ISO_8859_1_finnish.o +0 -0
  134. data/ext/stem_ISO_8859_1_french.c +1276 -0
  135. data/ext/stem_ISO_8859_1_french.h +16 -0
  136. data/ext/stem_ISO_8859_1_french.o +0 -0
  137. data/ext/stem_ISO_8859_1_german.c +512 -0
  138. data/ext/stem_ISO_8859_1_german.h +16 -0
  139. data/ext/stem_ISO_8859_1_german.o +0 -0
  140. data/ext/stem_ISO_8859_1_italian.c +1091 -0
  141. data/ext/stem_ISO_8859_1_italian.h +16 -0
  142. data/ext/stem_ISO_8859_1_italian.o +0 -0
  143. data/ext/stem_ISO_8859_1_norwegian.c +296 -0
  144. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  145. data/ext/stem_ISO_8859_1_norwegian.o +0 -0
  146. data/ext/stem_ISO_8859_1_porter.c +776 -0
  147. data/ext/stem_ISO_8859_1_porter.h +16 -0
  148. data/ext/stem_ISO_8859_1_porter.o +0 -0
  149. data/ext/stem_ISO_8859_1_portuguese.c +1035 -0
  150. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  151. data/ext/stem_ISO_8859_1_portuguese.o +0 -0
  152. data/ext/stem_ISO_8859_1_spanish.c +1119 -0
  153. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  154. data/ext/stem_ISO_8859_1_spanish.o +0 -0
  155. data/ext/stem_ISO_8859_1_swedish.c +307 -0
  156. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  157. data/ext/stem_ISO_8859_1_swedish.o +0 -0
  158. data/ext/stem_KOI8_R_russian.c +701 -0
  159. data/ext/stem_KOI8_R_russian.h +16 -0
  160. data/ext/stem_KOI8_R_russian.o +0 -0
  161. data/ext/stem_UTF_8_danish.c +344 -0
  162. data/ext/stem_UTF_8_danish.h +16 -0
  163. data/ext/stem_UTF_8_danish.o +0 -0
  164. data/ext/stem_UTF_8_dutch.c +653 -0
  165. data/ext/stem_UTF_8_dutch.h +16 -0
  166. data/ext/stem_UTF_8_dutch.o +0 -0
  167. data/ext/stem_UTF_8_english.c +1176 -0
  168. data/ext/stem_UTF_8_english.h +16 -0
  169. data/ext/stem_UTF_8_english.o +0 -0
  170. data/ext/stem_UTF_8_finnish.c +808 -0
  171. data/ext/stem_UTF_8_finnish.h +16 -0
  172. data/ext/stem_UTF_8_finnish.o +0 -0
  173. data/ext/stem_UTF_8_french.c +1296 -0
  174. data/ext/stem_UTF_8_french.h +16 -0
  175. data/ext/stem_UTF_8_french.o +0 -0
  176. data/ext/stem_UTF_8_german.c +526 -0
  177. data/ext/stem_UTF_8_german.h +16 -0
  178. data/ext/stem_UTF_8_german.o +0 -0
  179. data/ext/stem_UTF_8_italian.c +1113 -0
  180. data/ext/stem_UTF_8_italian.h +16 -0
  181. data/ext/stem_UTF_8_italian.o +0 -0
  182. data/ext/stem_UTF_8_norwegian.c +302 -0
  183. data/ext/stem_UTF_8_norwegian.h +16 -0
  184. data/ext/stem_UTF_8_norwegian.o +0 -0
  185. data/ext/stem_UTF_8_porter.c +794 -0
  186. data/ext/stem_UTF_8_porter.h +16 -0
  187. data/ext/stem_UTF_8_porter.o +0 -0
  188. data/ext/stem_UTF_8_portuguese.c +1055 -0
  189. data/ext/stem_UTF_8_portuguese.h +16 -0
  190. data/ext/stem_UTF_8_portuguese.o +0 -0
  191. data/ext/stem_UTF_8_russian.c +709 -0
  192. data/ext/stem_UTF_8_russian.h +16 -0
  193. data/ext/stem_UTF_8_russian.o +0 -0
  194. data/ext/stem_UTF_8_spanish.c +1137 -0
  195. data/ext/stem_UTF_8_spanish.h +16 -0
  196. data/ext/stem_UTF_8_spanish.o +0 -0
  197. data/ext/stem_UTF_8_swedish.c +313 -0
  198. data/ext/stem_UTF_8_swedish.h +16 -0
  199. data/ext/stem_UTF_8_swedish.o +0 -0
  200. data/ext/stopwords.c +401 -0
  201. data/ext/stopwords.o +0 -0
  202. data/ext/store.c +692 -0
  203. data/ext/store.h +777 -0
  204. data/ext/store.o +0 -0
  205. data/ext/term_vectors.c +352 -0
  206. data/ext/term_vectors.o +0 -0
  207. data/ext/threading.h +31 -0
  208. data/ext/utilities.c +446 -0
  209. data/ext/utilities.o +0 -0
  210. data/ext/win32.h +54 -0
  211. data/ferret.gemspec +39 -0
  212. data/lib/ferret.rb +29 -0
  213. data/lib/ferret/browser.rb +246 -0
  214. data/lib/ferret/browser/s/global.js +192 -0
  215. data/lib/ferret/browser/s/style.css +148 -0
  216. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  217. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  218. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  219. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  220. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  221. data/lib/ferret/browser/views/layout.rhtml +22 -0
  222. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  223. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  224. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  225. data/lib/ferret/browser/webrick.rb +14 -0
  226. data/lib/ferret/document.rb +130 -0
  227. data/lib/ferret/field_infos.rb +44 -0
  228. data/lib/ferret/index.rb +786 -0
  229. data/lib/ferret/number_tools.rb +157 -0
  230. data/lib/ferret_ext.bundle +0 -0
  231. data/lib/ferret_version.rb +3 -0
  232. data/pkg/ferret-0.11.6.gem +0 -0
  233. data/pkg/ferret-0.11.6.tgz +0 -0
  234. data/pkg/ferret-0.11.6.zip +0 -0
  235. data/setup.rb +1555 -0
  236. data/test/test_all.rb +5 -0
  237. data/test/test_helper.rb +24 -0
  238. data/test/threading/number_to_spoken.rb +132 -0
  239. data/test/threading/thread_safety_index_test.rb +79 -0
  240. data/test/threading/thread_safety_read_write_test.rb +76 -0
  241. data/test/threading/thread_safety_test.rb +133 -0
  242. data/test/unit/analysis/tc_analyzer.rb +548 -0
  243. data/test/unit/analysis/tc_token_stream.rb +646 -0
  244. data/test/unit/index/tc_index.rb +762 -0
  245. data/test/unit/index/tc_index_reader.rb +699 -0
  246. data/test/unit/index/tc_index_writer.rb +437 -0
  247. data/test/unit/index/th_doc.rb +315 -0
  248. data/test/unit/largefile/tc_largefile.rb +46 -0
  249. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  250. data/test/unit/search/tc_filter.rb +135 -0
  251. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  252. data/test/unit/search/tc_index_searcher.rb +61 -0
  253. data/test/unit/search/tc_multi_searcher.rb +128 -0
  254. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  255. data/test/unit/search/tc_search_and_sort.rb +179 -0
  256. data/test/unit/search/tc_sort.rb +49 -0
  257. data/test/unit/search/tc_sort_field.rb +27 -0
  258. data/test/unit/search/tc_spans.rb +190 -0
  259. data/test/unit/search/tm_searcher.rb +384 -0
  260. data/test/unit/store/tc_fs_store.rb +77 -0
  261. data/test/unit/store/tc_ram_store.rb +35 -0
  262. data/test/unit/store/tm_store.rb +34 -0
  263. data/test/unit/store/tm_store_lock.rb +68 -0
  264. data/test/unit/tc_document.rb +81 -0
  265. data/test/unit/ts_analysis.rb +2 -0
  266. data/test/unit/ts_index.rb +2 -0
  267. data/test/unit/ts_largefile.rb +4 -0
  268. data/test/unit/ts_query_parser.rb +2 -0
  269. data/test/unit/ts_search.rb +2 -0
  270. data/test/unit/ts_store.rb +2 -0
  271. data/test/unit/ts_utils.rb +2 -0
  272. data/test/unit/utils/tc_bit_vector.rb +295 -0
  273. data/test/unit/utils/tc_number_tools.rb +117 -0
  274. data/test/unit/utils/tc_priority_queue.rb +106 -0
  275. metadata +392 -0
data/ext/global.o ADDED
Binary file
data/ext/hash.c ADDED
@@ -0,0 +1,598 @@
1
+ #include "hash.h"
2
+ #include "global.h"
3
+ #include <string.h>
4
+
5
+
6
+ /****************************************************************************
7
+ *
8
+ * HashTable
9
+ *
10
+ * This hash table is modeled after Python's dictobject and a description of
11
+ * the algorithm can be found in the file dictobject.c in Python's src
12
+ ****************************************************************************/
13
+
14
+ static char *dummy_key = "";
15
+
16
+ #define PERTURB_SHIFT 5
17
+ #define MAX_FREE_HASH_TABLES 80
18
+
19
+ #define max(a,b) ((b < a) ? a : b)
20
+
21
+ static HashTable *free_hts[MAX_FREE_HASH_TABLES];
22
+ static int num_free_hts = 0;
23
+
24
/**
 * Compute the hash of a NUL-terminated string.
 *
 * Every byte, read as an unsigned char, is folded into the accumulator as
 * h = 37 * h + byte. The empty string hashes to 0.
 *
 * @param str the string to hash
 * @return the hash value
 */
unsigned long str_hash(const char *const str)
{
    const unsigned char *cursor = (const unsigned char *)str;
    unsigned long acc = 0;

    while (*cursor != '\0') {
        acc = acc * 37 + *cursor;
        cursor++;
    }
    return acc;
}
35
+
36
/**
 * Hash a pointer by its value: the pointer converted to unsigned long is the
 * hash.
 *
 * @param ptr the pointer to hash
 * @return +ptr+ converted to unsigned long
 */
unsigned long ptr_hash(const void *const ptr)
{
    const unsigned long address = (unsigned long)ptr;

    return address;
}
40
+
41
/**
 * Identity comparison of two pointers.
 *
 * @param q1 first pointer
 * @param q2 second pointer
 * @return 1 when both pointers are the same address, 0 otherwise
 */
int ptr_eq(const void *q1, const void *q2)
{
    if (q1 != q2) {
        return 0;
    }
    return 1;
}
45
+
46
/**
 * Equality callback installed by h_new_int(). It ignores both arguments and
 * reports every pair of keys as equal.
 *
 * @param q1 unused
 * @param q2 unused
 * @return always true
 */
static int int_eq(const void *q1, const void *q2)
{
    const int always_equal = true;

    (void)q2;
    (void)q1;
    return always_equal;
}
52
+
53
/**
 * Hash callback installed by h_new_int(): the key is read through an
 * unsigned long pointer and that value is the hash.
 *
 * @param i pointer to the unsigned long key
 * @return the value +i+ points at
 */
static unsigned long int_hash(const void *i)
{
    const unsigned long *slot = (const unsigned long *)i;

    return *slot;
}
57
+
58
+ typedef HashEntry *(*lookup_ft)(struct HashTable *ht, register const void *key);
59
+
60
+ /**
61
+ * Fast lookup function for resizing as we know there are no equal elements or
62
+ * deletes to worry about.
63
+ *
64
+ * @param ht the HashTable to do the fast lookup in
65
+ * @param the hashkey we are looking for
66
+ */
67
+ static INLINE HashEntry *h_resize_lookup(HashTable *ht,
68
+ register const unsigned long hash)
69
+ {
70
+ register unsigned long perturb;
71
+ register int mask = ht->mask;
72
+ register HashEntry *he0 = ht->table;
73
+ register int i = hash & mask;
74
+ register HashEntry *he = &he0[i];
75
+
76
+ if (he->key == NULL) {
77
+ he->hash = hash;
78
+ return he;
79
+ }
80
+
81
+ for (perturb = hash;; perturb >>= PERTURB_SHIFT) {
82
+ i = (i << 2) + i + perturb + 1;
83
+ he = &he0[i & mask];
84
+ if (he->key == NULL) {
85
+ he->hash = hash;
86
+ return he;
87
+ }
88
+ }
89
+ }
90
+
91
+ HashEntry *h_lookup_int(HashTable *ht, const void *key)
92
+ {
93
+ register unsigned long hash = *((int *)key);
94
+ register unsigned long perturb;
95
+ register int mask = ht->mask;
96
+ register HashEntry *he0 = ht->table;
97
+ register int i = hash & mask;
98
+ register HashEntry *he = &he0[i];
99
+ register HashEntry *freeslot = NULL;
100
+
101
+ if (he->key == NULL || he->hash == hash) {
102
+ he->hash = hash;
103
+ return he;
104
+ }
105
+ if (he->key == dummy_key) {
106
+ freeslot = he;
107
+ }
108
+
109
+ for (perturb = hash;; perturb >>= PERTURB_SHIFT) {
110
+ i = (i << 2) + i + perturb + 1;
111
+ he = &he0[i & mask];
112
+ if (he->key == NULL) {
113
+ if (freeslot != NULL) {
114
+ he = freeslot;
115
+ }
116
+ he->hash = hash;
117
+ return he;
118
+ }
119
+ if (he->hash == hash) {
120
+ return he;
121
+ }
122
+ if (he->key == dummy_key && freeslot == NULL) {
123
+ freeslot = he;
124
+ }
125
+ }
126
+ }
127
+
128
+ HashEntry *h_lookup_str(HashTable *ht, register const char *key)
129
+ {
130
+ register unsigned long hash = str_hash(key);
131
+ register unsigned long perturb;
132
+ register int mask = ht->mask;
133
+ register HashEntry *he0 = ht->table;
134
+ register int i = hash & mask;
135
+ register HashEntry *he = &he0[i];
136
+ register HashEntry *freeslot;
137
+
138
+ if (he->key == NULL || he->key == key) {
139
+ he->hash = hash;
140
+ return he;
141
+ }
142
+ if (he->key == dummy_key) {
143
+ freeslot = he;
144
+ }
145
+ else {
146
+ if ((he->hash == hash) && (strcmp(he->key, key) == 0)) {
147
+ return he;
148
+ }
149
+ freeslot = NULL;
150
+ }
151
+
152
+ for (perturb = hash;; perturb >>= PERTURB_SHIFT) {
153
+ i = (i << 2) + i + perturb + 1;
154
+ he = &he0[i & mask];
155
+ if (he->key == NULL) {
156
+ if (freeslot != NULL) {
157
+ he = freeslot;
158
+ }
159
+ he->hash = hash;
160
+ return he;
161
+ }
162
+ if (he->key == key
163
+ || (he->hash == hash
164
+ && he->key != dummy_key && strcmp(he->key, key) == 0)) {
165
+ return he;
166
+ }
167
+ if (he->key == dummy_key && freeslot == NULL) {
168
+ freeslot = he;
169
+ }
170
+ }
171
+ }
172
+
173
+ HashEntry *h_lookup(HashTable *ht, register const void *key)
174
+ {
175
+ register unsigned int hash = ht->hash_i(key);
176
+ register unsigned int perturb;
177
+ register int mask = ht->mask;
178
+ register HashEntry *he0 = ht->table;
179
+ register int i = hash & mask;
180
+ register HashEntry *he = &he0[i];
181
+ register HashEntry *freeslot;
182
+ eq_ft eq = ht->eq_i;
183
+
184
+ if (he->key == NULL || he->key == key) {
185
+ he->hash = hash;
186
+ return he;
187
+ }
188
+ if (he->key == dummy_key) {
189
+ freeslot = he;
190
+ }
191
+ else {
192
+ if ((he->hash == hash) && eq(he->key, key)) {
193
+ return he;
194
+ }
195
+ freeslot = NULL;
196
+ }
197
+
198
+ for (perturb = hash;; perturb >>= PERTURB_SHIFT) {
199
+ i = (i << 2) + i + perturb + 1;
200
+ he = &he0[i & mask];
201
+ if (he->key == NULL) {
202
+ if (freeslot != NULL) {
203
+ he = freeslot;
204
+ }
205
+ he->hash = hash;
206
+ return he;
207
+ }
208
+ if (he->key == key
209
+ || (he->hash == hash
210
+ && he->key != dummy_key && eq(he->key, key))) {
211
+ return he;
212
+ }
213
+ if (he->key == dummy_key && freeslot == NULL) {
214
+ freeslot = he;
215
+ }
216
+ }
217
+ }
218
+
219
+ HashTable *h_new_str(free_ft free_key, free_ft free_value)
220
+ {
221
+ HashTable *ht;
222
+ if (num_free_hts > 0) {
223
+ ht = free_hts[--num_free_hts];
224
+ // ht->max_mask is preserved...
225
+ // ht->table is preserved...
226
+ }
227
+ else {
228
+ ht = ALLOC(HashTable);
229
+ ht->max_mask = HASH_MINSIZE - 1;
230
+ ht->table = ht->smalltable;
231
+ }
232
+ ht->fill = 0;
233
+ ht->size = 0;
234
+ ht->mask = HASH_MINSIZE - 1;
235
+ // memset(ht->smalltable, 0, sizeof(ht->smalltable));
236
+ memset(ht->table, 0, sizeof(HashEntry)*(ht->mask + 1));
237
+ ht->lookup_i = (lookup_ft)&h_lookup_str;
238
+
239
+ ht->free_key_i = free_key != NULL ? free_key : &dummy_free;
240
+ ht->free_value_i = free_value != NULL ? free_value : &dummy_free;
241
+ ht->ref_cnt = 1;
242
+ return ht;
243
+ }
244
+
245
+ HashTable *h_new_int(free_ft free_value)
246
+ {
247
+ HashTable *ht = h_new_str(NULL, free_value);
248
+ ht->lookup_i = &h_lookup_int;
249
+ ht->eq_i = int_eq;
250
+ ht->hash_i = int_hash;
251
+ return ht;
252
+ }
253
+
254
+ HashTable *h_new(hash_ft hash, eq_ft eq, free_ft free_key, free_ft free_value)
255
+ {
256
+ HashTable *ht = h_new_str(free_key, free_value);
257
+
258
+ ht->lookup_i = &h_lookup;
259
+ ht->eq_i = eq;
260
+ ht->hash_i = hash;
261
+ return ht;
262
+ }
263
+
264
+ void h_clear(HashTable *ht)
265
+ {
266
+ int i;
267
+ HashEntry *he;
268
+ free_ft free_key = ht->free_key_i;
269
+ free_ft free_value = ht->free_value_i;
270
+
271
+ /* Clear all the hash values and keys as necessary */
272
+ if (free_key != dummy_free || free_value != dummy_free) {
273
+ for (i = 0; i <= ht->mask; i++) {
274
+ he = &ht->table[i];
275
+ if (he->key != NULL && he->key != dummy_key) {
276
+ free_value(he->value);
277
+ free_key(he->key);
278
+ }
279
+ he->key = NULL;
280
+ }
281
+ }
282
+ // Set cur_mask small but leave max_mask big... So that
283
+ // we don't end up having to scan/compress the entire space
284
+ // even when we're working with a small set of values.
285
+ ht->mask = HASH_MINSIZE - 1;
286
+
287
+ ZEROSET_N(ht->table, HashEntry, ht->mask + 1);
288
+ ht->size = 0;
289
+ ht->fill = 0;
290
+ }
291
+
292
+ void h_destroy(HashTable *ht)
293
+ {
294
+ if (--(ht->ref_cnt) <= 0) {
295
+ h_clear(ht);
296
+
297
+ #ifdef DEBUG
298
+ /* if a new table was created, be sure to free it */
299
+ if (ht->table != ht->smalltable) {
300
+ free(ht->table);
301
+ }
302
+ free(ht);
303
+ #else
304
+ if (num_free_hts < MAX_FREE_HASH_TABLES) {
305
+ free_hts[num_free_hts++] = ht;
306
+ }
307
+ else {
308
+ /* if a new table was created, be sure to free it */
309
+ if (ht->table != ht->smalltable) {
310
+ free(ht->table);
311
+ }
312
+ free(ht);
313
+ }
314
+ #endif
315
+ }
316
+ }
317
+
318
+ void *h_get(HashTable *ht, const void *key)
319
+ {
320
+ /* Note: lookup_i will never return NULL. */
321
+ return ht->lookup_i(ht, key)->value;
322
+ }
323
+
324
+ int h_del(HashTable *ht, const void *key)
325
+ {
326
+ HashEntry *he = ht->lookup_i(ht, key);
327
+
328
+ if (he->key != NULL && he->key != dummy_key) {
329
+ ht->free_key_i(he->key);
330
+ ht->free_value_i(he->value);
331
+ he->key = dummy_key;
332
+ he->value = NULL;
333
+ ht->size--;
334
+ return true;
335
+ }
336
+ else {
337
+ return false;
338
+ }
339
+ }
340
+
341
+ void *h_rem(HashTable *ht, const void *key, bool destroy_key)
342
+ {
343
+ void *val;
344
+ HashEntry *he = ht->lookup_i(ht, key);
345
+
346
+ if (he->key != NULL && he->key != dummy_key) {
347
+ if (destroy_key) {
348
+ ht->free_key_i(he->key);
349
+ }
350
+
351
+ he->key = dummy_key;
352
+ val = he->value;
353
+ he->value = NULL;
354
+ ht->size--;
355
+ return val;
356
+ }
357
+ else {
358
+ return NULL;
359
+ }
360
+ }
361
+
362
+ static int h_resize(HashTable *ht, int min_newsize)
363
+ {
364
+ HashEntry smallcopy[HASH_MINSIZE];
365
+ HashEntry *oldtable;
366
+ HashEntry *he_old, *he_new;
367
+ int newsize, num_active;
368
+ bool copied = false;
369
+
370
+ /* newsize will be a power of two */
371
+ for (newsize = HASH_MINSIZE; newsize < min_newsize; newsize <<= 1) {
372
+ }
373
+
374
+ oldtable = ht->table;
375
+ if (newsize == HASH_MINSIZE) {
376
+ if (ht->table == ht->smalltable) {
377
+ /* need to copy the data out so we can rebuild the table into
378
+ * the same space */
379
+ memcpy(smallcopy, ht->smalltable, sizeof(smallcopy));
380
+ oldtable = smallcopy;
381
+ }
382
+ else {
383
+ ht->table = ht->smalltable;
384
+ }
385
+ }
386
+
387
+ //
388
+ // TODO: Figure out why this can cause segmentation faults. :(
389
+ //
390
+ // else if (newsize + ht->mask <= ht->max_mask) {
391
+ // // Instead of re-allocating space every time, try to re-use the existing
392
+ // // space if we can. Requires that both the old and new tables fit in
393
+ // // allocated space, though.
394
+ // // printf("Copying... From %d to %d, %d entries / %d bytes (%d each).\n", ht->table, &ht->table[newsize], (ht->mask + 1), sizeof(HashEntry)*(ht->mask + 1), sizeof(HashEntry));
395
+ // memcpy(&ht->table[newsize], ht->table, sizeof(HashEntry) * (ht->mask + 1));
396
+ // oldtable = &ht->table[newsize];
397
+ // copied = true;
398
+ // }
399
+
400
+ else {
401
+ ht->table = ALLOC_N(HashEntry, newsize);
402
+ ht->max_mask = newsize - 1;
403
+ }
404
+ memset(ht->table, 0, sizeof(HashEntry) * newsize);
405
+ ht->fill = ht->size;
406
+ ht->mask = newsize - 1;
407
+
408
+ for (num_active = ht->size, he_old = oldtable; num_active > 0; he_old++) {
409
+ if (he_old->key && he_old->key != dummy_key) { /* active entry */
410
+ /*he_new = ht->lookup_i(ht, he_old->key); */
411
+ he_new = h_resize_lookup(ht, he_old->hash);
412
+ he_new->key = he_old->key;
413
+ he_new->value = he_old->value;
414
+ num_active--;
415
+ } /* else empty entry so nothing to do */
416
+ }
417
+ if (oldtable != smallcopy && oldtable != ht->smalltable && !copied) {
418
+ free(oldtable);
419
+ }
420
+ return 0;
421
+ }
422
+
423
+ int h_set(HashTable *ht, const void *key, void *value)
424
+ {
425
+ int ret_val = HASH_KEY_DOES_NOT_EXIST;
426
+ HashEntry *he = ht->lookup_i(ht, key);
427
+ if (he->key == NULL) {
428
+ if (ht->fill * 3 > ht->mask * 2) {
429
+ h_resize(ht, ht->size * ((ht->size > SLOW_DOWN) ? 4 : 2));
430
+ he = ht->lookup_i(ht, key);
431
+ }
432
+ ht->fill++;
433
+ ht->size++;
434
+ }
435
+ else if (he->key == dummy_key) {
436
+ ht->size++;
437
+ }
438
+ else if (he->key != key) {
439
+ ht->free_key_i(he->key);
440
+ if (he->value != value) {
441
+ ht->free_value_i(he->value);
442
+ }
443
+ ret_val = HASH_KEY_EQUAL;
444
+ }
445
+ else {
446
+ /* safety check. Only free old value if it isn't the new value */
447
+ if (he->value != value) {
448
+ ht->free_value_i(he->value);
449
+ }
450
+ ret_val = HASH_KEY_SAME;
451
+ }
452
+ he->key = (void *)key;
453
+ he->value = value;
454
+
455
+ /*
456
+ if ((ht->fill > fill) && (ht->fill * 3 > ht->mask * 2)) {
457
+ h_resize(ht, ht->size * ((ht->size > SLOW_DOWN) ? 4 : 2));
458
+ }
459
+ */
460
+ return ret_val;
461
+ }
462
+
463
+ HashEntry *h_set_ext(HashTable *ht, const void *key)
464
+ {
465
+ HashEntry *he = ht->lookup_i(ht, key);
466
+ if (he->key == NULL) {
467
+ if (ht->fill * 3 > ht->mask * 2) {
468
+ h_resize(ht, ht->size * ((ht->size > SLOW_DOWN) ? 4 : 2));
469
+ he = ht->lookup_i(ht, key);
470
+ }
471
+ ht->fill++;
472
+ ht->size++;
473
+ }
474
+ else if (he->key == dummy_key) {
475
+ ht->size++;
476
+ }
477
+
478
+ return he;
479
+ }
480
+
481
+ int h_set_safe(HashTable *ht, const void *key, void *value)
482
+ {
483
+ HashEntry *he = ht->lookup_i(ht, key);
484
+ int fill = ht->fill;
485
+ if (he->key == NULL) {
486
+ ht->fill++;
487
+ ht->size++;
488
+ }
489
+ else if (he->key == dummy_key) {
490
+ ht->size++;
491
+ }
492
+ else {
493
+ return false;
494
+ }
495
+ he->key = (void *)key;
496
+ he->value = value;
497
+
498
+ if ((ht->fill > fill) && (ht->fill * 3 > ht->mask * 2)) {
499
+ h_resize(ht, ht->size * ((ht->size > SLOW_DOWN) ? 4 : 2));
500
+ }
501
+ return true;
502
+ }
503
+
504
+ int h_has_key(HashTable *ht, const void *key)
505
+ {
506
+ HashEntry *he = ht->lookup_i(ht, key);
507
+ if (he->key == NULL || he->key == dummy_key) {
508
+ return HASH_KEY_DOES_NOT_EXIST;
509
+ }
510
+ else if (he->key == key) {
511
+ return HASH_KEY_SAME;
512
+ }
513
+ else {
514
+ return HASH_KEY_EQUAL;
515
+ }
516
+ }
517
+
518
+ void *h_get_int(HashTable *self, const unsigned long key)
519
+ {
520
+ return h_get(self, &key);
521
+ }
522
+
523
+ int h_del_int(HashTable *self, const unsigned long key)
524
+ {
525
+ return h_del(self, &key);
526
+ }
527
+
528
+ void *h_rem_int(HashTable *self, const unsigned long key)
529
+ {
530
+ return h_rem(self, &key, false);
531
+ }
532
+
533
+ int h_set_int(HashTable *self, const unsigned long key, void *value)
534
+ {
535
+ return h_set(self, &key, value);
536
+ }
537
+
538
+ int h_set_safe_int(HashTable *self, const unsigned long key, void *value)
539
+ {
540
+ return h_set_safe(self, &key, value);
541
+ }
542
+
543
+ int h_has_key_int(HashTable *self, const unsigned long key)
544
+ {
545
+ return h_has_key(self, &key);
546
+ }
547
+
548
+ void h_each(HashTable *ht,
549
+ void (*each_kv) (void *key, void *value, void *arg), void *arg)
550
+ {
551
+ HashEntry *he;
552
+ int i = ht->size;
553
+ for (he = ht->table; i > 0; he++) {
554
+ if (he->key && he->key != dummy_key) { /* active entry */
555
+ each_kv(he->key, he->value, arg);
556
+ i--;
557
+ }
558
+ }
559
+ }
560
+
561
+ HashTable *h_clone(HashTable *ht,
562
+ h_clone_func_t clone_key, h_clone_func_t clone_value)
563
+ {
564
+ void *key, *value;
565
+ HashEntry *he;
566
+ int i = ht->size;
567
+ HashTable *ht_clone;
568
+
569
+ if (ht->lookup_i == (lookup_ft)&h_lookup_str) {
570
+ ht_clone = h_new_str(ht->free_key_i, ht->free_value_i);
571
+ }
572
+ else {
573
+ ht_clone = h_new(ht->hash_i, ht->eq_i, ht->free_key_i, ht->free_value_i);
574
+ }
575
+
576
+ for (he = ht->table; i > 0; he++) {
577
+ if (he->key && he->key != dummy_key) { /* active entry */
578
+ key = clone_key ? clone_key(he->key) : he->key;
579
+ value = clone_value ? clone_value(he->value) : he->value;
580
+ h_set(ht_clone, key, value);
581
+ i--;
582
+ }
583
+ }
584
+ return ht_clone;
585
+ }
586
+
587
+ void h_str_print_keys(HashTable *ht)
588
+ {
589
+ HashEntry *he;
590
+ int i = ht->size;
591
+ printf("keys:\n");
592
+ for (he = ht->table; i > 0; he++) {
593
+ if (he->key && he->key != dummy_key) { /* active entry */
594
+ printf("\t%s\n", (char *)he->key);
595
+ i--;
596
+ }
597
+ }
598
+ }