tdb 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,151 @@
1
+ #include "rbtdb.h"
2
+ #include <assert.h>
3
+
4
+ /*
5
+ * https://sites.google.com/site/murmurhash/
6
+ *
7
+ * Public Domain hash functions by Austin Appleby.
8
+ *
9
+ * Trivially adapted for use with Ruby TDB by Eric Wong.
10
+ */
11
+
12
+ /*
13
+ * 'm' and 'r' are mixing constants generated offline.
14
+ * They're not really 'magic', they just happen to work well.
15
+ */
16
/* Multiplier applied in every mixing step of the MurmurHash1 routines. */
static const unsigned int m = 0xc6a4a793U;
/* Distance of the xor-shift that follows each multiply. */
static const int r = 16;
/* Starting hash state; fixed at zero. */
static const unsigned int seed = 0U;
19
+
20
+ unsigned int rbtdb_murmur1(TDB_DATA * key)
21
+ {
22
+ const unsigned char *data = key->dptr;
23
+ int len = (int)key->dsize;
24
+ /* Initialize the hash to a 'random' value */
25
+ unsigned int h = seed ^ (len * m);
26
+
27
+ while (len >= 4) {
28
+ h += *(const unsigned int *)data;
29
+ h *= m;
30
+ h ^= h >> r;
31
+
32
+ data += 4;
33
+ len -= 4;
34
+ }
35
+
36
+ /* Handle the last few bytes of the input array */
37
+ switch (len) {
38
+ case 3:
39
+ h += data[2] << 16;
40
+ case 2:
41
+ h += data[1] << 8;
42
+ case 1:
43
+ h += data[0];
44
+ h *= m;
45
+ h ^= h >> r;
46
+ };
47
+
48
+ /*
49
+ * Do a few final mixes of the hash to ensure the last few
50
+ * bytes are well-incorporated.
51
+ */
52
+ h *= m;
53
+ h ^= h >> 10;
54
+ h *= m;
55
+ h ^= h >> 17;
56
+
57
+ return h;
58
+ }
59
+
60
+ /* adapted from MurmurHashAligned */
61
+ unsigned int rbtdb_murmur1_aligned(TDB_DATA * key)
62
+ {
63
+ const unsigned char *data = key->dptr;
64
+ int len = (int)key->dsize;
65
+ unsigned int h = seed ^ (len * m);
66
+ union { const unsigned char *byte; int integer; } cast = { data };
67
+ int align = cast.integer & 3;
68
+
69
+ if (align & (len >= 4)) {
70
+ /* Pre-load the temp registers */
71
+ unsigned int t = 0, d = 0;
72
+ int sl, sr, pack;
73
+
74
+ switch (align) {
75
+ case 1: t |= data[2] << 16;
76
+ case 2: t |= data[1] << 8;
77
+ case 3: t |= data[0];
78
+ }
79
+
80
+ t <<= (8 * align);
81
+
82
+ data += 4 - align;
83
+ len -= 4 - align;
84
+
85
+ sl = 8 * (4 - align);
86
+ sr = 8 * align;
87
+
88
+ /* Mix */
89
+ while (len >= 4) {
90
+ assert((cast.integer & 3) == 0);
91
+
92
+ d = *(const unsigned int *)data;
93
+ t = (t >> sr) | (d << sl);
94
+ h += t;
95
+ h *= m;
96
+ h ^= h >> r;
97
+ t = d;
98
+
99
+ data += 4;
100
+ len -= 4;
101
+ }
102
+
103
+ /* Handle leftover data in temp registers */
104
+ pack = len < align ? len : align;
105
+ d = 0;
106
+
107
+ switch (pack) {
108
+ case 3:
109
+ d |= data[2] << 16;
110
+ case 2:
111
+ d |= data[1] << 8;
112
+ case 1:
113
+ d |= data[0];
114
+ case 0:
115
+ h += (t >> sr) | (d << sl);
116
+ h *= m;
117
+ h ^= h >> r;
118
+ }
119
+
120
+ data += pack;
121
+ len -= pack;
122
+ } else {
123
+ while (len >= 4) {
124
+ h += *(const unsigned int *)data;
125
+ h *= m;
126
+ h ^= h >> r;
127
+
128
+ data += 4;
129
+ len -= 4;
130
+ }
131
+ }
132
+
133
+ /* Handle tail bytes */
134
+ switch (len) {
135
+ case 3:
136
+ h += data[2] << 16;
137
+ case 2:
138
+ h += data[1] << 8;
139
+ case 1:
140
+ h += data[0];
141
+ h *= m;
142
+ h ^= h >> r;
143
+ };
144
+
145
+ h *= m;
146
+ h ^= h >> 10;
147
+ h *= m;
148
+ h ^= h >> 17;
149
+
150
+ return h;
151
+ }
@@ -0,0 +1,290 @@
1
+ #include "rbtdb.h"
2
+ /*
3
+ * https://sites.google.com/site/murmurhash/
4
+ *
5
+ * Public Domain hash functions by Austin Appleby.
6
+ *
7
+ * Trivially adapted for use with Ruby TDB by Eric Wong.
8
+ */
9
+
10
+ /*
11
+ * 'm' and 'r' are mixing constants generated offline.
12
+ * They're not really 'magic', they just happen to work well.
13
+ */
14
+
15
/* Multiplier applied in every mixing step of the MurmurHash2 routines. */
static const unsigned int m = 0x5bd1e995U;
/* Distance of the xor-shift applied to each input word. */
static const int r = 24;
/* Starting hash state; fixed at zero. */
static const unsigned int seed = 0U;
18
+
19
+ unsigned int rbtdb_murmur2(TDB_DATA * key)
20
+ {
21
+ const unsigned char *data = key->dptr;
22
+ int len = (int)key->dsize;
23
+ /* Initialize the hash to a 'random' value */
24
+ unsigned int h = seed ^ len;
25
+
26
+ while (len >= 4) {
27
+ unsigned int k = *(const unsigned int *)data;
28
+
29
+ k *= m;
30
+ k ^= k >> r;
31
+ k *= m;
32
+
33
+ h *= m;
34
+ h ^= k;
35
+
36
+ data += 4;
37
+ len -= 4;
38
+ }
39
+
40
+ /* Handle the last few bytes of the input array */
41
+ switch (len) {
42
+ case 3:
43
+ h ^= data[2] << 16;
44
+ case 2:
45
+ h ^= data[1] << 8;
46
+ case 1:
47
+ h ^= data[0];
48
+ h *= m;
49
+ };
50
+
51
+ /*
52
+ * Do a few final mixes of the hash to ensure the last few
53
+ * bytes are well-incorporated.
54
+ */
55
+
56
+ h ^= h >> 13;
57
+ h *= m;
58
+ h ^= h >> 15;
59
+
60
+ return h;
61
+ }
62
+
63
+ /*
64
+ * This is a variant of MurmurHash2 modified to use the Merkle-Damgard
65
+ * construction. Bulk speed should be identical to Murmur2, small-key speed
66
+ * will be 10%-20% slower due to the added overhead at the end of the hash.
67
+ *
68
+ * This variant fixes a minor issue where null keys were more likely to
69
+ * collide with each other than expected, and also makes the algorithm
70
+ * more amenable to incremental implementations. All other caveats from
71
+ * MurmurHash2 still apply.
72
+ */
73
+
74
+ #define mmix(h,k) do { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } while (0)
75
+
76
+ unsigned int rbtdb_murmur2a(TDB_DATA * key)
77
+ {
78
+ const unsigned char *data = key->dptr;
79
+ int len = (int)key->dsize;
80
+ unsigned int l = (unsigned int)len;
81
+ unsigned int h = seed;
82
+ unsigned int t = 0;
83
+
84
+ while (len >= 4) {
85
+ unsigned int k = *(const unsigned int *)data;
86
+
87
+ mmix(h, k);
88
+
89
+ data += 4;
90
+ len -= 4;
91
+ }
92
+
93
+ switch (len) {
94
+ case 3:
95
+ t ^= data[2] << 16;
96
+ case 2:
97
+ t ^= data[1] << 8;
98
+ case 1:
99
+ t ^= data[0];
100
+ };
101
+
102
+ mmix(h, t);
103
+ mmix(h, l);
104
+
105
+ h ^= h >> 13;
106
+ h *= m;
107
+ h ^= h >> 15;
108
+
109
+ return h;
110
+ }
111
+
112
+ /*
113
+ * Same algorithm as MurmurHash2, but only does aligned reads - should be safer
114
+ * on certain platforms
115
+ *
116
+ * Performance will be lower than MurmurHash2
117
+ */
118
+
119
+ #define MIX(h,k,m) do { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } while (0)
120
+
121
+ unsigned int rbtdb_murmur2_aligned(TDB_DATA * key)
122
+ {
123
+ const unsigned char *data = key->dptr;
124
+ int len = (int)key->dsize;
125
+ unsigned int h = seed ^ len;
126
+ union { const unsigned char *byte; int integer; } cast = { data };
127
+ int align = cast.integer & 3;
128
+
129
+ if (align && (len >= 4)) {
130
+ /* Pre-load the temp registers */
131
+ unsigned int t = 0, d = 0;
132
+ int sl, sr;
133
+
134
+ switch (align) {
135
+ case 1:
136
+ t |= data[2] << 16;
137
+ case 2:
138
+ t |= data[1] << 8;
139
+ case 3:
140
+ t |= data[0];
141
+ }
142
+
143
+ t <<= (8 * align);
144
+
145
+ data += 4 - align;
146
+ len -= 4 - align;
147
+
148
+ sl = 8 * (4 - align);
149
+ sr = 8 * align;
150
+
151
+ /* Mix */
152
+ while (len >= 4) {
153
+ unsigned int k;
154
+
155
+ d = *(const unsigned int *)data;
156
+ t = (t >> sr) | (d << sl);
157
+
158
+ k = t;
159
+
160
+ MIX(h, k, m);
161
+
162
+ t = d;
163
+
164
+ data += 4;
165
+ len -= 4;
166
+ }
167
+
168
+ /* Handle leftover data in temp registers */
169
+ d = 0;
170
+ if (len >= align) {
171
+ unsigned int k;
172
+
173
+ switch (align) {
174
+ case 3:
175
+ d |= data[2] << 16;
176
+ case 2:
177
+ d |= data[1] << 8;
178
+ case 1:
179
+ d |= data[0];
180
+ }
181
+
182
+ k = (t >> sr) | (d << sl);
183
+ MIX(h, k, m);
184
+
185
+ data += align;
186
+ len -= align;
187
+
188
+ /* Handle tail bytes */
189
+ switch (len) {
190
+ case 3:
191
+ h ^= data[2] << 16;
192
+ case 2:
193
+ h ^= data[1] << 8;
194
+ case 1:
195
+ h ^= data[0];
196
+ h *= m;
197
+ };
198
+ } else {
199
+ switch (len) {
200
+ case 3:
201
+ d |= data[2] << 16;
202
+ case 2:
203
+ d |= data[1] << 8;
204
+ case 1:
205
+ d |= data[0];
206
+ case 0:
207
+ h ^= (t >> sr) | (d << sl);
208
+ h *= m;
209
+ }
210
+ }
211
+
212
+ h ^= h >> 13;
213
+ h *= m;
214
+ h ^= h >> 15;
215
+
216
+ return h;
217
+ } else {
218
+ while (len >= 4) {
219
+ unsigned int k = *(const unsigned int *)data;
220
+
221
+ MIX(h, k, m);
222
+
223
+ data += 4;
224
+ len -= 4;
225
+ }
226
+
227
+ /* Handle tail bytes */
228
+ switch (len) {
229
+ case 3:
230
+ h ^= data[2] << 16;
231
+ case 2:
232
+ h ^= data[1] << 8;
233
+ case 1:
234
+ h ^= data[0];
235
+ h *= m;
236
+ };
237
+
238
+ h ^= h >> 13;
239
+ h *= m;
240
+ h ^= h >> 15;
241
+
242
+ return h;
243
+ }
244
+ }
245
+
246
+ /*
247
+ * Same as MurmurHash2, but endian- and alignment-neutral.
248
+ * Half the speed though, alas.
249
+ */
250
+ unsigned int rbtdb_murmur2_neutral(TDB_DATA * key)
251
+ {
252
+ const unsigned char *data = key->dptr;
253
+ int len = (int)key->dsize;
254
+ unsigned int h = seed ^ len;
255
+
256
+ while (len >= 4) {
257
+ unsigned int k;
258
+
259
+ k = data[0];
260
+ k |= data[1] << 8;
261
+ k |= data[2] << 16;
262
+ k |= data[3] << 24;
263
+
264
+ k *= m;
265
+ k ^= k >> r;
266
+ k *= m;
267
+
268
+ h *= m;
269
+ h ^= k;
270
+
271
+ data += 4;
272
+ len -= 4;
273
+ }
274
+
275
+ switch (len) {
276
+ case 3:
277
+ h ^= data[2] << 16;
278
+ case 2:
279
+ h ^= data[1] << 8;
280
+ case 1:
281
+ h ^= data[0];
282
+ h *= m;
283
+ };
284
+
285
+ h ^= h >> 13;
286
+ h *= m;
287
+ h ^= h >> 15;
288
+
289
+ return h;
290
+ }
@@ -0,0 +1,22 @@
1
+ #ifndef RBTDB_H
2
+ #define RBTDB_H
3
+ #include <ruby.h>
4
+ #include <tdb.h>
5
+
6
+ unsigned int rbtdb_murmur1(TDB_DATA *key);
7
+ unsigned int rbtdb_murmur1_aligned(TDB_DATA *key);
8
+ unsigned int rbtdb_murmur2(TDB_DATA *key);
9
+ unsigned int rbtdb_murmur2a(TDB_DATA *key);
10
+ unsigned int rbtdb_murmur2_neutral(TDB_DATA *key);
11
+ unsigned int rbtdb_murmur2_aligned(TDB_DATA *key);
12
+ unsigned int rbtdb_fnv1a(TDB_DATA *key);
13
+ unsigned int rbtdb_djb2(TDB_DATA *key);
14
+ unsigned int rbtdb_djb3(TDB_DATA *key);
15
+ #ifdef HAVE_TDB_JENKINS_HASH
16
+ # define rbtdb_jenkins_lookup3 tdb_jenkins_hash
17
+ #else
18
+ unsigned int rbtdb_jenkins_lookup3(TDB_DATA *key);
19
+ #endif
20
+ #define rbtdb_default 0
21
+
22
+ #endif /* RBTDB_H */