tdb 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +8 -0
- data/.gitignore +19 -0
- data/COPYING +165 -0
- data/GIT-VERSION-GEN +40 -0
- data/GNUmakefile +190 -0
- data/Hash_Functions +67 -0
- data/LICENSE +17 -0
- data/README +57 -0
- data/Rakefile +140 -0
- data/TODO +7 -0
- data/ext/tdb/djb.c +26 -0
- data/ext/tdb/extconf.rb +12 -0
- data/ext/tdb/fnv.c +28 -0
- data/ext/tdb/lookup3.c +429 -0
- data/ext/tdb/murmur1.c +151 -0
- data/ext/tdb/murmur2.c +290 -0
- data/ext/tdb/rbtdb.h +22 -0
- data/ext/tdb/tdb.c +690 -0
- data/lib/tdb.rb +2 -0
- data/setup.rb +1586 -0
- data/tdb.gemspec +36 -0
- data/test/test_tdb.rb +260 -0
- metadata +105 -0
data/ext/tdb/murmur1.c
ADDED
@@ -0,0 +1,151 @@
|
|
1
|
+
#include "rbtdb.h"
|
2
|
+
#include <assert.h>
|
3
|
+
|
4
|
+
/*
|
5
|
+
* https://sites.google.com/site/murmurhash/
|
6
|
+
*
|
7
|
+
* Public Domain hash functions by Austin Appleby.
|
8
|
+
*
|
9
|
+
* Trivially adapted for use with Ruby TDB by Eric Wong.
|
10
|
+
*/
|
11
|
+
|
12
|
+
/*
 * Mixing parameters shared by the MurmurHash1 routines in this file:
 * 'm' is the multiplier and 'r' the right-shift distance applied after
 * each multiply.  They were generated offline; they are not really
 * 'magic', they just happen to work well.
 */
static const unsigned int m = 0xc6a4a793u;
static const int r = 16;

/* fixed seed mixed into the initial hash state; always zero */
static const unsigned int seed = 0;
|
19
|
+
|
20
|
+
/*
 * MurmurHash1 of the key->dsize bytes starting at key->dptr.
 *
 * Whole 32-bit words are loaded straight from the buffer in native byte
 * order (no alignment handling); the 1-3 trailing bytes are folded in
 * afterwards.  Returns the 32-bit hash.
 */
unsigned int rbtdb_murmur1(TDB_DATA * key)
{
	const unsigned char *p = key->dptr;
	int remaining = (int)key->dsize;
	/* start from a length-dependent state */
	unsigned int hash = seed ^ (remaining * m);

	/* body: one 32-bit word per iteration */
	for (; remaining >= 4; p += 4, remaining -= 4) {
		hash += *(const unsigned int *)p;
		hash *= m;
		hash ^= hash >> r;
	}

	/* tail: between one and three leftover bytes */
	if (remaining > 0) {
		if (remaining == 3)
			hash += p[2] << 16;
		if (remaining >= 2)
			hash += p[1] << 8;
		hash += p[0];
		hash *= m;
		hash ^= hash >> r;
	}

	/* final avalanche so the last bytes are well incorporated */
	hash *= m;
	hash ^= hash >> 10;
	hash *= m;
	hash ^= hash >> 17;

	return hash;
}
|
59
|
+
|
60
|
+
/* adapted from MurmurHashAligned */
|
61
|
+
unsigned int rbtdb_murmur1_aligned(TDB_DATA * key)
|
62
|
+
{
|
63
|
+
const unsigned char *data = key->dptr;
|
64
|
+
int len = (int)key->dsize;
|
65
|
+
unsigned int h = seed ^ (len * m);
|
66
|
+
union { const unsigned char *byte; int integer; } cast = { data };
|
67
|
+
int align = cast.integer & 3;
|
68
|
+
|
69
|
+
if (align & (len >= 4)) {
|
70
|
+
/* Pre-load the temp registers */
|
71
|
+
unsigned int t = 0, d = 0;
|
72
|
+
int sl, sr, pack;
|
73
|
+
|
74
|
+
switch (align) {
|
75
|
+
case 1: t |= data[2] << 16;
|
76
|
+
case 2: t |= data[1] << 8;
|
77
|
+
case 3: t |= data[0];
|
78
|
+
}
|
79
|
+
|
80
|
+
t <<= (8 * align);
|
81
|
+
|
82
|
+
data += 4 - align;
|
83
|
+
len -= 4 - align;
|
84
|
+
|
85
|
+
sl = 8 * (4 - align);
|
86
|
+
sr = 8 * align;
|
87
|
+
|
88
|
+
/* Mix */
|
89
|
+
while (len >= 4) {
|
90
|
+
assert((cast.integer & 3) == 0);
|
91
|
+
|
92
|
+
d = *(const unsigned int *)data;
|
93
|
+
t = (t >> sr) | (d << sl);
|
94
|
+
h += t;
|
95
|
+
h *= m;
|
96
|
+
h ^= h >> r;
|
97
|
+
t = d;
|
98
|
+
|
99
|
+
data += 4;
|
100
|
+
len -= 4;
|
101
|
+
}
|
102
|
+
|
103
|
+
/* Handle leftover data in temp registers */
|
104
|
+
pack = len < align ? len : align;
|
105
|
+
d = 0;
|
106
|
+
|
107
|
+
switch (pack) {
|
108
|
+
case 3:
|
109
|
+
d |= data[2] << 16;
|
110
|
+
case 2:
|
111
|
+
d |= data[1] << 8;
|
112
|
+
case 1:
|
113
|
+
d |= data[0];
|
114
|
+
case 0:
|
115
|
+
h += (t >> sr) | (d << sl);
|
116
|
+
h *= m;
|
117
|
+
h ^= h >> r;
|
118
|
+
}
|
119
|
+
|
120
|
+
data += pack;
|
121
|
+
len -= pack;
|
122
|
+
} else {
|
123
|
+
while (len >= 4) {
|
124
|
+
h += *(const unsigned int *)data;
|
125
|
+
h *= m;
|
126
|
+
h ^= h >> r;
|
127
|
+
|
128
|
+
data += 4;
|
129
|
+
len -= 4;
|
130
|
+
}
|
131
|
+
}
|
132
|
+
|
133
|
+
/* Handle tail bytes */
|
134
|
+
switch (len) {
|
135
|
+
case 3:
|
136
|
+
h += data[2] << 16;
|
137
|
+
case 2:
|
138
|
+
h += data[1] << 8;
|
139
|
+
case 1:
|
140
|
+
h += data[0];
|
141
|
+
h *= m;
|
142
|
+
h ^= h >> r;
|
143
|
+
};
|
144
|
+
|
145
|
+
h *= m;
|
146
|
+
h ^= h >> 10;
|
147
|
+
h *= m;
|
148
|
+
h ^= h >> 17;
|
149
|
+
|
150
|
+
return h;
|
151
|
+
}
|
data/ext/tdb/murmur2.c
ADDED
@@ -0,0 +1,290 @@
|
|
1
|
+
#include "rbtdb.h"
|
2
|
+
/*
|
3
|
+
* https://sites.google.com/site/murmurhash/
|
4
|
+
*
|
5
|
+
* Public Domain hash functions by Austin Appleby.
|
6
|
+
*
|
7
|
+
* Trivially adapted for use with Ruby TDB by Eric Wong.
|
8
|
+
*/
|
9
|
+
|
10
|
+
/*
 * Mixing parameters shared by the MurmurHash2 routines in this file:
 * 'm' is the multiplier and 'r' the right-shift distance used when
 * scrambling each input word.  They were generated offline; they are not
 * really 'magic', they just happen to work well.
 */
static const unsigned int m = 0x5bd1e995u;
static const int r = 24;

/* fixed seed mixed into the initial hash state; always zero */
static const unsigned int seed = 0;
|
18
|
+
|
19
|
+
/*
 * MurmurHash2 of the key->dsize bytes starting at key->dptr.
 *
 * Whole 32-bit words are loaded straight from the buffer in native byte
 * order (no alignment handling); the 1-3 trailing bytes are folded in
 * afterwards.  Returns the 32-bit hash.
 */
unsigned int rbtdb_murmur2(TDB_DATA * key)
{
	const unsigned char *p = key->dptr;
	int remaining = (int)key->dsize;
	/* start from a length-dependent state */
	unsigned int hash = seed ^ remaining;

	/* body: scramble each 32-bit word, then fold it into the state */
	for (; remaining >= 4; p += 4, remaining -= 4) {
		unsigned int word = *(const unsigned int *)p;

		word *= m;
		word ^= word >> r;
		word *= m;

		hash *= m;
		hash ^= word;
	}

	/* tail: between one and three leftover bytes */
	if (remaining > 0) {
		if (remaining == 3)
			hash ^= p[2] << 16;
		if (remaining >= 2)
			hash ^= p[1] << 8;
		hash ^= p[0];
		hash *= m;
	}

	/* final avalanche so the last bytes are well incorporated */
	hash ^= hash >> 13;
	hash *= m;
	hash ^= hash >> 15;

	return hash;
}
|
62
|
+
|
63
|
+
/*
|
64
|
+
* This is a variant of MurmurHash2 modified to use the Merkle-Damgard
|
65
|
+
* construction. Bulk speed should be identical to Murmur2, small-key speed
|
66
|
+
* will be 10%-20% slower due to the added overhead at the end of the hash.
|
67
|
+
*
|
68
|
+
* This variant fixes a minor issue where null keys were more likely to
|
69
|
+
* collide with each other than expected, and also makes the algorithm
|
70
|
+
* more amenable to incremental implementations. All other caveats from
|
71
|
+
* MurmurHash2 still apply.
|
72
|
+
*/
|
73
|
+
|
74
|
+
#define mmix(h,k) do { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } while (0)
|
75
|
+
|
76
|
+
unsigned int rbtdb_murmur2a(TDB_DATA * key)
|
77
|
+
{
|
78
|
+
const unsigned char *data = key->dptr;
|
79
|
+
int len = (int)key->dsize;
|
80
|
+
unsigned int l = (unsigned int)len;
|
81
|
+
unsigned int h = seed;
|
82
|
+
unsigned int t = 0;
|
83
|
+
|
84
|
+
while (len >= 4) {
|
85
|
+
unsigned int k = *(const unsigned int *)data;
|
86
|
+
|
87
|
+
mmix(h, k);
|
88
|
+
|
89
|
+
data += 4;
|
90
|
+
len -= 4;
|
91
|
+
}
|
92
|
+
|
93
|
+
switch (len) {
|
94
|
+
case 3:
|
95
|
+
t ^= data[2] << 16;
|
96
|
+
case 2:
|
97
|
+
t ^= data[1] << 8;
|
98
|
+
case 1:
|
99
|
+
t ^= data[0];
|
100
|
+
};
|
101
|
+
|
102
|
+
mmix(h, t);
|
103
|
+
mmix(h, l);
|
104
|
+
|
105
|
+
h ^= h >> 13;
|
106
|
+
h *= m;
|
107
|
+
h ^= h >> 15;
|
108
|
+
|
109
|
+
return h;
|
110
|
+
}
|
111
|
+
|
112
|
+
/*
|
113
|
+
* Same algorithm as MurmurHash2, but only does aligned reads - should be safer
|
114
|
+
* on certain platforms
|
115
|
+
*
|
116
|
+
* Performance will be lower than MurmurHash2
|
117
|
+
*/
|
118
|
+
|
119
|
+
#define MIX(h,k,m) do { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } while (0)
|
120
|
+
|
121
|
+
unsigned int rbtdb_murmur2_aligned(TDB_DATA * key)
|
122
|
+
{
|
123
|
+
const unsigned char *data = key->dptr;
|
124
|
+
int len = (int)key->dsize;
|
125
|
+
unsigned int h = seed ^ len;
|
126
|
+
union { const unsigned char *byte; int integer; } cast = { data };
|
127
|
+
int align = cast.integer & 3;
|
128
|
+
|
129
|
+
if (align && (len >= 4)) {
|
130
|
+
/* Pre-load the temp registers */
|
131
|
+
unsigned int t = 0, d = 0;
|
132
|
+
int sl, sr;
|
133
|
+
|
134
|
+
switch (align) {
|
135
|
+
case 1:
|
136
|
+
t |= data[2] << 16;
|
137
|
+
case 2:
|
138
|
+
t |= data[1] << 8;
|
139
|
+
case 3:
|
140
|
+
t |= data[0];
|
141
|
+
}
|
142
|
+
|
143
|
+
t <<= (8 * align);
|
144
|
+
|
145
|
+
data += 4 - align;
|
146
|
+
len -= 4 - align;
|
147
|
+
|
148
|
+
sl = 8 * (4 - align);
|
149
|
+
sr = 8 * align;
|
150
|
+
|
151
|
+
/* Mix */
|
152
|
+
while (len >= 4) {
|
153
|
+
unsigned int k;
|
154
|
+
|
155
|
+
d = *(const unsigned int *)data;
|
156
|
+
t = (t >> sr) | (d << sl);
|
157
|
+
|
158
|
+
k = t;
|
159
|
+
|
160
|
+
MIX(h, k, m);
|
161
|
+
|
162
|
+
t = d;
|
163
|
+
|
164
|
+
data += 4;
|
165
|
+
len -= 4;
|
166
|
+
}
|
167
|
+
|
168
|
+
/* Handle leftover data in temp registers */
|
169
|
+
d = 0;
|
170
|
+
if (len >= align) {
|
171
|
+
unsigned int k;
|
172
|
+
|
173
|
+
switch (align) {
|
174
|
+
case 3:
|
175
|
+
d |= data[2] << 16;
|
176
|
+
case 2:
|
177
|
+
d |= data[1] << 8;
|
178
|
+
case 1:
|
179
|
+
d |= data[0];
|
180
|
+
}
|
181
|
+
|
182
|
+
k = (t >> sr) | (d << sl);
|
183
|
+
MIX(h, k, m);
|
184
|
+
|
185
|
+
data += align;
|
186
|
+
len -= align;
|
187
|
+
|
188
|
+
/* Handle tail bytes */
|
189
|
+
switch (len) {
|
190
|
+
case 3:
|
191
|
+
h ^= data[2] << 16;
|
192
|
+
case 2:
|
193
|
+
h ^= data[1] << 8;
|
194
|
+
case 1:
|
195
|
+
h ^= data[0];
|
196
|
+
h *= m;
|
197
|
+
};
|
198
|
+
} else {
|
199
|
+
switch (len) {
|
200
|
+
case 3:
|
201
|
+
d |= data[2] << 16;
|
202
|
+
case 2:
|
203
|
+
d |= data[1] << 8;
|
204
|
+
case 1:
|
205
|
+
d |= data[0];
|
206
|
+
case 0:
|
207
|
+
h ^= (t >> sr) | (d << sl);
|
208
|
+
h *= m;
|
209
|
+
}
|
210
|
+
}
|
211
|
+
|
212
|
+
h ^= h >> 13;
|
213
|
+
h *= m;
|
214
|
+
h ^= h >> 15;
|
215
|
+
|
216
|
+
return h;
|
217
|
+
} else {
|
218
|
+
while (len >= 4) {
|
219
|
+
unsigned int k = *(const unsigned int *)data;
|
220
|
+
|
221
|
+
MIX(h, k, m);
|
222
|
+
|
223
|
+
data += 4;
|
224
|
+
len -= 4;
|
225
|
+
}
|
226
|
+
|
227
|
+
/* Handle tail bytes */
|
228
|
+
switch (len) {
|
229
|
+
case 3:
|
230
|
+
h ^= data[2] << 16;
|
231
|
+
case 2:
|
232
|
+
h ^= data[1] << 8;
|
233
|
+
case 1:
|
234
|
+
h ^= data[0];
|
235
|
+
h *= m;
|
236
|
+
};
|
237
|
+
|
238
|
+
h ^= h >> 13;
|
239
|
+
h *= m;
|
240
|
+
h ^= h >> 15;
|
241
|
+
|
242
|
+
return h;
|
243
|
+
}
|
244
|
+
}
|
245
|
+
|
246
|
+
/*
|
247
|
+
* Same as MurmurHash2, but endian- and alignment-neutral.
|
248
|
+
* Half the speed though, alas.
|
249
|
+
*/
|
250
|
+
unsigned int rbtdb_murmur2_neutral(TDB_DATA * key)
|
251
|
+
{
|
252
|
+
const unsigned char *data = key->dptr;
|
253
|
+
int len = (int)key->dsize;
|
254
|
+
unsigned int h = seed ^ len;
|
255
|
+
|
256
|
+
while (len >= 4) {
|
257
|
+
unsigned int k;
|
258
|
+
|
259
|
+
k = data[0];
|
260
|
+
k |= data[1] << 8;
|
261
|
+
k |= data[2] << 16;
|
262
|
+
k |= data[3] << 24;
|
263
|
+
|
264
|
+
k *= m;
|
265
|
+
k ^= k >> r;
|
266
|
+
k *= m;
|
267
|
+
|
268
|
+
h *= m;
|
269
|
+
h ^= k;
|
270
|
+
|
271
|
+
data += 4;
|
272
|
+
len -= 4;
|
273
|
+
}
|
274
|
+
|
275
|
+
switch (len) {
|
276
|
+
case 3:
|
277
|
+
h ^= data[2] << 16;
|
278
|
+
case 2:
|
279
|
+
h ^= data[1] << 8;
|
280
|
+
case 1:
|
281
|
+
h ^= data[0];
|
282
|
+
h *= m;
|
283
|
+
};
|
284
|
+
|
285
|
+
h ^= h >> 13;
|
286
|
+
h *= m;
|
287
|
+
h ^= h >> 15;
|
288
|
+
|
289
|
+
return h;
|
290
|
+
}
|
data/ext/tdb/rbtdb.h
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
#ifndef RBTDB_H
#define RBTDB_H

#include <ruby.h>
#include <tdb.h>

/*
 * Hash functions selectable for a TDB.  Each one takes the key as a
 * TDB_DATA pointer and returns an unsigned int hash.
 */

/* MurmurHash1 variants */
unsigned int rbtdb_murmur1(TDB_DATA *key);
unsigned int rbtdb_murmur1_aligned(TDB_DATA *key);

/* MurmurHash2 variants */
unsigned int rbtdb_murmur2(TDB_DATA *key);
unsigned int rbtdb_murmur2a(TDB_DATA *key);
unsigned int rbtdb_murmur2_neutral(TDB_DATA *key);
unsigned int rbtdb_murmur2_aligned(TDB_DATA *key);

/* further hashes */
unsigned int rbtdb_fnv1a(TDB_DATA *key);
unsigned int rbtdb_djb2(TDB_DATA *key);
unsigned int rbtdb_djb3(TDB_DATA *key);

/*
 * When HAVE_TDB_JENKINS_HASH is defined, rbtdb_jenkins_lookup3 is just
 * another name for tdb_jenkins_hash; otherwise a function of that name
 * is declared here.
 */
#if defined(HAVE_TDB_JENKINS_HASH)
#  define rbtdb_jenkins_lookup3 tdb_jenkins_hash
#else
unsigned int rbtdb_jenkins_lookup3(TDB_DATA *key);
#endif

/* NOTE(review): presumably means "no custom hash function" - confirm in tdb.c */
#define rbtdb_default 0

#endif /* RBTDB_H */
|