wordtriez 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,86 @@
1
+ /*
2
+ * This file is part of hat-trie
3
+ *
4
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
5
+ *
6
+ *
7
+ * This is an implementation of the HAT-trie data structure described in,
8
+ *
9
+ * Askitis, N., & Sinha, R. (2007). HAT-trie: a cache-conscious trie-based data
10
+ * structure for strings. Proceedings of the thirtieth Australasian conference on
11
+ * Computer science-Volume 62 (pp. 97–105). Australian Computer Society, Inc.
12
+ *
13
+ * The HAT-trie is in essence a hybrid data structure, combining tries and hash
14
+ * tables in a clever way to try to get the best of both worlds.
15
+ *
16
+ */
17
+
18
+ #ifndef HATTRIE_HATTRIE_H
19
+ #define HATTRIE_HATTRIE_H
20
+
21
+ #ifdef __cplusplus
22
+ extern "C" {
23
+ #endif
24
+
25
+ #include "common.h"
26
+ #include <stdlib.h>
27
+ #include <stdbool.h>
28
+
29
+ typedef struct hattrie_t_ hattrie_t;
30
+
31
+ hattrie_t* hattrie_create (void); //< Create an empty hat-trie.
32
+ void hattrie_free (hattrie_t*); //< Free all memory used by a trie.
33
+ hattrie_t* hattrie_dup (const hattrie_t*); //< Duplicate an existing trie.
34
+ void hattrie_clear (hattrie_t*); //< Remove all entries.
35
+
36
+ /** number of inserted keys
37
+ */
38
+ size_t hattrie_size (hattrie_t*);
39
+
40
+ /** Find the given key in the trie, inserting it if it does not exist, and
41
+ * returning a pointer to its value.
42
+ *
43
+ * This pointer is not guaranteed to be valid after additional calls to
44
+ * hattrie_get, hattrie_del, hattrie_clear, or other functions that modify the
45
+ * trie.
46
+ */
47
+ value_t* hattrie_get (hattrie_t*, const char* key, size_t len);
48
+
49
+ /** Find a given key in the table, returning a NULL pointer if it does not
50
+ * exist. */
51
+ value_t* hattrie_tryget (hattrie_t*, const char* key, size_t len);
52
+
53
+ /** hattrie_walk callback signature */
54
+ typedef int (*hattrie_walk_cb)(const char* key, size_t len, value_t* val, void* user_data);
55
+
56
+ /** hattrie_walk callback return values; they control whether the walk stops or continues */
57
+ #define hattrie_walk_stop 0
58
+ #define hattrie_walk_continue 1
59
+
60
+ /** Find stored keys which are prefixes of key, and invoke callback for every found key and val.
61
+ * The invocation order is: short key to long key.
62
+ */
63
+ void hattrie_walk (hattrie_t*, const char* key, size_t len, void* user_data, hattrie_walk_cb);
64
+
65
+ /** Delete a given key from trie. Returns 0 if successful or -1 if not found.
66
+ */
67
+ int hattrie_del(hattrie_t* T, const char* key, size_t len);
68
+
69
+ typedef struct hattrie_iter_t_ hattrie_iter_t;
70
+
71
+ hattrie_iter_t* hattrie_iter_begin (const hattrie_t*, bool sorted);
72
+ void hattrie_iter_next (hattrie_iter_t*);
73
+ bool hattrie_iter_finished (hattrie_iter_t*);
74
+ void hattrie_iter_free (hattrie_iter_t*);
75
+ const char* hattrie_iter_key (hattrie_iter_t*, size_t* len);
76
+ value_t* hattrie_iter_val (hattrie_iter_t*);
77
+
78
+ /** Note the hattrie_iter_key() for prefixed search gets the suffix instead of the whole key
79
+ */
80
+ hattrie_iter_t* hattrie_iter_with_prefix(const hattrie_t*, bool sorted, const char* prefix, size_t prefix_len);
81
+
82
+ #ifdef __cplusplus
83
+ }
84
+ #endif
85
+
86
+ #endif
@@ -0,0 +1,46 @@
1
+ /*
2
+ * This file is part of hat-trie.
3
+ *
4
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
5
+ *
6
+ */
7
+
8
+ #include "misc.h"
9
+ #include <stdlib.h>
10
+
11
+
12
/* Allocate n bytes with malloc, terminating the process on failure.
 *
 * A NULL result is passed back to the caller only when n is zero; any other
 * NULL result prints a diagnostic to stderr and exits with EXIT_FAILURE. */
void* malloc_or_die(size_t n)
{
    void* block = malloc(n);

    /* Success, or a zero-byte request for which NULL is acceptable. */
    if (block != NULL || n == 0) {
        return block;
    }

    fprintf(stderr, "Cannot allocate %zu bytes.\n", n);
    exit(EXIT_FAILURE);
}
21
+
22
+
23
/* Resize the allocation at ptr to n bytes with realloc, terminating the
 * process on failure.
 *
 * A NULL result is passed back to the caller only when n is zero; any other
 * NULL result prints a diagnostic to stderr and exits with EXIT_FAILURE. */
void* realloc_or_die(void* ptr, size_t n)
{
    void* resized = realloc(ptr, n);
    const int failed = (resized == NULL) && (n != 0);

    if (failed) {
        fprintf(stderr, "Cannot allocate %zu bytes.\n", n);
        exit(EXIT_FAILURE);
    }

    return resized;
}
32
+
33
+
34
/* Open path with the given fopen mode, terminating the process on failure.
 *
 * Returns the opened stream. If fopen yields NULL, a diagnostic naming the
 * path and mode is printed to stderr and the process exits with
 * EXIT_FAILURE. */
FILE* fopen_or_die(const char* path, const char* mode)
{
    FILE* stream = fopen(path, mode);

    if (stream != NULL) {
        return stream;
    }

    fprintf(stderr, "Cannot open file %s with mode %s.\n", path, mode);
    exit(EXIT_FAILURE);
}
43
+
44
+
45
+
46
+
@@ -0,0 +1,22 @@
1
+ /*
2
+ * This file is part of hat-trie.
3
+ *
4
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
5
+ *
6
+ * misc :
7
+ * miscellaneous functions.
8
+ *
9
+ */
10
+
11
+ #ifndef LINESET_MISC_H
12
+ #define LINESET_MISC_H
13
+
14
+ #include <stdio.h>
15
+
16
+ void* malloc_or_die(size_t);
17
+ void* realloc_or_die(void*, size_t);
18
+ FILE* fopen_or_die(const char*, const char*);
19
+
20
+ #endif
21
+
22
+
@@ -0,0 +1,77 @@
1
+ /* This is MurmurHash3. The original C++ code was placed in the public domain
2
+ * by its author, Austin Appleby. */
3
+
4
+ #include "murmurhash3.h"
5
+
6
/* Final mixing step of the hash: three xor-shifts (by 16, 13 and 16 bits)
 * interleaved with two 32-bit multiplications by fixed constants.
 * Arithmetic wraps modulo 2^32. */
static inline uint32_t fmix(uint32_t h)
{
    uint32_t mixed = h;

    mixed = (mixed ^ (mixed >> 16)) * 0x85ebca6b;
    mixed = (mixed ^ (mixed >> 13)) * 0xc2b2ae35;

    return mixed ^ (mixed >> 16);
}
16
+
17
+
18
/* Rotate the 32-bit value x left by r bit positions.
 *
 * The rotation count is reduced modulo 32, so r == 0 (or any multiple of 32)
 * returns x unchanged instead of shifting by the full width of the type,
 * which C leaves undefined. For r in 1..31 the result is the same as the
 * plain (x << r) | (x >> (32 - r)) formulation. */
static inline uint32_t rotl32(uint32_t x, int8_t r)
{
    const unsigned int left  = (unsigned int) r & 31u;
    /* Masking again maps the left == 0 case to a right shift of 0, not 32. */
    const unsigned int right = (32u - left) & 31u;

    return (x << left) | (x >> right);
}
22
+
23
+
24
/* Read four consecutive bytes starting at p as one host-endian 32-bit word.
 * Copying byte by byte keeps the load valid for any alignment of p and
 * avoids accessing character data through a uint32_t lvalue. */
static inline uint32_t hash_load32(const unsigned char* p)
{
    uint32_t word;
    unsigned char* dst = (unsigned char*) &word;
    size_t j;

    for (j = 0; j < sizeof word; j++) {
        dst[j] = p[j];
    }

    return word;
}


/* Compute the 32-bit MurmurHash3 of the len_ bytes starting at data.
 *
 * The hash state always starts from the same fixed constant, so equal inputs
 * produce equal results. The input is consumed as whole 4-byte blocks
 * followed by a tail of 0-3 bytes; the byte count is then folded in and the
 * state is passed through fmix().
 *
 * data may have any alignment. Lengths are handled as size_t throughout;
 * only the low 32 bits of len_ are folded into the result. */
uint32_t hash(const char* data, size_t len_)
{
    const unsigned char* bytes = (const unsigned char*) data;
    const size_t nblocks = len_ / 4;
    const unsigned char* tail = bytes + nblocks * 4;

    const uint32_t c1 = 0xcc9e2d51;
    const uint32_t c2 = 0x1b873593;

    uint32_t h1 = 0xc062fb4a;
    uint32_t k1;
    size_t i;

    //----------
    // body

    for (i = 0; i < nblocks; i++) {
        k1 = hash_load32(bytes + i * 4);

        k1 *= c1;
        k1 = rotl32(k1, 15);
        k1 *= c2;

        h1 ^= k1;
        h1 = rotl32(h1, 13);
        h1 = h1 * 5 + 0xe6546b64;
    }

    //----------
    // tail

    k1 = 0;

    switch (len_ & 3) {
        case 3:
            k1 ^= (uint32_t) tail[2] << 16;
            /* fallthrough */
        case 2:
            k1 ^= (uint32_t) tail[1] << 8;
            /* fallthrough */
        case 1:
            k1 ^= tail[0];
            k1 *= c1;
            k1 = rotl32(k1, 15);
            k1 *= c2;
            h1 ^= k1;
            break;
        default:
            /* No trailing bytes: the tail contributes nothing. */
            break;
    }

    //----------
    // finalization

    h1 ^= (uint32_t) len_;

    return fmix(h1);
}
77
+
@@ -0,0 +1,12 @@
1
+
2
+ #ifndef MURMURHASH3_H
3
+ #define MURMURHASH3_H
4
+
5
+ #include <stdlib.h>
6
+
7
+ #include "pstdint.h"
8
+
9
+ uint32_t hash(const char* data, size_t len);
10
+
11
+ #endif
12
+