wordtriez 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/changes +21 -0
- data/copying +18 -0
- data/ext/common.h +8 -0
- data/ext/extconf.rb +32 -0
- data/ext/hat-trie/ahtable.c +550 -0
- data/ext/hat-trie/ahtable.h +93 -0
- data/ext/hat-trie/common.h +19 -0
- data/ext/hat-trie/hat-trie.c +771 -0
- data/ext/hat-trie/hat-trie.h +86 -0
- data/ext/hat-trie/misc.c +46 -0
- data/ext/hat-trie/misc.h +22 -0
- data/ext/hat-trie/murmurhash3.c +77 -0
- data/ext/hat-trie/murmurhash3.h +12 -0
- data/ext/hat-trie/pstdint.h +800 -0
- data/ext/hat-trie/text.c +174 -0
- data/ext/hat-trie/text.h +22 -0
- data/ext/triez.cc +313 -0
- data/lib/wordtriez.rb +65 -0
- data/readme.md +223 -0
- data/test/triez_test.rb +225 -0
- metadata +67 -0
@@ -0,0 +1,93 @@
|
|
1
|
+
/*
|
2
|
+
* This file is part of hat-trie.
|
3
|
+
*
|
4
|
+
* Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
|
5
|
+
*
|
6
|
+
*
|
7
|
+
* This is an implementation of the 'cache-conscious' hash tables described in,
|
8
|
+
*
|
9
|
+
* Askitis, N., & Zobel, J. (2005). Cache-conscious collision resolution in
|
10
|
+
* string hash tables. String Processing and Information Retrieval (pp.
|
11
|
+
* 91–102). Springer.
|
12
|
+
*
|
13
|
+
* Briefly, the idea is, as opposed to separate chaining with linked lists, to
|
14
|
+
* store keys contiguously in one big array, thereby improving the caching
|
15
|
+
* behavior, and reducing space requirements.
|
16
|
+
*
|
17
|
+
*/
|
18
|
+
|
19
|
+
#ifndef HATTRIE_AHTABLE_H
|
20
|
+
#define HATTRIE_AHTABLE_H
|
21
|
+
|
22
|
+
#ifdef __cplusplus
|
23
|
+
extern "C" {
|
24
|
+
#endif
|
25
|
+
|
26
|
+
#include <stdlib.h>
|
27
|
+
#include <stdbool.h>
|
28
|
+
#include "pstdint.h"
|
29
|
+
#include "common.h"
|
30
|
+
|
31
|
+
typedef unsigned char* slot_t;
|
32
|
+
|
33
|
+
typedef struct ahtable_t_
|
34
|
+
{
|
35
|
+
/* these fields are reserved for hattrie to fiddle with */
|
36
|
+
uint8_t flag;
|
37
|
+
unsigned char c0;
|
38
|
+
unsigned char c1;
|
39
|
+
|
40
|
+
size_t n; // number of slots
|
41
|
+
size_t m; // number of key/value pairs stored
|
42
|
+
size_t max_m; // number of stored keys before we resize
|
43
|
+
|
44
|
+
size_t* slot_sizes; // presumably one size entry per slot (n entries) -- TODO confirm in ahtable.c
|
45
|
+
slot_t* slots; // array of slot_t (unsigned char*) buffers; presumably n of them -- TODO confirm
|
46
|
+
} ahtable_t;
|
47
|
+
|
48
|
+
extern const double ahtable_max_load_factor;
|
49
|
+
extern const size_t ahtable_initial_size;
|
50
|
+
|
51
|
+
ahtable_t* ahtable_create (void); // Create an empty hash table.
|
52
|
+
ahtable_t* ahtable_create_n (size_t n); // Create an empty hash table, with
|
53
|
+
// n slots reserved.
|
54
|
+
|
55
|
+
void ahtable_free (ahtable_t*); // Free all memory used by a table.
|
56
|
+
void ahtable_clear (ahtable_t*); // Remove all entries.
|
57
|
+
size_t ahtable_size (const ahtable_t*); // Number of stored keys.
|
58
|
+
|
59
|
+
|
60
|
+
/** Find the given key in the table, inserting it if it does not exist, and
|
61
|
+
* returning a pointer to its value.
|
62
|
+
*
|
63
|
+
* This pointer is not guaranteed to be valid after additional calls to
|
64
|
+
* ahtable_get, ahtable_del, ahtable_clear, or other functions that modify the
|
65
|
+
* table.
|
66
|
+
*/
|
67
|
+
value_t* ahtable_get (ahtable_t*, const char* key, size_t len);
|
68
|
+
|
69
|
+
|
70
|
+
/** Find a given key in the table, returning a NULL pointer if it does not
|
71
|
+
* exist. */
|
72
|
+
value_t* ahtable_tryget (ahtable_t*, const char* key, size_t len);
|
73
|
+
|
74
|
+
|
75
|
+
int ahtable_del(ahtable_t*, const char* key, size_t len);
|
76
|
+
|
77
|
+
|
78
|
+
typedef struct ahtable_iter_t_ ahtable_iter_t;
|
79
|
+
|
80
|
+
ahtable_iter_t* ahtable_iter_begin (const ahtable_t*, bool sorted);
|
81
|
+
void ahtable_iter_next (ahtable_iter_t*);
|
82
|
+
bool ahtable_iter_finished (ahtable_iter_t*);
|
83
|
+
void ahtable_iter_free (ahtable_iter_t*);
|
84
|
+
const char* ahtable_iter_key (ahtable_iter_t*, size_t* len);
|
85
|
+
value_t* ahtable_iter_val (ahtable_iter_t*);
|
86
|
+
|
87
|
+
|
88
|
+
#ifdef __cplusplus
|
89
|
+
}
|
90
|
+
#endif
|
91
|
+
|
92
|
+
#endif
|
93
|
+
|
@@ -0,0 +1,19 @@
|
|
1
|
+
/*
|
2
|
+
* This file is part of hat-trie.
|
3
|
+
*
|
4
|
+
* Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
|
5
|
+
*
|
6
|
+
*
|
7
|
+
* Common typedefs, etc.
|
8
|
+
*
|
9
|
+
*/
|
10
|
+
|
11
|
+
|
12
|
+
#ifndef HATTRIE_COMMON_H
|
13
|
+
#define HATTRIE_COMMON_H
|
14
|
+
|
15
|
+
typedef unsigned long value_t;
|
16
|
+
|
17
|
+
#endif
|
18
|
+
|
19
|
+
|