fast_trie 0.3.5

Sign up to get free protection for your applications and to get access to all the features.
data/ext/trie/trie.h ADDED
@@ -0,0 +1,40 @@
1
+ #include "darray.h"
2
+ #include "tail.h"
3
+
4
+ typedef struct _Trie { /* trie handle: one DArray paired with one Tail */
5
+ DArray *da; /* double-array part (DArray presumably comes from darray.h -- confirm) */
6
+ Tail *tail; /* tail part (Tail presumably comes from tail.h); looks like it holds key suffixes -- confirm */
7
+ } Trie;
8
+
9
+ typedef struct _TrieState { /* position inside a trie, as consumed by the trie_state_* functions */
10
+ const Trie *trie; /**< the corresponding trie */
11
+ TrieIndex index; /**< index in double-array/tail structures */
12
+ short suffix_idx; /**< suffix character offset, if in suffix; NOTE(review): short caps this at SHRT_MAX -- confirm suffixes stay shorter */
13
+ short is_suffix; /**< whether it is currently in suffix part */
14
+ } TrieState;
15
+
16
+
17
+ #define trie_da_is_separate(da,s) (da_get_base ((da), (s)) < 0) /* true when the base value of state s is negative */
18
+ #define trie_da_get_tail_index(da,s) (-da_get_base ((da), (s))) /* negated base value of state s, read as a tail index */
19
+ #define trie_da_set_tail_index(da,s,v) (da_set_base ((da), (s), -(v))) /* stores tail index v as the negated base of state s */
20
+ #define trie_state_is_terminal(s) trie_state_is_walkable((s),TRIE_CHAR_TERM) /* whether TRIE_CHAR_TERM can be walked from state s */
21
+
22
+
23
+ Trie* trie_new(void); /* returns a Trie pointer (presumably a fresh trie to be released with trie_free -- confirm); (void) makes this a real prototype so calls with stray arguments are diagnosed */
24
+ void trie_free(Trie *trie);
25
+ static Bool trie_branch_in_branch (Trie *trie, TrieIndex sep_node, const TrieChar *suffix, TrieData data); /* NOTE(review): static prototype in a header -- every file that includes it gets its own internal-linkage declaration; confirm only the implementation file needs it */
26
+ static Bool trie_branch_in_tail(Trie *trie, TrieIndex sep_node, const TrieChar *suffix, TrieData data); /* NOTE(review): static prototype in a header, same concern as trie_branch_in_branch */
27
+ Bool trie_store (Trie *trie, const TrieChar *key, TrieData data);
28
+ Bool trie_retrieve (const Trie *trie, const TrieChar *key, TrieData *o_data);
29
+ Bool trie_delete (Trie *trie, const TrieChar *key);
30
+ TrieState * trie_root (const Trie *trie);
31
+ static TrieState * trie_state_new (const Trie *trie, TrieIndex index, short suffix_idx, short is_suffix);
32
+ TrieState * trie_state_clone (const TrieState *s);
33
+ void trie_state_free (TrieState *s);
34
+ void trie_state_rewind (TrieState *s);
35
+ Bool trie_state_walk (TrieState *s, TrieChar c);
36
+ Bool trie_state_is_walkable (const TrieState *s, TrieChar c);
37
+ Bool trie_state_is_leaf (const TrieState *s);
38
+ TrieData trie_state_get_data (const TrieState *s);
39
+
40
+
@@ -0,0 +1,73 @@
1
+ /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
+ /*
3
+ * triedefs.h - General typedefs for trie
4
+ * Created: 2006-08-11
5
+ * Author: Theppitak Karoonboonyanan <thep@linux.thai.net>
6
+ */
7
+
8
+ #ifndef __TRIEDEFS_H
9
+ #define __TRIEDEFS_H
10
+
11
+ #include "typedefs.h"
12
+
13
+ /**
14
+ * @file triedefs.h
15
+ * @brief General typedefs for trie
16
+ */
17
+
18
+ /**
19
+ * @brief Trie IO modes
20
+ */
21
+ typedef enum { /* each mode below is a distinct single bit; presumably combinable with | -- confirm against callers */
22
+ TRIE_IO_READ = 0x01, /* bit 0 */
23
+ TRIE_IO_WRITE = 0x02, /* bit 1 */
24
+ TRIE_IO_CREATE = 0x04 /* bit 2 */
25
+ } TrieIOMode;
26
+
27
+ /**
28
+ * @brief Trie character type for alphabet
29
+ */
30
+ typedef uint32 AlphaChar;
31
+
32
+ /**
33
+ * @brief Error value for alphabet character
34
+ */
35
+ #define ALPHA_CHAR_ERROR (~(AlphaChar)0)
36
+
37
+ /**
38
+ * @brief Trie character type for key
39
+ */
40
+ typedef unsigned char TrieChar;
41
+ /**
42
+ * @brief Trie terminator character
43
+ */
44
+ #define TRIE_CHAR_TERM '\0'
45
+ #define TRIE_CHAR_MAX 255
46
+
47
+ /**
48
+ * @brief Type of Trie index
49
+ */
50
+ typedef int32 TrieIndex;
51
+ /**
52
+ * @brief Trie error index
53
+ */
54
+ #define TRIE_INDEX_ERROR 0
55
+ /**
56
+ * @brief Maximum trie index value
57
+ */
58
+ #define TRIE_INDEX_MAX 0x7fffffff
59
+
60
+ /**
61
+ * @brief Type of value associated to trie entries
62
+ */
63
+ typedef unsigned long TrieData;
64
+ /**
65
+ * @brief Trie error data
66
+ */
67
+ #define TRIE_DATA_ERROR (-1) /* parenthesized so the expansion is always a single operand; TrieData is unsigned long, so comparing against it converts -1 to the largest TrieData value */
68
+
69
+ #endif /* __TRIEDEFS_H */
70
+
71
+ /*
72
+ vi:ts=4:ai:expandtab
73
+ */
@@ -0,0 +1,113 @@
1
+ /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
+ /*
3
+ * typedefs.h - general types
4
+ * Created : 11 Aug 2006
5
+ * Author : Theppitak Karoonboonyanan <thep@linux.thai.net>
6
+ */
7
+
8
+ #ifndef __TYPEDEFS_H
9
+ #define __TYPEDEFS_H
10
+
11
+ #include <limits.h>
12
+
13
+ typedef enum { FALSE = 0, TRUE = 1 } Bool; /* project boolean type: FALSE is 0, TRUE is 1 */
14
+
15
+ # if UCHAR_MAX == 0xff
16
+ # ifndef UINT8_TYPEDEF
17
+ # define UINT8_TYPEDEF
18
+ typedef unsigned char uint8;
19
+ # endif /* UINT8_TYPEDEF */
20
+ # endif /* UCHAR_MAX */
21
+
22
+ # if SCHAR_MAX == 0x7f
23
+ # ifndef INT8_TYPEDEF
24
+ # define INT8_TYPEDEF
25
+ typedef signed char int8;
26
+ # endif /* INT8_TYPEDEF */
27
+ # endif /* SCHAR_MAX */
28
+
29
+ # if UINT_MAX == 0xffff
30
+ # ifndef UINT16_TYPEDEF
31
+ # define UINT16_TYPEDEF
32
+ typedef unsigned int uint16;
33
+ # endif /* UINT16_TYPEDEF */
34
+ # endif /* UINT_MAX */
35
+
36
+ # if INT_MAX == 0x7fff
37
+ # ifndef INT16_TYPEDEF
38
+ # define INT16_TYPEDEF
39
+ typedef int int16;
40
+ # endif /* INT16_TYPEDEF */
41
+ # endif /* INT_MAX */
42
+
43
+ # if USHRT_MAX == 0xffff
44
+ # ifndef UINT16_TYPEDEF
45
+ # define UINT16_TYPEDEF
46
+ typedef unsigned short uint16;
47
+ # endif /* UINT16_TYPEDEF */
48
+ # endif /* USHRT_MAX */
49
+
50
+ # if SHRT_MAX == 0x7fff
51
+ # ifndef INT16_TYPEDEF
52
+ # define INT16_TYPEDEF
53
+ typedef short int16;
54
+ # endif /* INT16_TYPEDEF */
55
+ # endif /* SHRT_MAX */
56
+
57
+ # if UINT_MAX == 0xffffffff
58
+ # ifndef UINT32_TYPEDEF
59
+ # define UINT32_TYPEDEF
60
+ typedef unsigned int uint32;
61
+ # endif /* UINT32_TYPEDEF */
62
+ # endif /* UINT_MAX */
63
+
64
+ # if INT_MAX == 0x7fffffff
65
+ # ifndef INT32_TYPEDEF
66
+ # define INT32_TYPEDEF
67
+ typedef int int32;
68
+ # endif /* INT32_TYPEDEF */
69
+ # endif /* INT_MAX */
70
+
71
+ # if ULONG_MAX == 0xffffffff
72
+ # ifndef UINT32_TYPEDEF
73
+ # define UINT32_TYPEDEF
74
+ typedef unsigned long uint32;
75
+ # endif /* UINT32_TYPEDEF */
76
+ # endif /* ULONG_MAX */
77
+
78
+ # if LONG_MAX == 0x7fffffff
79
+ # ifndef INT32_TYPEDEF
80
+ # define INT32_TYPEDEF
81
+ typedef long int32;
82
+ # endif /* INT32_TYPEDEF */
83
+ # endif /* LONG_MAX */
84
+
85
+ # ifndef UINT8_TYPEDEF
86
+ # error "uint8 type is undefined!"
87
+ # endif
88
+ # ifndef INT8_TYPEDEF
89
+ # error "int8 type is undefined!"
90
+ # endif
91
+ # ifndef UINT16_TYPEDEF
92
+ # error "uint16 type is undefined!"
93
+ # endif
94
+ # ifndef INT16_TYPEDEF
95
+ # error "int16 type is undefined!"
96
+ # endif
97
+ # ifndef UINT32_TYPEDEF
98
+ # error "uint32 type is undefined!"
99
+ # endif
100
+ # ifndef INT32_TYPEDEF
101
+ # error "int32 type is undefined!"
102
+ # endif
103
+
104
+ typedef uint8 byte;
105
+ typedef uint16 word;
106
+ typedef uint32 dword;
107
+
108
+
109
+ #endif /* __TYPEDEFS_H */
110
+
111
+ /*
112
+ vi:ts=4:ai:expandtab
113
+ */
data/lib/trie.rb ADDED
@@ -0,0 +1 @@
1
+ require File.dirname(__FILE__) + '/../ext/trie'
data/spec/trie_spec.rb ADDED
@@ -0,0 +1,266 @@
1
+ require File.dirname(__FILE__) + '/../ext/trie/trie'
2
+
3
+ describe Trie do
4
+ before :each do
5
+ @trie = Trie.new;
6
+ @trie.add('rocket')
7
+ @trie.add('rock')
8
+ @trie.add('frederico')
9
+ end
10
+
11
+ #describe :path do
12
+ # it 'returns the correct path' do
13
+ # @trie.path.should == TRIE_PATH
14
+ # end
15
+ #end
16
+
17
+ describe :has_key? do
18
+ it 'returns true for words in the trie' do
19
+ @trie.has_key?('rocket').should be_true
20
+ end
21
+
22
+ it 'returns nil for words that are not in the trie' do
23
+ @trie.has_key?('not_in_the_trie').should be_nil
24
+ end
25
+ end
26
+
27
+ describe :get do
28
+ it 'returns -1 for words in the trie without a weight' do
29
+ @trie.get('rocket').should == -1
30
+ end
31
+
32
+ it 'returns nil if the word is not in the trie' do
33
+ @trie.get('not_in_the_trie').should be_nil
34
+ end
35
+ end
36
+
37
+ describe :add do
38
+ it 'adds a word to the trie' do
39
+ @trie.add('forsooth').should == true
40
+ @trie.get('forsooth').should == -1
41
+ end
42
+
43
+ it 'adds a word with a weight to the trie' do
44
+ @trie.add('chicka',123).should == true
45
+ @trie.get('chicka').should == 123
46
+ end
47
+
48
+ it 'adds values greater than 16-bit allows' do
49
+ @trie.add('chicka', 72_000).should == true
50
+ @trie.get('chicka').should == 72_000
51
+ end
52
+
53
+ it 'adds a word with a non-numeric value to the trie' do
54
+ @trie.add('doot', 'Heeey').should == true
55
+ @trie.get('doot').should == 'Heeey'
56
+ end
57
+ end
58
+
59
+ describe :delete do
60
+ it 'deletes a word from the trie' do
61
+ @trie.delete('rocket').should == true
62
+ @trie.has_key?('rocket').should be_nil
63
+ end
64
+ end
65
+
66
+ describe :children do
67
+ it 'returns all words beginning with a given prefix' do
68
+ children = @trie.children('roc')
69
+ children.size.should == 2
70
+ children.should include('rock')
71
+ children.should include('rocket')
72
+ end
73
+
74
+ it 'returns blank array if prefix does not exist' do
75
+ @trie.children('ajsodij').should == []
76
+ end
77
+
78
+ it 'includes the prefix if the prefix is a word' do
79
+ children = @trie.children('rock')
80
+ children.size.should == 2
81
+ children.should include('rock')
82
+ children.should include('rocket')
83
+ end
84
+
85
+ it 'returns blank array if prefix is nil' do
86
+ @trie.children(nil).should == []
87
+ end
88
+ end
89
+
90
+ describe :children_with_values do
91
+ before :each do
92
+ @trie.add('abc',2)
93
+ @trie.add('abcd',4)
94
+ end
95
+
96
+ it 'returns all words with values beginning with a given prefix' do
97
+ children = @trie.children_with_values('ab')
98
+ children.size.should == 2
99
+ children.should include(['abc',2])
100
+ children.should include(['abcd',4])
101
+ end
102
+
103
+ it 'returns nil if prefix does not exist' do
104
+ @trie.children_with_values('ajsodij').should == []
105
+ end
106
+
107
+ it 'includes the prefix if the prefix is a word' do
108
+ children = @trie.children_with_values('abc')
109
+ children.size.should == 2
110
+ children.should include(['abc',2])
111
+ children.should include(['abcd',4])
112
+ end
113
+
114
+ it 'returns blank array if prefix is nil' do
115
+ @trie.children_with_values(nil).should == []
116
+ end
117
+ end
118
+
119
+ #describe :walk_to_terminal do
120
+ # it 'returns the first word found along a path' do
121
+ # @trie.add 'anderson'
122
+ # @trie.add 'andreas'
123
+ # @trie.add 'and'
124
+
125
+ # @trie.walk_to_terminal('anderson').should == 'and'
126
+ # end
127
+
128
+ # it 'returns the first word and value along a path' do
129
+ # @trie.add 'anderson'
130
+ # @trie.add 'andreas'
131
+ # @trie.add 'and', 15
132
+
133
+ # @trie.walk_to_terminal('anderson',true).should == ['and', 15]
134
+ # end
135
+ #end
136
+
137
+ describe :root do
138
+ it 'returns a TrieNode' do
139
+ @trie.root.should be_an_instance_of(TrieNode)
140
+ end
141
+
142
+ it 'returns a different TrieNode each time' do
143
+ @trie.root.should_not == @trie.root
144
+ end
145
+ end
146
+
147
+ #describe :save do
148
+ # it 'saves the trie to disk such that another trie can be spawned which will read successfully' do
149
+ # @trie.add('omgwtf',123)
150
+ # @trie.save
151
+ #
152
+ # trie2 = Trie.new(TRIE_PATH)
153
+ # trie2.get('omgwtf').should == 123
154
+ # end
155
+ #end
156
+ end
157
+
158
+ describe TrieNode do
159
+ before :each do
160
+ @trie = Trie.new;
161
+ @trie.add('rocket',1)
162
+ @trie.add('rock',2)
163
+ @trie.add('frederico',3)
164
+ @node = @trie.root
165
+ end
166
+
167
+ describe :state do
168
+ it 'returns the most recent state character' do
169
+ @node.walk!('r')
170
+ @node.state.should == 'r'
171
+ @node.walk!('o')
172
+ @node.state.should == 'o'
173
+ end
174
+
175
+ it 'is nil when no walk has occurred' do
176
+ @node.state.should == nil
177
+ end
178
+ end
179
+
180
+ describe :full_state do
181
+ it 'returns the current string' do
182
+ @node.walk!('r').walk!('o').walk!('c')
183
+ @node.full_state.should == 'roc'
184
+ end
185
+
186
+ it 'is a blank string when no walk has occurred' do
187
+ @node.full_state.should == ''
188
+ end
189
+ end
190
+
191
+ describe :walk! do
192
+ it 'returns the updated object when the walk succeeds' do
193
+ other = @node.walk!('r')
194
+ other.should == @node
195
+ end
196
+
197
+ it 'returns nil when the walk fails' do
198
+ @node.walk!('q').should be_nil
199
+ end
200
+ end
201
+
202
+ describe :walk do
203
+ it 'returns a new node object when the walk succeeds' do
204
+ other = @node.walk('r')
205
+ other.should != @node
206
+ end
207
+
208
+ it 'returns nil when the walk fails' do
209
+ @node.walk('q').should be_nil
210
+ end
211
+ end
212
+
213
+
214
+ describe :value do
215
+ it 'returns nil when the node is not terminal' do
216
+ @node.walk!('r')
217
+ @node.value.should be_nil
218
+ end
219
+
220
+ it 'returns a value when the node is terminal' do
221
+ @node.walk!('r').walk!('o').walk!('c').walk!('k')
222
+ @node.value.should == 2
223
+ end
224
+ end
225
+
226
+ describe :terminal? do
227
+ it 'returns true when the node is a word end' do
228
+ @node.walk!('r').walk!('o').walk!('c').walk!('k')
229
+ @node.should be_terminal
230
+ end
231
+
232
+ it 'returns nil when the node is not a word end' do
233
+ @node.walk!('r').walk!('o').walk!('c')
234
+ @node.should_not be_terminal
235
+ end
236
+ end
237
+
238
+ describe :leaf? do
239
+ it 'returns true when this is the end of a branch of the trie' do
240
+ @node.walk!('r').walk!('o').walk!('c').walk!('k').walk!('e').walk!('t')
241
+ @node.should be_leaf
242
+ end
243
+
244
+ it 'returns nil when there are more splits on this branch' do
245
+ @node.walk!('r').walk!('o').walk!('c').walk!('k')
246
+ @node.should_not be_leaf
247
+ end
248
+ end
249
+
250
+ describe :clone do
251
+ it 'creates a new instance of this node which is not this node' do
252
+ new_node = @node.clone
253
+ new_node.should_not == @node
254
+ end
255
+
256
+ it 'matches the state of the current node' do
257
+ new_node = @node.clone
258
+ new_node.state.should == @node.state
259
+ end
260
+
261
+ it 'matches the full_state of the current node' do
262
+ new_node = @node.clone
263
+ new_node.full_state.should == @node.full_state
264
+ end
265
+ end
266
+ end