fast_trie 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/trie/trie.h ADDED
@@ -0,0 +1,40 @@
1
+ #include "darray.h"
2
+ #include "tail.h"
3
+
4
+ typedef struct _Trie {
5
+ DArray *da;
6
+ Tail *tail;
7
+ } Trie;
8
+
9
+ typedef struct _TrieState {
10
+ const Trie *trie; /**< the corresponding trie */
11
+ TrieIndex index; /**< index in double-array/tail structures */
12
+ short suffix_idx; /**< suffix character offset, if in suffix */
13
+ short is_suffix; /**< whether it is currently in suffix part */
14
+ } TrieState;
15
+
16
+
17
+ #define trie_da_is_separate(da,s) (da_get_base ((da), (s)) < 0)
18
+ #define trie_da_get_tail_index(da,s) (-da_get_base ((da), (s)))
19
+ #define trie_da_set_tail_index(da,s,v) (da_set_base ((da), (s), -(v)))
20
+ #define trie_state_is_terminal(s) trie_state_is_walkable((s),TRIE_CHAR_TERM)
21
+
22
+
23
+ Trie* trie_new();
24
+ void trie_free(Trie *trie);
25
+ static Bool trie_branch_in_branch (Trie *trie, TrieIndex sep_node, const TrieChar *suffix, TrieData data);
26
+ static Bool trie_branch_in_tail(Trie *trie, TrieIndex sep_node, const TrieChar *suffix, TrieData data);
27
+ Bool trie_store (Trie *trie, const TrieChar *key, TrieData data);
28
+ Bool trie_retrieve (const Trie *trie, const TrieChar *key, TrieData *o_data);
29
+ Bool trie_delete (Trie *trie, const TrieChar *key);
30
+ TrieState * trie_root (const Trie *trie);
31
+ static TrieState * trie_state_new (const Trie *trie, TrieIndex index, short suffix_idx, short is_suffix);
32
+ TrieState * trie_state_clone (const TrieState *s);
33
+ void trie_state_free (TrieState *s);
34
+ void trie_state_rewind (TrieState *s);
35
+ Bool trie_state_walk (TrieState *s, TrieChar c);
36
+ Bool trie_state_is_walkable (const TrieState *s, TrieChar c);
37
+ Bool trie_state_is_leaf (const TrieState *s);
38
+ TrieData trie_state_get_data (const TrieState *s);
39
+
40
+
@@ -0,0 +1,73 @@
1
+ /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
+ /*
3
+ * triedefs.h - General typedefs for trie
4
+ * Created: 2006-08-11
5
+ * Author: Theppitak Karoonboonyanan <thep@linux.thai.net>
6
+ */
7
+
8
+ #ifndef __TRIEDEFS_H
9
+ #define __TRIEDEFS_H
10
+
11
+ #include "typedefs.h"
12
+
13
+ /**
14
+ * @file triedefs.h
15
+ * @brief General typedefs for trie
16
+ */
17
+
18
+ /**
19
+ * @brief Trie IO modes
20
+ */
21
+ typedef enum {
22
+ TRIE_IO_READ = 0x01,
23
+ TRIE_IO_WRITE = 0x02,
24
+ TRIE_IO_CREATE = 0x04
25
+ } TrieIOMode;
26
+
27
+ /**
28
+ * @brief Trie character type for alphabet
29
+ */
30
+ typedef uint32 AlphaChar;
31
+
32
+ /**
33
+ * @brief Error value for alphabet character
34
+ */
35
+ #define ALPHA_CHAR_ERROR (~(AlphaChar)0)
36
+
37
+ /**
38
+ * @brief Trie character type for key
39
+ */
40
+ typedef unsigned char TrieChar;
41
+ /**
42
+ * @brief Trie terminator character
43
+ */
44
+ #define TRIE_CHAR_TERM '\0'
45
+ #define TRIE_CHAR_MAX 255
46
+
47
+ /**
48
+ * @brief Type of Trie index
49
+ */
50
+ typedef int32 TrieIndex;
51
+ /**
52
+ * @brief Trie error index
53
+ */
54
+ #define TRIE_INDEX_ERROR 0
55
+ /**
56
+ * @brief Maximum trie index value
57
+ */
58
+ #define TRIE_INDEX_MAX 0x7fffffff
59
+
60
+ /**
61
+ * @brief Type of value associated to trie entries
62
+ */
63
+ typedef unsigned long TrieData;
64
+ /**
65
+ * @brief Trie error data
66
+ */
67
+ #define TRIE_DATA_ERROR -1
68
+
69
+ #endif /* __TRIEDEFS_H */
70
+
71
+ /*
72
+ vi:ts=4:ai:expandtab
73
+ */
@@ -0,0 +1,113 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * typedefs.h - general types
 * Created  : 11 Aug 2006
 * Author   : Theppitak Karoonboonyanan <thep@linux.thai.net>
 */

/* NOTE(review): identifiers containing a double underscore are reserved for
 * the implementation; the guard name is left untouched for compatibility. */
#ifndef __TYPEDEFS_H
#define __TYPEDEFS_H

#include <limits.h>

/* Boolean type: FALSE is 0, TRUE is 1. */
typedef enum { FALSE, TRUE } Bool;

/*
 * Fixed-width integer aliases, chosen from the ranges in <limits.h>.
 *
 * Each alias has a marker macro (<NAME>_TYPEDEF).  A candidate type is only
 * considered while the marker is still undefined, so the first candidate
 * whose range matches wins, and a marker defined before this header is
 * included suppresses the corresponding typedef altogether.
 */

/* 8-bit */
#if !defined (UINT8_TYPEDEF) && (UCHAR_MAX == 0xff)
#  define UINT8_TYPEDEF
typedef unsigned char  uint8;
#endif

#if !defined (INT8_TYPEDEF) && (SCHAR_MAX == 0x7f)
#  define INT8_TYPEDEF
typedef signed char    int8;
#endif

/* 16-bit: int is tried first, then short */
#if !defined (UINT16_TYPEDEF) && (UINT_MAX == 0xffff)
#  define UINT16_TYPEDEF
typedef unsigned int   uint16;
#endif

#if !defined (INT16_TYPEDEF) && (INT_MAX == 0x7fff)
#  define INT16_TYPEDEF
typedef int            int16;
#endif

#if !defined (UINT16_TYPEDEF) && (USHRT_MAX == 0xffff)
#  define UINT16_TYPEDEF
typedef unsigned short uint16;
#endif

#if !defined (INT16_TYPEDEF) && (SHRT_MAX == 0x7fff)
#  define INT16_TYPEDEF
typedef short          int16;
#endif

/* 32-bit: int is tried first, then long */
#if !defined (UINT32_TYPEDEF) && (UINT_MAX == 0xffffffff)
#  define UINT32_TYPEDEF
typedef unsigned int   uint32;
#endif

#if !defined (INT32_TYPEDEF) && (INT_MAX == 0x7fffffff)
#  define INT32_TYPEDEF
typedef int            int32;
#endif

#if !defined (UINT32_TYPEDEF) && (ULONG_MAX == 0xffffffff)
#  define UINT32_TYPEDEF
typedef unsigned long  uint32;
#endif

#if !defined (INT32_TYPEDEF) && (LONG_MAX == 0x7fffffff)
#  define INT32_TYPEDEF
typedef long           int32;
#endif

/* Stop the build if any width could not be matched to a native type. */
#if !defined (UINT8_TYPEDEF)
#  error "uint8 type is undefined!"
#endif
#if !defined (INT8_TYPEDEF)
#  error "int8 type is undefined!"
#endif
#if !defined (UINT16_TYPEDEF)
#  error "uint16 type is undefined!"
#endif
#if !defined (INT16_TYPEDEF)
#  error "int16 type is undefined!"
#endif
#if !defined (UINT32_TYPEDEF)
#  error "uint32 type is undefined!"
#endif
#if !defined (INT32_TYPEDEF)
#  error "int32 type is undefined!"
#endif

/* Size-named aliases for the unsigned types above. */
typedef uint8  byte;
typedef uint16 word;
typedef uint32 dword;


#endif /* __TYPEDEFS_H */

/*
vi:ts=4:ai:expandtab
*/
data/lib/trie.rb ADDED
@@ -0,0 +1 @@
1
+ require File.dirname(__FILE__) + '/../ext/trie'
data/spec/trie_spec.rb ADDED
@@ -0,0 +1,266 @@
1
+ require File.dirname(__FILE__) + '/../ext/trie/trie'
2
+
# Specs for Trie. Every example starts from a fresh trie holding
# 'rocket', 'rock' and 'frederico', all added without an explicit value.
describe Trie do
  before :each do
    @trie = Trie.new
    @trie.add('rocket')
    @trie.add('rock')
    @trie.add('frederico')
  end

  #describe :path do
  #  it 'returns the correct path' do
  #    @trie.path.should == TRIE_PATH
  #  end
  #end

  describe :has_key? do
    it 'returns true for words in the trie' do
      @trie.has_key?('rocket').should be_true
    end

    it 'returns nil for words that are not in the trie' do
      @trie.has_key?('not_in_the_trie').should be_nil
    end
  end

  describe :get do
    # A word added without a value reads back as -1.
    it 'returns -1 for words in the trie without a weight' do
      @trie.get('rocket').should == -1
    end

    it 'returns nil if the word is not in the trie' do
      @trie.get('not_in_the_trie').should be_nil
    end
  end

  describe :add do
    it 'adds a word to the trie' do
      @trie.add('forsooth').should == true
      @trie.get('forsooth').should == -1
    end

    it 'adds a word with a weight to the trie' do
      @trie.add('chicka',123).should == true
      @trie.get('chicka').should == 123
    end

    it 'adds values greater than 16-bit allows' do
      @trie.add('chicka', 72_000).should == true
      @trie.get('chicka').should == 72_000
    end

    it 'adds a word with a non-numeric value to the trie' do
      @trie.add('doot', 'Heeey').should == true
      @trie.get('doot').should == 'Heeey'
    end
  end

  describe :delete do
    it 'deletes a word from the trie' do
      @trie.delete('rocket').should == true
      @trie.has_key?('rocket').should be_nil
    end
  end

  describe :children do
    it 'returns all words beginning with a given prefix' do
      children = @trie.children('roc')
      children.size.should == 2
      children.should include('rock')
      children.should include('rocket')
    end

    it 'returns blank array if prefix does not exist' do
      @trie.children('ajsodij').should == []
    end

    it 'includes the prefix if the prefix is a word' do
      children = @trie.children('rock')
      children.size.should == 2
      children.should include('rock')
      children.should include('rocket')
    end

    it 'returns blank array if prefix is nil' do
      @trie.children(nil).should == []
    end
  end

  describe :children_with_values do
    before :each do
      @trie.add('abc',2)
      @trie.add('abcd',4)
    end

    it 'returns all words with values beginning with a given prefix' do
      children = @trie.children_with_values('ab')
      children.size.should == 2
      children.should include(['abc',2])
      children.should include(['abcd',4])
    end

    # The expectation is an empty array, so the description says so
    # (it previously claimed nil).
    it 'returns blank array if prefix does not exist' do
      @trie.children_with_values('ajsodij').should == []
    end

    it 'includes the prefix if the prefix is a word' do
      children = @trie.children_with_values('abc')
      children.size.should == 2
      children.should include(['abc',2])
      children.should include(['abcd',4])
    end

    it 'returns blank array if prefix is nil' do
      @trie.children_with_values(nil).should == []
    end
  end

  #describe :walk_to_terminal do
  #  it 'returns the first word found along a path' do
  #    @trie.add 'anderson'
  #    @trie.add 'andreas'
  #    @trie.add 'and'

  #    @trie.walk_to_terminal('anderson').should == 'and'
  #  end

  #  it 'returns the first word and value along a path' do
  #    @trie.add 'anderson'
  #    @trie.add 'andreas'
  #    @trie.add 'and', 15

  #    @trie.walk_to_terminal('anderson',true).should == ['and', 15]
  #  end
  #end

  describe :root do
    it 'returns a TrieNode' do
      @trie.root.should be_an_instance_of(TrieNode)
    end

    it 'returns a different TrieNode each time' do
      @trie.root.should_not == @trie.root
    end
  end

  #describe :save do
  #  it 'saves the trie to disk such that another trie can be spawned which will read succesfully' do
  #    @trie.add('omgwtf',123)
  #    @trie.save
  #
  #    trie2 = Trie.new(TRIE_PATH)
  #    trie2.get('omgwtf').should == 123
  #  end
  #end
end
157
+
# Specs for TrieNode. Every example starts at the root node of a fresh trie
# holding 'rocket' => 1, 'rock' => 2 and 'frederico' => 3.
describe TrieNode do
  before :each do
    @trie = Trie.new
    @trie.add('rocket',1)
    @trie.add('rock',2)
    @trie.add('frederico',3)
    @node = @trie.root
  end

  describe :state do
    it 'returns the most recent state character' do
      @node.walk!('r')
      @node.state.should == 'r'
      @node.walk!('o')
      @node.state.should == 'o'
    end

    it 'is nil when no walk has occurred' do
      @node.state.should == nil
    end
  end

  describe :full_state do
    it 'returns the current string' do
      @node.walk!('r').walk!('o').walk!('c')
      @node.full_state.should == 'roc'
    end

    it 'is a blank string when no walk has occurred' do
      @node.full_state.should == ''
    end
  end

  describe :walk! do
    it 'returns the updated object when the walk succeeds' do
      other = @node.walk!('r')
      other.should == @node
    end

    it 'returns nil when the walk fails' do
      @node.walk!('q').should be_nil
    end
  end

  describe :walk do
    it 'returns a new node object when the walk succeeds' do
      other = @node.walk('r')
      # `should !=` is not a supported expectation: on Ruby 1.8 it parses as
      # !(other.should == @node), i.e. it asserts equality. Use should_not.
      other.should_not == @node
    end

    it 'returns nil when the walk fails' do
      @node.walk('q').should be_nil
    end
  end


  describe :value do
    it 'returns nil when the node is not terminal' do
      @node.walk!('r')
      @node.value.should be_nil
    end

    it 'returns a value when the node is terminal' do
      @node.walk!('r').walk!('o').walk!('c').walk!('k')
      @node.value.should == 2
    end
  end

  describe :terminal? do
    it 'returns true when the node is a word end' do
      @node.walk!('r').walk!('o').walk!('c').walk!('k')
      @node.should be_terminal
    end

    it 'returns nil when the node is not a word end' do
      @node.walk!('r').walk!('o').walk!('c')
      @node.should_not be_terminal
    end
  end

  describe :leaf? do
    it 'returns true when this is the end of a branch of the trie' do
      @node.walk!('r').walk!('o').walk!('c').walk!('k').walk!('e').walk!('t')
      @node.should be_leaf
    end

    it 'returns nil when there are more splits on this branch' do
      @node.walk!('r').walk!('o').walk!('c').walk!('k')
      @node.should_not be_leaf
    end
  end

  describe :clone do
    it 'creates a new instance of this node which is not this node' do
      new_node = @node.clone
      new_node.should_not == @node
    end

    it 'matches the state of the current node' do
      new_node = @node.clone
      new_node.state.should == @node.state
    end

    it 'matches the full_state of the current node' do
      new_node = @node.clone
      new_node.full_state.should == @node.full_state
    end
  end
end