fast_trie 0.3.5
Sign up to get free protection for your applications and to get access to all the features.
- data/README.textile +132 -0
- data/VERSION.yml +4 -0
- data/ext/trie/Makefile +149 -0
- data/ext/trie/darray.c +673 -0
- data/ext/trie/darray.h +233 -0
- data/ext/trie/extconf.rb +3 -0
- data/ext/trie/fileutils.c +151 -0
- data/ext/trie/fileutils.h +36 -0
- data/ext/trie/tail.c +340 -0
- data/ext/trie/tail.h +207 -0
- data/ext/trie/trie-private.c +299 -0
- data/ext/trie/trie-private.h +31 -0
- data/ext/trie/trie.c +452 -0
- data/ext/trie/trie.h +40 -0
- data/ext/trie/triedefs.h +73 -0
- data/ext/trie/typedefs.h +113 -0
- data/lib/trie.rb +1 -0
- data/spec/trie_spec.rb +266 -0
- metadata +80 -0
data/ext/trie/trie.h
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
#include "darray.h"
|
2
|
+
#include "tail.h"
|
3
|
+
|
4
|
+
/** Trie object: holds one DArray and one Tail (types presumably provided by the darray.h / tail.h includes above). */
typedef struct _Trie {
|
5
|
+
DArray *da; /**< double-array part of the trie */
|
6
|
+
Tail *tail; /**< tail part of the trie */
|
7
|
+
} Trie;
|
8
|
+
|
9
|
+
typedef struct _TrieState {
|
10
|
+
const Trie *trie; /**< the corresponding trie */
|
11
|
+
TrieIndex index; /**< index in double-array/tail structures */
|
12
|
+
short suffix_idx; /**< suffix character offset, if in suffix */
|
13
|
+
short is_suffix; /**< whether it is currently in suffix part */
|
14
|
+
} TrieState;
|
15
|
+
|
16
|
+
|
17
|
+
#define trie_da_is_separate(da,s) (da_get_base ((da), (s)) < 0)
|
18
|
+
#define trie_da_get_tail_index(da,s) (-da_get_base ((da), (s)))
|
19
|
+
#define trie_da_set_tail_index(da,s,v) (da_set_base ((da), (s), -(v)))
|
20
|
+
#define trie_state_is_terminal(s) trie_state_is_walkable((s),TRIE_CHAR_TERM)
|
21
|
+
|
22
|
+
|
23
|
+
/** Declared with (void): an empty () in C leaves the parameter list unspecified, which disables argument checking at call sites. */
Trie *trie_new (void);
|
24
|
+
void trie_free(Trie *trie);
|
25
|
+
/*
 * NOTE(review): the two prototypes below are `static` (internal linkage) but
 * live in a header. Every file that includes this header gets its own
 * declaration and must supply its own definition; compilers typically warn
 * about a static function that is declared but never defined. Consider moving
 * these prototypes into the .c file that implements them.
 */
static Bool trie_branch_in_branch (Trie *trie, TrieIndex sep_node, const TrieChar *suffix, TrieData data);
|
26
|
+
static Bool trie_branch_in_tail(Trie *trie, TrieIndex sep_node, const TrieChar *suffix, TrieData data);
|
27
|
+
Bool trie_store (Trie *trie, const TrieChar *key, TrieData data);
|
28
|
+
Bool trie_retrieve (const Trie *trie, const TrieChar *key, TrieData *o_data);
|
29
|
+
Bool trie_delete (Trie *trie, const TrieChar *key);
|
30
|
+
TrieState * trie_root (const Trie *trie);
|
31
|
+
/*
 * NOTE(review): `static` prototype in a header -- it has internal linkage, so
 * each including file needs its own definition. It likely belongs in the
 * implementing .c file instead; confirm before moving.
 */
static TrieState * trie_state_new (const Trie *trie, TrieIndex index, short suffix_idx, short is_suffix);
|
32
|
+
TrieState * trie_state_clone (const TrieState *s);
|
33
|
+
void trie_state_free (TrieState *s);
|
34
|
+
void trie_state_rewind (TrieState *s);
|
35
|
+
Bool trie_state_walk (TrieState *s, TrieChar c);
|
36
|
+
Bool trie_state_is_walkable (const TrieState *s, TrieChar c);
|
37
|
+
Bool trie_state_is_leaf (const TrieState *s);
|
38
|
+
TrieData trie_state_get_data (const TrieState *s);
|
39
|
+
|
40
|
+
|
data/ext/trie/triedefs.h
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
2
|
+
/*
|
3
|
+
* triedefs.h - General typedefs for trie
|
4
|
+
* Created: 2006-08-11
|
5
|
+
* Author: Theppitak Karoonboonyanan <thep@linux.thai.net>
|
6
|
+
*/
|
7
|
+
|
8
|
+
#ifndef __TRIEDEFS_H
|
9
|
+
#define __TRIEDEFS_H
|
10
|
+
|
11
|
+
#include "typedefs.h"
|
12
|
+
|
13
|
+
/**
|
14
|
+
* @file triedefs.h
|
15
|
+
* @brief General typedefs for trie
|
16
|
+
*/
|
17
|
+
|
18
|
+
/**
|
19
|
+
* @brief Trie IO modes
|
20
|
+
*/
|
21
|
+
typedef enum {
|
22
|
+
TRIE_IO_READ = 0x01,
|
23
|
+
TRIE_IO_WRITE = 0x02,
|
24
|
+
TRIE_IO_CREATE = 0x04
|
25
|
+
} TrieIOMode;
|
26
|
+
|
27
|
+
/**
|
28
|
+
* @brief Trie character type for alphabet
|
29
|
+
*/
|
30
|
+
typedef uint32 AlphaChar;
|
31
|
+
|
32
|
+
/**
|
33
|
+
* @brief Error value for alphabet character
|
34
|
+
*/
|
35
|
+
#define ALPHA_CHAR_ERROR (~(AlphaChar)0)
|
36
|
+
|
37
|
+
/**
|
38
|
+
* @brief Trie character type for key
|
39
|
+
*/
|
40
|
+
typedef unsigned char TrieChar;
|
41
|
+
/**
|
42
|
+
* @brief Trie terminator character
|
43
|
+
*/
|
44
|
+
#define TRIE_CHAR_TERM '\0'
|
45
|
+
#define TRIE_CHAR_MAX 255
|
46
|
+
|
47
|
+
/**
|
48
|
+
* @brief Type of Trie index
|
49
|
+
*/
|
50
|
+
typedef int32 TrieIndex;
|
51
|
+
/**
|
52
|
+
* @brief Trie error index
|
53
|
+
*/
|
54
|
+
#define TRIE_INDEX_ERROR 0
|
55
|
+
/**
|
56
|
+
* @brief Maximum trie index value
|
57
|
+
*/
|
58
|
+
#define TRIE_INDEX_MAX 0x7fffffff
|
59
|
+
|
60
|
+
/**
|
61
|
+
* @brief Type of value associated to trie entries
|
62
|
+
*/
|
63
|
+
typedef unsigned long TrieData;
|
64
|
+
/**
|
65
|
+
* @brief Trie error data
|
66
|
+
*/
|
67
|
+
/* Parenthesized so the expansion is always a single operand. The value stays a
 * plain int -1; compared against a TrieData (unsigned long) it converts to the
 * all-ones value. */
#define TRIE_DATA_ERROR (-1)
|
68
|
+
|
69
|
+
#endif /* __TRIEDEFS_H */
|
70
|
+
|
71
|
+
/*
|
72
|
+
vi:ts=4:ai:expandtab
|
73
|
+
*/
|
data/ext/trie/typedefs.h
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
2
|
+
/*
|
3
|
+
* typedefs.h - general types
|
4
|
+
* Created : 11 Aug 2006
|
5
|
+
* Author : Theppitak Karoonboonyanan <thep@linux.thai.net>
|
6
|
+
*/
|
7
|
+
|
8
|
+
#ifndef __TYPEDEFS_H
|
9
|
+
#define __TYPEDEFS_H
|
10
|
+
|
11
|
+
#include <limits.h>
|
12
|
+
|
13
|
+
typedef enum { FALSE = 0, TRUE = 1 } Bool;
|
14
|
+
|
15
|
+
# if UCHAR_MAX == 0xff
|
16
|
+
# ifndef UINT8_TYPEDEF
|
17
|
+
# define UINT8_TYPEDEF
|
18
|
+
typedef unsigned char uint8;
|
19
|
+
# endif /* UINT8_TYPEDEF */
|
20
|
+
# endif /* UCHAR_MAX */
|
21
|
+
|
22
|
+
# if SCHAR_MAX == 0x7f
|
23
|
+
# ifndef INT8_TYPEDEF
|
24
|
+
# define INT8_TYPEDEF
|
25
|
+
typedef signed char int8;
|
26
|
+
# endif /* INT8_TYPEDEF */
|
27
|
+
# endif /* SCHAR_MAX */
|
28
|
+
|
29
|
+
# if UINT_MAX == 0xffff
|
30
|
+
# ifndef UINT16_TYPEDEF
|
31
|
+
# define UINT16_TYPEDEF
|
32
|
+
typedef unsigned int uint16;
|
33
|
+
# endif /* UINT16_TYPEDEF */
|
34
|
+
# endif /* UINT_MAX */
|
35
|
+
|
36
|
+
# if INT_MAX == 0x7fff
|
37
|
+
# ifndef INT16_TYPEDEF
|
38
|
+
# define INT16_TYPEDEF
|
39
|
+
typedef int int16;
|
40
|
+
# endif /* INT16_TYPEDEF */
|
41
|
+
# endif /* INT_MAX */
|
42
|
+
|
43
|
+
# if USHRT_MAX == 0xffff
|
44
|
+
# ifndef UINT16_TYPEDEF
|
45
|
+
# define UINT16_TYPEDEF
|
46
|
+
typedef unsigned short uint16;
|
47
|
+
# endif /* UINT16_TYPEDEF */
|
48
|
+
# endif /* USHRT_MAX */
|
49
|
+
|
50
|
+
# if SHRT_MAX == 0x7fff
|
51
|
+
# ifndef INT16_TYPEDEF
|
52
|
+
# define INT16_TYPEDEF
|
53
|
+
typedef short int16;
|
54
|
+
# endif /* INT16_TYPEDEF */
|
55
|
+
# endif /* SHRT_MAX */
|
56
|
+
|
57
|
+
# if UINT_MAX == 0xffffffff
|
58
|
+
# ifndef UINT32_TYPEDEF
|
59
|
+
# define UINT32_TYPEDEF
|
60
|
+
typedef unsigned int uint32;
|
61
|
+
# endif /* UINT32_TYPEDEF */
|
62
|
+
# endif /* UINT_MAX */
|
63
|
+
|
64
|
+
# if INT_MAX == 0x7fffffff
|
65
|
+
# ifndef INT32_TYPEDEF
|
66
|
+
# define INT32_TYPEDEF
|
67
|
+
typedef int int32;
|
68
|
+
# endif /* INT32_TYPEDEF */
|
69
|
+
# endif /* INT_MAX */
|
70
|
+
|
71
|
+
# if ULONG_MAX == 0xffffffff
|
72
|
+
# ifndef UINT32_TYPEDEF
|
73
|
+
# define UINT32_TYPEDEF
|
74
|
+
typedef unsigned long uint32;
|
75
|
+
# endif /* UINT32_TYPEDEF */
|
76
|
+
# endif /* ULONG_MAX */
|
77
|
+
|
78
|
+
# if LONG_MAX == 0x7fffffff
|
79
|
+
# ifndef INT32_TYPEDEF
|
80
|
+
# define INT32_TYPEDEF
|
81
|
+
typedef long int32;
|
82
|
+
# endif /* INT32_TYPEDEF */
|
83
|
+
# endif /* LONG_MAX */
|
84
|
+
|
85
|
+
# ifndef UINT8_TYPEDEF
|
86
|
+
# error "uint8 type is undefined!"
|
87
|
+
# endif
|
88
|
+
# ifndef INT8_TYPEDEF
|
89
|
+
# error "int8 type is undefined!"
|
90
|
+
# endif
|
91
|
+
# ifndef UINT16_TYPEDEF
|
92
|
+
# error "uint16 type is undefined!"
|
93
|
+
# endif
|
94
|
+
# ifndef INT16_TYPEDEF
|
95
|
+
# error "int16 type is undefined!"
|
96
|
+
# endif
|
97
|
+
# ifndef UINT32_TYPEDEF
|
98
|
+
# error "uint32 type is undefined!"
|
99
|
+
# endif
|
100
|
+
# ifndef INT32_TYPEDEF
|
101
|
+
# error "int32 type is undefined!"
|
102
|
+
# endif
|
103
|
+
|
104
|
+
typedef uint8 byte;
|
105
|
+
typedef uint16 word;
|
106
|
+
typedef uint32 dword;
|
107
|
+
|
108
|
+
|
109
|
+
#endif /* __TYPEDEFS_H */
|
110
|
+
|
111
|
+
/*
|
112
|
+
vi:ts=4:ai:expandtab
|
113
|
+
*/
|
data/lib/trie.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../ext/trie'
|
data/spec/trie_spec.rb
ADDED
@@ -0,0 +1,266 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../ext/trie/trie'
|
2
|
+
|
3
|
+
describe Trie do
|
4
|
+
before :each do
|
5
|
+
@trie = Trie.new;
|
6
|
+
@trie.add('rocket')
|
7
|
+
@trie.add('rock')
|
8
|
+
@trie.add('frederico')
|
9
|
+
end
|
10
|
+
|
11
|
+
#describe :path do
|
12
|
+
# it 'returns the correct path' do
|
13
|
+
# @trie.path.should == TRIE_PATH
|
14
|
+
# end
|
15
|
+
#end
|
16
|
+
|
17
|
+
describe :has_key? do
|
18
|
+
it 'returns true for words in the trie' do
|
19
|
+
@trie.has_key?('rocket').should be_true
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'returns nil for words that are not in the trie' do
|
23
|
+
@trie.has_key?('not_in_the_trie').should be_nil
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
describe :get do
|
28
|
+
it 'returns -1 for words in the trie without a weight' do
|
29
|
+
@trie.get('rocket').should == -1
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'returns nil if the word is not in the trie' do
|
33
|
+
@trie.get('not_in_the_trie').should be_nil
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
describe :add do
|
38
|
+
it 'adds a word to the trie' do
|
39
|
+
@trie.add('forsooth').should == true
|
40
|
+
@trie.get('forsooth').should == -1
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'adds a word with a weight to the trie' do
|
44
|
+
@trie.add('chicka',123).should == true
|
45
|
+
@trie.get('chicka').should == 123
|
46
|
+
end
|
47
|
+
|
48
|
+
it 'adds values greater than 16-bit allows' do
|
49
|
+
@trie.add('chicka', 72_000).should == true
|
50
|
+
@trie.get('chicka').should == 72_000
|
51
|
+
end
|
52
|
+
|
53
|
+
it 'adds a word with a non-numeric value to the trie' do
|
54
|
+
@trie.add('doot', 'Heeey').should == true
|
55
|
+
@trie.get('doot').should == 'Heeey'
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
describe :delete do
|
60
|
+
it 'deletes a word from the trie' do
|
61
|
+
@trie.delete('rocket').should == true
|
62
|
+
@trie.has_key?('rocket').should be_nil
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
describe :children do
|
67
|
+
it 'returns all words beginning with a given prefix' do
|
68
|
+
children = @trie.children('roc')
|
69
|
+
children.size.should == 2
|
70
|
+
children.should include('rock')
|
71
|
+
children.should include('rocket')
|
72
|
+
end
|
73
|
+
|
74
|
+
it 'returns blank array if prefix does not exist' do
|
75
|
+
@trie.children('ajsodij').should == []
|
76
|
+
end
|
77
|
+
|
78
|
+
it 'includes the prefix if the prefix is a word' do
|
79
|
+
children = @trie.children('rock')
|
80
|
+
children.size.should == 2
|
81
|
+
children.should include('rock')
|
82
|
+
children.should include('rocket')
|
83
|
+
end
|
84
|
+
|
85
|
+
it 'returns blank array if prefix is nil' do
|
86
|
+
@trie.children(nil).should == []
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
describe :children_with_values do
|
91
|
+
before :each do
|
92
|
+
@trie.add('abc',2)
|
93
|
+
@trie.add('abcd',4)
|
94
|
+
end
|
95
|
+
|
96
|
+
it 'returns all words with values beginning with a given prefix' do
|
97
|
+
children = @trie.children_with_values('ab')
|
98
|
+
children.size.should == 2
|
99
|
+
children.should include(['abc',2])
|
100
|
+
children.should include(['abcd',4])
|
101
|
+
end
|
102
|
+
|
103
|
+
# An unknown prefix is expected to yield an empty array, not nil.
it 'returns blank array if prefix does not exist' do
  @trie.children_with_values('ajsodij').should == []
end
|
106
|
+
|
107
|
+
it 'includes the prefix if the prefix is a word' do
|
108
|
+
children = @trie.children_with_values('abc')
|
109
|
+
children.size.should == 2
|
110
|
+
children.should include(['abc',2])
|
111
|
+
children.should include(['abcd',4])
|
112
|
+
end
|
113
|
+
|
114
|
+
it 'returns blank array if prefix is nil' do
|
115
|
+
@trie.children_with_values(nil).should == []
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
#describe :walk_to_terminal do
|
120
|
+
# it 'returns the first word found along a path' do
|
121
|
+
# @trie.add 'anderson'
|
122
|
+
# @trie.add 'andreas'
|
123
|
+
# @trie.add 'and'
|
124
|
+
|
125
|
+
# @trie.walk_to_terminal('anderson').should == 'and'
|
126
|
+
# end
|
127
|
+
|
128
|
+
# it 'returns the first word and value along a path' do
|
129
|
+
# @trie.add 'anderson'
|
130
|
+
# @trie.add 'andreas'
|
131
|
+
# @trie.add 'and', 15
|
132
|
+
|
133
|
+
# @trie.walk_to_terminal('anderson',true).should == ['and', 15]
|
134
|
+
# end
|
135
|
+
#end
|
136
|
+
|
137
|
+
describe :root do
|
138
|
+
it 'returns a TrieNode' do
|
139
|
+
@trie.root.should be_an_instance_of(TrieNode)
|
140
|
+
end
|
141
|
+
|
142
|
+
it 'returns a different TrieNode each time' do
|
143
|
+
@trie.root.should_not == @trie.root
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
#describe :save do
|
148
|
+
# it 'saves the trie to disk such that another trie can be spawned which will read successfully' do
|
149
|
+
# @trie.add('omgwtf',123)
|
150
|
+
# @trie.save
|
151
|
+
#
|
152
|
+
# trie2 = Trie.new(TRIE_PATH)
|
153
|
+
# trie2.get('omgwtf').should == 123
|
154
|
+
# end
|
155
|
+
#end
|
156
|
+
end
|
157
|
+
|
158
|
+
describe TrieNode do
|
159
|
+
before :each do
|
160
|
+
@trie = Trie.new;
|
161
|
+
@trie.add('rocket',1)
|
162
|
+
@trie.add('rock',2)
|
163
|
+
@trie.add('frederico',3)
|
164
|
+
@node = @trie.root
|
165
|
+
end
|
166
|
+
|
167
|
+
describe :state do
|
168
|
+
it 'returns the most recent state character' do
|
169
|
+
@node.walk!('r')
|
170
|
+
@node.state.should == 'r'
|
171
|
+
@node.walk!('o')
|
172
|
+
@node.state.should == 'o'
|
173
|
+
end
|
174
|
+
|
175
|
+
it 'is nil when no walk has occurred' do
|
176
|
+
@node.state.should == nil
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
180
|
+
describe :full_state do
|
181
|
+
it 'returns the current string' do
|
182
|
+
@node.walk!('r').walk!('o').walk!('c')
|
183
|
+
@node.full_state.should == 'roc'
|
184
|
+
end
|
185
|
+
|
186
|
+
it 'is a blank string when no walk has occurred' do
|
187
|
+
@node.full_state.should == ''
|
188
|
+
end
|
189
|
+
end
|
190
|
+
|
191
|
+
describe :walk! do
|
192
|
+
it 'returns the updated object when the walk succeeds' do
|
193
|
+
other = @node.walk!('r')
|
194
|
+
other.should == @node
|
195
|
+
end
|
196
|
+
|
197
|
+
it 'returns nil when the walk fails' do
|
198
|
+
@node.walk!('q').should be_nil
|
199
|
+
end
|
200
|
+
end
|
201
|
+
|
202
|
+
describe :walk do
|
203
|
+
# walk (no bang) is expected to return a node object distinct from the receiver.
it 'returns a new node object when the walk succeeds' do
  other = @node.walk('r')
  # `should !=` is not a supported RSpec expectation (on Ruby 1.8 it parses as
  # !(other.should == @node)); `should_not ==` expresses the inequality.
  other.should_not == @node
end
|
207
|
+
|
208
|
+
it 'returns nil when the walk fails' do
|
209
|
+
@node.walk('q').should be_nil
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
|
214
|
+
describe :value do
|
215
|
+
it 'returns nil when the node is not terminal' do
|
216
|
+
@node.walk!('r')
|
217
|
+
@node.value.should be_nil
|
218
|
+
end
|
219
|
+
|
220
|
+
it 'returns a value when the node is terminal' do
|
221
|
+
@node.walk!('r').walk!('o').walk!('c').walk!('k')
|
222
|
+
@node.value.should == 2
|
223
|
+
end
|
224
|
+
end
|
225
|
+
|
226
|
+
describe :terminal? do
|
227
|
+
it 'returns true when the node is a word end' do
|
228
|
+
@node.walk!('r').walk!('o').walk!('c').walk!('k')
|
229
|
+
@node.should be_terminal
|
230
|
+
end
|
231
|
+
|
232
|
+
it 'returns nil when the node is not a word end' do
|
233
|
+
@node.walk!('r').walk!('o').walk!('c')
|
234
|
+
@node.should_not be_terminal
|
235
|
+
end
|
236
|
+
end
|
237
|
+
|
238
|
+
describe :leaf? do
|
239
|
+
it 'returns true when this is the end of a branch of the trie' do
|
240
|
+
@node.walk!('r').walk!('o').walk!('c').walk!('k').walk!('e').walk!('t')
|
241
|
+
@node.should be_leaf
|
242
|
+
end
|
243
|
+
|
244
|
+
it 'returns nil when there are more splits on this branch' do
|
245
|
+
@node.walk!('r').walk!('o').walk!('c').walk!('k')
|
246
|
+
@node.should_not be_leaf
|
247
|
+
end
|
248
|
+
end
|
249
|
+
|
250
|
+
describe :clone do
|
251
|
+
it 'creates a new instance of this node which is not this node' do
|
252
|
+
new_node = @node.clone
|
253
|
+
new_node.should_not == @node
|
254
|
+
end
|
255
|
+
|
256
|
+
it 'matches the state of the current node' do
|
257
|
+
new_node = @node.clone
|
258
|
+
new_node.state.should == @node.state
|
259
|
+
end
|
260
|
+
|
261
|
+
it 'matches the full_state of the current node' do
|
262
|
+
new_node = @node.clone
|
263
|
+
new_node.full_state.should == @node.full_state
|
264
|
+
end
|
265
|
+
end
|
266
|
+
end
|