fast_trie 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.textile +132 -0
- data/VERSION.yml +4 -0
- data/ext/trie/Makefile +149 -0
- data/ext/trie/darray.c +673 -0
- data/ext/trie/darray.h +233 -0
- data/ext/trie/extconf.rb +3 -0
- data/ext/trie/fileutils.c +151 -0
- data/ext/trie/fileutils.h +36 -0
- data/ext/trie/tail.c +340 -0
- data/ext/trie/tail.h +207 -0
- data/ext/trie/trie-private.c +299 -0
- data/ext/trie/trie-private.h +31 -0
- data/ext/trie/trie.c +452 -0
- data/ext/trie/trie.h +40 -0
- data/ext/trie/triedefs.h +73 -0
- data/ext/trie/typedefs.h +113 -0
- data/lib/trie.rb +1 -0
- data/spec/trie_spec.rb +266 -0
- metadata +80 -0
data/ext/trie/trie.h
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
#include "darray.h"
|
2
|
+
#include "tail.h"
|
3
|
+
|
4
|
+
/**
 * Trie handle: bundles a pointer to the double-array structure (DArray,
 * from darray.h) with a pointer to the tail structure (Tail, from tail.h).
 */
typedef struct _Trie {
|
5
|
+
DArray *da;
|
6
|
+
Tail *tail;
|
7
|
+
} Trie;
|
8
|
+
|
9
|
+
/**
 * Position inside a trie: records which trie is referenced, the current
 * index in its double-array/tail structures, and, while in the suffix part,
 * the character offset within that suffix.
 */
typedef struct _TrieState {
|
10
|
+
const Trie *trie; /**< the corresponding trie */
|
11
|
+
TrieIndex index; /**< index in double-array/tail structures */
|
12
|
+
short suffix_idx; /**< suffix character offset, if in suffix */
|
13
|
+
short is_suffix; /**< whether it is currently in suffix part */
|
14
|
+
} TrieState;
|
15
|
+
|
16
|
+
|
17
|
+
/**
 * Non-zero when the base value that da_get_base() reports for state s of
 * double-array da is negative. The sibling tail-index macros keep a negated
 * tail index in that same base slot.
 */
#define trie_da_is_separate(da,s) (da_get_base ((da), (s)) < 0)
|
18
|
+
/**
 * Yields the negation of the base value of state s in double-array da.
 * NOTE(review): presumably only meaningful when trie_da_is_separate(da, s)
 * holds, i.e. the base is negative -- confirm against callers.
 */
#define trie_da_get_tail_index(da,s) (-da_get_base ((da), (s)))
|
19
|
+
/**
 * Records tail index v for state s by storing -v as the state's base value
 * through da_set_base(). The expression evaluates to whatever da_set_base()
 * returns.
 */
#define trie_da_set_tail_index(da,s,v) (da_set_base ((da), (s), -(v)))
|
20
|
+
/**
 * Shorthand for asking whether the terminator character TRIE_CHAR_TERM is
 * walkable from state s; expands to a trie_state_is_walkable() call.
 */
#define trie_state_is_terminal(s) trie_state_is_walkable((s),TRIE_CHAR_TERM)
|
21
|
+
|
22
|
+
|
23
|
+
/**
 * Declares the trie constructor: takes no arguments and returns a Trie
 * pointer. Spelled with (void) so the declaration is a real prototype and a
 * call that passes arguments is rejected at compile time (an empty
 * parameter list leaves the arguments unchecked before C23).
 * NOTE(review): the result is presumably released with trie_free() -- confirm
 * in the implementation.
 */
Trie* trie_new(void);
|
24
|
+
void trie_free(Trie *trie);
|
25
|
+
/*
 * NOTE(review): this is a `static` declaration inside a header, so every
 * file that includes trie.h receives its own internal-linkage declaration;
 * a file that calls the function must also define it, and files that never
 * define it may draw a "declared static but never defined" warning.
 * Presumably only the implementation file needs it -- confirm before moving.
 */
static Bool trie_branch_in_branch (Trie *trie, TrieIndex sep_node, const TrieChar *suffix, TrieData data);
|
26
|
+
/*
 * NOTE(review): `static` declaration in a header -- each includer of trie.h
 * gets a private declaration and must supply its own definition if it calls
 * the function. Presumably belongs in the implementation file; confirm.
 */
static Bool trie_branch_in_tail(Trie *trie, TrieIndex sep_node, const TrieChar *suffix, TrieData data);
|
27
|
+
Bool trie_store (Trie *trie, const TrieChar *key, TrieData data);
|
28
|
+
Bool trie_retrieve (const Trie *trie, const TrieChar *key, TrieData *o_data);
|
29
|
+
Bool trie_delete (Trie *trie, const TrieChar *key);
|
30
|
+
TrieState * trie_root (const Trie *trie);
|
31
|
+
/*
 * NOTE(review): `static` declaration in a header -- each includer of trie.h
 * gets a private declaration and must supply its own definition if it calls
 * the function. The parameters mirror the four TrieState fields (trie,
 * index, suffix_idx, is_suffix).
 */
static TrieState * trie_state_new (const Trie *trie, TrieIndex index, short suffix_idx, short is_suffix);
|
32
|
+
TrieState * trie_state_clone (const TrieState *s);
|
33
|
+
void trie_state_free (TrieState *s);
|
34
|
+
void trie_state_rewind (TrieState *s);
|
35
|
+
Bool trie_state_walk (TrieState *s, TrieChar c);
|
36
|
+
Bool trie_state_is_walkable (const TrieState *s, TrieChar c);
|
37
|
+
Bool trie_state_is_leaf (const TrieState *s);
|
38
|
+
TrieData trie_state_get_data (const TrieState *s);
|
39
|
+
|
40
|
+
|
data/ext/trie/triedefs.h
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
2
|
+
/*
|
3
|
+
* triedefs.h - General typedefs for trie
|
4
|
+
* Created: 2006-08-11
|
5
|
+
* Author: Theppitak Karoonboonyanan <thep@linux.thai.net>
|
6
|
+
*/
|
7
|
+
|
8
|
+
#ifndef __TRIEDEFS_H
|
9
|
+
#define __TRIEDEFS_H
|
10
|
+
|
11
|
+
#include "typedefs.h"
|
12
|
+
|
13
|
+
/**
|
14
|
+
* @file triedefs.h
|
15
|
+
* @brief General typedefs for trie
|
16
|
+
*/
|
17
|
+
|
18
|
+
/**
|
19
|
+
* @brief Trie IO modes
|
20
|
+
*/
|
21
|
+
typedef enum {
|
22
|
+
TRIE_IO_READ = 0x01,
|
23
|
+
TRIE_IO_WRITE = 0x02,
|
24
|
+
TRIE_IO_CREATE = 0x04
|
25
|
+
} TrieIOMode;
|
26
|
+
|
27
|
+
/**
|
28
|
+
* @brief Trie character type for alphabet
|
29
|
+
*/
|
30
|
+
typedef uint32 AlphaChar;
|
31
|
+
|
32
|
+
/**
|
33
|
+
* @brief Error value for alphabet character
|
34
|
+
*/
|
35
|
+
#define ALPHA_CHAR_ERROR (~(AlphaChar)0)
|
36
|
+
|
37
|
+
/**
|
38
|
+
* @brief Trie character type for key
|
39
|
+
*/
|
40
|
+
typedef unsigned char TrieChar;
|
41
|
+
/**
|
42
|
+
* @brief Trie terminator character
|
43
|
+
*/
|
44
|
+
#define TRIE_CHAR_TERM '\0'
|
45
|
+
/**
 * @brief Upper bound for TrieChar values
 *
 * TrieChar is an unsigned char; 255 is its maximum only when char is 8 bits
 * wide. NOTE(review): presumably used as the inclusive upper limit when
 * iterating over key characters -- confirm against callers.
 */
#define TRIE_CHAR_MAX 255
|
46
|
+
|
47
|
+
/**
|
48
|
+
* @brief Type of Trie index
|
49
|
+
*/
|
50
|
+
typedef int32 TrieIndex;
|
51
|
+
/**
|
52
|
+
* @brief Trie error index
|
53
|
+
*/
|
54
|
+
#define TRIE_INDEX_ERROR 0
|
55
|
+
/**
|
56
|
+
* @brief Maximum trie index value
|
57
|
+
*/
|
58
|
+
#define TRIE_INDEX_MAX 0x7fffffff
|
59
|
+
|
60
|
+
/**
|
61
|
+
* @brief Type of value associated to trie entries
|
62
|
+
*/
|
63
|
+
typedef unsigned long TrieData;
|
64
|
+
/**
|
65
|
+
* @brief Trie error data
|
66
|
+
*/
|
67
|
+
/*
 * Sentinel for "no data / error". The replacement list is parenthesized so
 * the macro always expands as a single operand, whatever expression it is
 * pasted into; the value and type (int -1) are unchanged.
 * NOTE(review): TrieData is unsigned long, so comparing a TrieData against
 * this constant converts -1 to ULONG_MAX.
 */
#define TRIE_DATA_ERROR (-1)
|
68
|
+
|
69
|
+
#endif /* __TRIEDEFS_H */
|
70
|
+
|
71
|
+
/*
|
72
|
+
vi:ts=4:ai:expandtab
|
73
|
+
*/
|
data/ext/trie/typedefs.h
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
2
|
+
/*
|
3
|
+
* typedefs.h - general types
|
4
|
+
* Created : 11 Aug 2006
|
5
|
+
* Author : Theppitak Karoonboonyanan <thep@linux.thai.net>
|
6
|
+
*/
|
7
|
+
|
8
|
+
#ifndef __TYPEDEFS_H
|
9
|
+
#define __TYPEDEFS_H
|
10
|
+
|
11
|
+
#include <limits.h>
|
12
|
+
|
13
|
+
typedef enum { FALSE = 0, TRUE = 1 } Bool;
|
14
|
+
|
15
|
+
# if UCHAR_MAX == 0xff
|
16
|
+
# ifndef UINT8_TYPEDEF
|
17
|
+
# define UINT8_TYPEDEF
|
18
|
+
typedef unsigned char uint8;
|
19
|
+
# endif /* UINT8_TYPEDEF */
|
20
|
+
# endif /* UCHAR_MAX */
|
21
|
+
|
22
|
+
# if SCHAR_MAX == 0x7f
|
23
|
+
# ifndef INT8_TYPEDEF
|
24
|
+
# define INT8_TYPEDEF
|
25
|
+
typedef signed char int8;
|
26
|
+
# endif /* INT8_TYPEDEF */
|
27
|
+
# endif /* SCHAR_MAX */
|
28
|
+
|
29
|
+
# if UINT_MAX == 0xffff
|
30
|
+
# ifndef UINT16_TYPEDEF
|
31
|
+
# define UINT16_TYPEDEF
|
32
|
+
typedef unsigned int uint16;
|
33
|
+
# endif /* UINT16_TYPEDEF */
|
34
|
+
# endif /* UINT_MAX */
|
35
|
+
|
36
|
+
# if INT_MAX == 0x7fff
|
37
|
+
# ifndef INT16_TYPEDEF
|
38
|
+
# define INT16_TYPEDEF
|
39
|
+
typedef int int16;
|
40
|
+
# endif /* INT16_TYPEDEF */
|
41
|
+
# endif /* INT_MAX */
|
42
|
+
|
43
|
+
# if USHRT_MAX == 0xffff
|
44
|
+
# ifndef UINT16_TYPEDEF
|
45
|
+
# define UINT16_TYPEDEF
|
46
|
+
typedef unsigned short uint16;
|
47
|
+
# endif /* UINT16_TYPEDEF */
|
48
|
+
# endif /* USHRT_MAX */
|
49
|
+
|
50
|
+
# if SHRT_MAX == 0x7fff
|
51
|
+
# ifndef INT16_TYPEDEF
|
52
|
+
# define INT16_TYPEDEF
|
53
|
+
typedef short int16;
|
54
|
+
# endif /* INT16_TYPEDEF */
|
55
|
+
# endif /* SHRT_MAX */
|
56
|
+
|
57
|
+
# if UINT_MAX == 0xffffffff
|
58
|
+
# ifndef UINT32_TYPEDEF
|
59
|
+
# define UINT32_TYPEDEF
|
60
|
+
typedef unsigned int uint32;
|
61
|
+
# endif /* UINT32_TYPEDEF */
|
62
|
+
# endif /* UINT_MAX */
|
63
|
+
|
64
|
+
# if INT_MAX == 0x7fffffff
|
65
|
+
# ifndef INT32_TYPEDEF
|
66
|
+
# define INT32_TYPEDEF
|
67
|
+
typedef int int32;
|
68
|
+
# endif /* INT32_TYPEDEF */
|
69
|
+
# endif /* INT_MAX */
|
70
|
+
|
71
|
+
# if ULONG_MAX == 0xffffffff
|
72
|
+
# ifndef UINT32_TYPEDEF
|
73
|
+
# define UINT32_TYPEDEF
|
74
|
+
typedef unsigned long uint32;
|
75
|
+
# endif /* UINT32_TYPEDEF */
|
76
|
+
# endif /* ULONG_MAX */
|
77
|
+
|
78
|
+
# if LONG_MAX == 0x7fffffff
|
79
|
+
# ifndef INT32_TYPEDEF
|
80
|
+
# define INT32_TYPEDEF
|
81
|
+
typedef long int32;
|
82
|
+
# endif /* INT32_TYPEDEF */
|
83
|
+
# endif /* LONG_MAX */
|
84
|
+
|
85
|
+
# ifndef UINT8_TYPEDEF
|
86
|
+
# error "uint8 type is undefined!"
|
87
|
+
# endif
|
88
|
+
# ifndef INT8_TYPEDEF
|
89
|
+
# error "int8 type is undefined!"
|
90
|
+
# endif
|
91
|
+
# ifndef UINT16_TYPEDEF
|
92
|
+
# error "uint16 type is undefined!"
|
93
|
+
# endif
|
94
|
+
# ifndef INT16_TYPEDEF
|
95
|
+
# error "int16 type is undefined!"
|
96
|
+
# endif
|
97
|
+
# ifndef UINT32_TYPEDEF
|
98
|
+
# error "uint32 type is undefined!"
|
99
|
+
# endif
|
100
|
+
# ifndef INT32_TYPEDEF
|
101
|
+
# error "int32 type is undefined!"
|
102
|
+
# endif
|
103
|
+
|
104
|
+
typedef uint8 byte;
|
105
|
+
typedef uint16 word;
|
106
|
+
typedef uint32 dword;
|
107
|
+
|
108
|
+
|
109
|
+
#endif /* __TYPEDEFS_H */
|
110
|
+
|
111
|
+
/*
|
112
|
+
vi:ts=4:ai:expandtab
|
113
|
+
*/
|
data/lib/trie.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../ext/trie'
|
data/spec/trie_spec.rb
ADDED
@@ -0,0 +1,266 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../ext/trie/trie'
|
2
|
+
|
3
|
+
describe Trie do
|
4
|
+
before :each do
|
5
|
+
@trie = Trie.new;
|
6
|
+
@trie.add('rocket')
|
7
|
+
@trie.add('rock')
|
8
|
+
@trie.add('frederico')
|
9
|
+
end
|
10
|
+
|
11
|
+
#describe :path do
|
12
|
+
# it 'returns the correct path' do
|
13
|
+
# @trie.path.should == TRIE_PATH
|
14
|
+
# end
|
15
|
+
#end
|
16
|
+
|
17
|
+
describe :has_key? do
|
18
|
+
it 'returns true for words in the trie' do
|
19
|
+
@trie.has_key?('rocket').should be_true
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'returns nil for words that are not in the trie' do
|
23
|
+
@trie.has_key?('not_in_the_trie').should be_nil
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
describe :get do
|
28
|
+
it 'returns -1 for words in the trie without a weight' do
|
29
|
+
@trie.get('rocket').should == -1
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'returns nil if the word is not in the trie' do
|
33
|
+
@trie.get('not_in_the_trie').should be_nil
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
describe :add do
|
38
|
+
it 'adds a word to the trie' do
|
39
|
+
@trie.add('forsooth').should == true
|
40
|
+
@trie.get('forsooth').should == -1
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'adds a word with a weight to the trie' do
|
44
|
+
@trie.add('chicka',123).should == true
|
45
|
+
@trie.get('chicka').should == 123
|
46
|
+
end
|
47
|
+
|
48
|
+
it 'adds values greater than 16-bit allows' do
|
49
|
+
@trie.add('chicka', 72_000).should == true
|
50
|
+
@trie.get('chicka').should == 72_000
|
51
|
+
end
|
52
|
+
|
53
|
+
it 'adds a word with a non-numeric value to the trie' do
|
54
|
+
@trie.add('doot', 'Heeey').should == true
|
55
|
+
@trie.get('doot').should == 'Heeey'
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
describe :delete do
|
60
|
+
it 'deletes a word from the trie' do
|
61
|
+
@trie.delete('rocket').should == true
|
62
|
+
@trie.has_key?('rocket').should be_nil
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
describe :children do
|
67
|
+
it 'returns all words beginning with a given prefix' do
|
68
|
+
children = @trie.children('roc')
|
69
|
+
children.size.should == 2
|
70
|
+
children.should include('rock')
|
71
|
+
children.should include('rocket')
|
72
|
+
end
|
73
|
+
|
74
|
+
it 'returns blank array if prefix does not exist' do
|
75
|
+
@trie.children('ajsodij').should == []
|
76
|
+
end
|
77
|
+
|
78
|
+
it 'includes the prefix if the prefix is a word' do
|
79
|
+
children = @trie.children('rock')
|
80
|
+
children.size.should == 2
|
81
|
+
children.should include('rock')
|
82
|
+
children.should include('rocket')
|
83
|
+
end
|
84
|
+
|
85
|
+
it 'returns blank array if prefix is nil' do
|
86
|
+
@trie.children(nil).should == []
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
describe :children_with_values do
|
91
|
+
before :each do
|
92
|
+
@trie.add('abc',2)
|
93
|
+
@trie.add('abcd',4)
|
94
|
+
end
|
95
|
+
|
96
|
+
it 'returns all words with values beginning with a given prefix' do
|
97
|
+
children = @trie.children_with_values('ab')
|
98
|
+
children.size.should == 2
|
99
|
+
children.should include(['abc',2])
|
100
|
+
children.should include(['abcd',4])
|
101
|
+
end
|
102
|
+
|
103
|
+
it 'returns nil if prefix does not exist' do
|
104
|
+
@trie.children_with_values('ajsodij').should == []
|
105
|
+
end
|
106
|
+
|
107
|
+
it 'includes the prefix if the prefix is a word' do
|
108
|
+
children = @trie.children_with_values('abc')
|
109
|
+
children.size.should == 2
|
110
|
+
children.should include(['abc',2])
|
111
|
+
children.should include(['abcd',4])
|
112
|
+
end
|
113
|
+
|
114
|
+
it 'returns blank array if prefix is nil' do
|
115
|
+
@trie.children_with_values(nil).should == []
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
#describe :walk_to_terminal do
|
120
|
+
# it 'returns the first word found along a path' do
|
121
|
+
# @trie.add 'anderson'
|
122
|
+
# @trie.add 'andreas'
|
123
|
+
# @trie.add 'and'
|
124
|
+
|
125
|
+
# @trie.walk_to_terminal('anderson').should == 'and'
|
126
|
+
# end
|
127
|
+
|
128
|
+
# it 'returns the first word and value along a path' do
|
129
|
+
# @trie.add 'anderson'
|
130
|
+
# @trie.add 'andreas'
|
131
|
+
# @trie.add 'and', 15
|
132
|
+
|
133
|
+
# @trie.walk_to_terminal('anderson',true).should == ['and', 15]
|
134
|
+
# end
|
135
|
+
#end
|
136
|
+
|
137
|
+
describe :root do
|
138
|
+
it 'returns a TrieNode' do
|
139
|
+
@trie.root.should be_an_instance_of(TrieNode)
|
140
|
+
end
|
141
|
+
|
142
|
+
it 'returns a different TrieNode each time' do
|
143
|
+
@trie.root.should_not == @trie.root
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
#describe :save do
|
148
|
+
# it 'saves the trie to disk such that another trie can be spawned which will read successfully' do
|
149
|
+
# @trie.add('omgwtf',123)
|
150
|
+
# @trie.save
|
151
|
+
#
|
152
|
+
# trie2 = Trie.new(TRIE_PATH)
|
153
|
+
# trie2.get('omgwtf').should == 123
|
154
|
+
# end
|
155
|
+
#end
|
156
|
+
end
|
157
|
+
|
158
|
+
describe TrieNode do
|
159
|
+
before :each do
|
160
|
+
@trie = Trie.new;
|
161
|
+
@trie.add('rocket',1)
|
162
|
+
@trie.add('rock',2)
|
163
|
+
@trie.add('frederico',3)
|
164
|
+
@node = @trie.root
|
165
|
+
end
|
166
|
+
|
167
|
+
describe :state do
|
168
|
+
it 'returns the most recent state character' do
|
169
|
+
@node.walk!('r')
|
170
|
+
@node.state.should == 'r'
|
171
|
+
@node.walk!('o')
|
172
|
+
@node.state.should == 'o'
|
173
|
+
end
|
174
|
+
|
175
|
+
it 'is nil when no walk has occurred' do
|
176
|
+
@node.state.should == nil
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
180
|
+
describe :full_state do
|
181
|
+
it 'returns the current string' do
|
182
|
+
@node.walk!('r').walk!('o').walk!('c')
|
183
|
+
@node.full_state.should == 'roc'
|
184
|
+
end
|
185
|
+
|
186
|
+
it 'is a blank string when no walk has occurred' do
|
187
|
+
@node.full_state.should == ''
|
188
|
+
end
|
189
|
+
end
|
190
|
+
|
191
|
+
describe :walk! do
|
192
|
+
it 'returns the updated object when the walk succeeds' do
|
193
|
+
other = @node.walk!('r')
|
194
|
+
other.should == @node
|
195
|
+
end
|
196
|
+
|
197
|
+
it 'returns nil when the walk fails' do
|
198
|
+
@node.walk!('q').should be_nil
|
199
|
+
end
|
200
|
+
end
|
201
|
+
|
202
|
+
describe :walk do
|
203
|
+
it 'returns a new node object when the walk succeeds' do
|
204
|
+
other = @node.walk('r')
|
205
|
+
# Use should_not ==: "should !=" does not negate the RSpec operator matcher,
# so it cannot express "is a different object".
other.should_not == @node
|
206
|
+
end
|
207
|
+
|
208
|
+
it 'returns nil when the walk fails' do
|
209
|
+
@node.walk('q').should be_nil
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
|
214
|
+
describe :value do
|
215
|
+
it 'returns nil when the node is not terminal' do
|
216
|
+
@node.walk!('r')
|
217
|
+
@node.value.should be_nil
|
218
|
+
end
|
219
|
+
|
220
|
+
it 'returns a value when the node is terminal' do
|
221
|
+
@node.walk!('r').walk!('o').walk!('c').walk!('k')
|
222
|
+
@node.value.should == 2
|
223
|
+
end
|
224
|
+
end
|
225
|
+
|
226
|
+
describe :terminal? do
|
227
|
+
it 'returns true when the node is a word end' do
|
228
|
+
@node.walk!('r').walk!('o').walk!('c').walk!('k')
|
229
|
+
@node.should be_terminal
|
230
|
+
end
|
231
|
+
|
232
|
+
it 'returns nil when the node is not a word end' do
|
233
|
+
@node.walk!('r').walk!('o').walk!('c')
|
234
|
+
@node.should_not be_terminal
|
235
|
+
end
|
236
|
+
end
|
237
|
+
|
238
|
+
describe :leaf? do
|
239
|
+
it 'returns true when this is the end of a branch of the trie' do
|
240
|
+
@node.walk!('r').walk!('o').walk!('c').walk!('k').walk!('e').walk!('t')
|
241
|
+
@node.should be_leaf
|
242
|
+
end
|
243
|
+
|
244
|
+
it 'returns nil when there are more splits on this branch' do
|
245
|
+
@node.walk!('r').walk!('o').walk!('c').walk!('k')
|
246
|
+
@node.should_not be_leaf
|
247
|
+
end
|
248
|
+
end
|
249
|
+
|
250
|
+
describe :clone do
|
251
|
+
it 'creates a new instance of this node which is not this node' do
|
252
|
+
new_node = @node.clone
|
253
|
+
new_node.should_not == @node
|
254
|
+
end
|
255
|
+
|
256
|
+
it 'matches the state of the current node' do
|
257
|
+
new_node = @node.clone
|
258
|
+
new_node.state.should == @node.state
|
259
|
+
end
|
260
|
+
|
261
|
+
it 'matches the full_state of the current node' do
|
262
|
+
new_node = @node.clone
|
263
|
+
new_node.full_state.should == @node.full_state
|
264
|
+
end
|
265
|
+
end
|
266
|
+
end
|