melisa 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +11 -0
- data/ext/marisa/bindings/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/marisa-swig.h +183 -0
- data/ext/marisa/bindings/perl/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/perl/marisa-swig.h +183 -0
- data/ext/marisa/bindings/perl/marisa-swig_wrap.cxx +5160 -0
- data/ext/marisa/bindings/python/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/python/marisa-swig.h +183 -0
- data/ext/marisa/bindings/python/marisa-swig_wrap.cxx +6090 -0
- data/ext/marisa/bindings/ruby/extconf.rb +5 -0
- data/ext/marisa/bindings/ruby/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/ruby/marisa-swig.h +183 -0
- data/ext/marisa/bindings/ruby/marisa-swig_wrap.cxx +4708 -0
- data/ext/marisa/lib/marisa.h +14 -0
- data/ext/marisa/lib/marisa/agent.cc +51 -0
- data/ext/marisa/lib/marisa/agent.h +73 -0
- data/ext/marisa/lib/marisa/base.h +193 -0
- data/ext/marisa/lib/marisa/exception.h +82 -0
- data/ext/marisa/lib/marisa/grimoire/algorithm.h +26 -0
- data/ext/marisa/lib/marisa/grimoire/algorithm/sort.h +196 -0
- data/ext/marisa/lib/marisa/grimoire/intrin.h +115 -0
- data/ext/marisa/lib/marisa/grimoire/io.h +18 -0
- data/ext/marisa/lib/marisa/grimoire/io/mapper.cc +163 -0
- data/ext/marisa/lib/marisa/grimoire/io/mapper.h +67 -0
- data/ext/marisa/lib/marisa/grimoire/io/reader.cc +147 -0
- data/ext/marisa/lib/marisa/grimoire/io/reader.h +66 -0
- data/ext/marisa/lib/marisa/grimoire/io/writer.cc +148 -0
- data/ext/marisa/lib/marisa/grimoire/io/writer.h +65 -0
- data/ext/marisa/lib/marisa/grimoire/trie.h +16 -0
- data/ext/marisa/lib/marisa/grimoire/trie/cache.h +81 -0
- data/ext/marisa/lib/marisa/grimoire/trie/config.h +155 -0
- data/ext/marisa/lib/marisa/grimoire/trie/entry.h +82 -0
- data/ext/marisa/lib/marisa/grimoire/trie/header.h +61 -0
- data/ext/marisa/lib/marisa/grimoire/trie/history.h +65 -0
- data/ext/marisa/lib/marisa/grimoire/trie/key.h +228 -0
- data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.cc +876 -0
- data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.h +134 -0
- data/ext/marisa/lib/marisa/grimoire/trie/range.h +115 -0
- data/ext/marisa/lib/marisa/grimoire/trie/state.h +117 -0
- data/ext/marisa/lib/marisa/grimoire/trie/tail.cc +218 -0
- data/ext/marisa/lib/marisa/grimoire/trie/tail.h +72 -0
- data/ext/marisa/lib/marisa/grimoire/vector.h +18 -0
- data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.cc +826 -0
- data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.h +179 -0
- data/ext/marisa/lib/marisa/grimoire/vector/flat-vector.h +205 -0
- data/ext/marisa/lib/marisa/grimoire/vector/pop-count.h +110 -0
- data/ext/marisa/lib/marisa/grimoire/vector/rank-index.h +82 -0
- data/ext/marisa/lib/marisa/grimoire/vector/vector.h +256 -0
- data/ext/marisa/lib/marisa/iostream.h +18 -0
- data/ext/marisa/lib/marisa/key.h +85 -0
- data/ext/marisa/lib/marisa/keyset.cc +181 -0
- data/ext/marisa/lib/marisa/keyset.h +80 -0
- data/ext/marisa/lib/marisa/query.h +71 -0
- data/ext/marisa/lib/marisa/scoped-array.h +48 -0
- data/ext/marisa/lib/marisa/scoped-ptr.h +52 -0
- data/ext/marisa/lib/marisa/stdio.h +15 -0
- data/ext/marisa/lib/marisa/trie.cc +249 -0
- data/ext/marisa/lib/marisa/trie.h +64 -0
- data/ext/marisa/tests/base-test.cc +309 -0
- data/ext/marisa/tests/io-test.cc +252 -0
- data/ext/marisa/tests/marisa-assert.h +26 -0
- data/ext/marisa/tests/marisa-test.cc +388 -0
- data/ext/marisa/tests/trie-test.cc +507 -0
- data/ext/marisa/tests/vector-test.cc +466 -0
- data/ext/marisa/tools/cmdopt.cc +298 -0
- data/ext/marisa/tools/cmdopt.h +58 -0
- data/ext/marisa/tools/marisa-benchmark.cc +418 -0
- data/ext/marisa/tools/marisa-build.cc +206 -0
- data/ext/marisa/tools/marisa-common-prefix-search.cc +143 -0
- data/ext/marisa/tools/marisa-dump.cc +151 -0
- data/ext/marisa/tools/marisa-lookup.cc +110 -0
- data/ext/marisa/tools/marisa-predictive-search.cc +143 -0
- data/ext/marisa/tools/marisa-reverse-lookup.cc +110 -0
- data/lib/melisa.rb +7 -0
- data/lib/melisa/base_config_flags.rb +76 -0
- data/lib/melisa/bytes_trie.rb +55 -0
- data/lib/melisa/int_trie.rb +14 -0
- data/lib/melisa/search.rb +55 -0
- data/lib/melisa/trie.rb +96 -0
- data/lib/melisa/version.rb +3 -0
- data/melisa.gemspec +36 -0
- data/spec/base_config_flags_spec.rb +73 -0
- data/spec/bytes_trie_spec.rb +16 -0
- data/spec/int_trie_spec.rb +16 -0
- data/spec/search_spec.rb +29 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/trie_spec.rb +30 -0
- metadata +207 -0
@@ -0,0 +1,26 @@
|
|
1
|
+
#ifndef MARISA_ASSERT_H_
|
2
|
+
#define MARISA_ASSERT_H_
|
3
|
+
|
4
|
+
#include <iostream>
|
5
|
+
#include <cstdlib>
|
6
|
+
|
7
|
+
// Evaluates `cond` exactly once. If it is false, prints
// "<line>: Assertion `<cond>' failed." to std::cout (flushed by std::endl)
// and ends the process with std::exit(-1). If it is true, nothing happens.
// The macro expands to an expression of type void.
#define ASSERT(cond) ((cond) ? (void)0 : \
    ((void)(std::cout << __LINE__ << ": Assertion `" << #cond \
                      << "' failed." << std::endl), \
     std::exit(-1)))
|
10
|
+
|
11
|
+
// Runs `code` and requires it to throw a marisa::Exception whose
// error_code() equals `expected_error_code`.
//   - `code` returns normally: prints "<line>: Exception `<code>' failed."
//     to std::cout and calls std::exit(-1).
//   - marisa::Exception with a different code: the inner ASSERT fails.
//   - Any other exception type is not caught here and propagates.
// The body is wrapped in do { } while (0) so that `EXCEPT(...);` is exactly
// one statement (safe inside an unbraced if/else), and the expected code is
// parenthesized so an argument containing operators is compared as a unit.
#define EXCEPT(code, expected_error_code) do { \
  try { \
    code; \
    std::cout << __LINE__ << ": Exception `" << #code << "' failed." \
              << std::endl; \
    std::exit(-1); \
  } catch (const marisa::Exception &ex) { \
    ASSERT(ex.error_code() == (expected_error_code)); \
  } \
} while (0)
|
19
|
+
|
20
|
+
// Opens a test's report line on std::cout with
// "<file>:<line>: <function>(): " and no trailing newline.
// The expression yields std::cout, so more output can be chained onto it.
#define TEST_START() \
    (std::cout << __FILE__ << ':' << __LINE__ << ':' << ' ' \
               << __FUNCTION__ << "(): ")
|
22
|
+
|
23
|
+
// Closes the report line opened by TEST_START(): prints "ok", then ends the
// line and flushes std::cout via std::endl. The expression yields std::cout.
#define TEST_END() \
    ((std::cout << "ok") << std::endl)
|
25
|
+
|
26
|
+
#endif // MARISA_ASSERT_H_
|
@@ -0,0 +1,388 @@
|
|
1
|
+
#include <cstdlib>
|
2
|
+
#include <cstring>
|
3
|
+
#include <ctime>
|
4
|
+
#include <sstream>
|
5
|
+
|
6
|
+
#include <marisa.h>
|
7
|
+
|
8
|
+
#include "marisa-assert.h"
|
9
|
+
|
10
|
+
namespace {
|
11
|
+
|
12
|
+
void TestEmptyTrie() {
|
13
|
+
TEST_START();
|
14
|
+
|
15
|
+
marisa::Trie trie;
|
16
|
+
|
17
|
+
EXCEPT(trie.save("marisa-test.dat"), MARISA_STATE_ERROR);
|
18
|
+
#ifdef _MSC_VER
|
19
|
+
EXCEPT(trie.write(::_fileno(stdout)), MARISA_STATE_ERROR);
|
20
|
+
#else // _MSC_VER
|
21
|
+
EXCEPT(trie.write(::fileno(stdout)), MARISA_STATE_ERROR);
|
22
|
+
#endif // _MSC_VER
|
23
|
+
EXCEPT(std::cout << trie, MARISA_STATE_ERROR);
|
24
|
+
EXCEPT(marisa::fwrite(stdout, trie), MARISA_STATE_ERROR);
|
25
|
+
|
26
|
+
marisa::Agent agent;
|
27
|
+
|
28
|
+
EXCEPT(trie.lookup(agent), MARISA_STATE_ERROR);
|
29
|
+
EXCEPT(trie.reverse_lookup(agent), MARISA_STATE_ERROR);
|
30
|
+
EXCEPT(trie.common_prefix_search(agent), MARISA_STATE_ERROR);
|
31
|
+
EXCEPT(trie.predictive_search(agent), MARISA_STATE_ERROR);
|
32
|
+
|
33
|
+
EXCEPT(trie.num_tries(), MARISA_STATE_ERROR);
|
34
|
+
EXCEPT(trie.num_keys(), MARISA_STATE_ERROR);
|
35
|
+
EXCEPT(trie.num_nodes(), MARISA_STATE_ERROR);
|
36
|
+
|
37
|
+
EXCEPT(trie.tail_mode(), MARISA_STATE_ERROR);
|
38
|
+
EXCEPT(trie.node_order(), MARISA_STATE_ERROR);
|
39
|
+
|
40
|
+
EXCEPT(trie.empty(), MARISA_STATE_ERROR);
|
41
|
+
EXCEPT(trie.size(), MARISA_STATE_ERROR);
|
42
|
+
EXCEPT(trie.total_size(), MARISA_STATE_ERROR);
|
43
|
+
EXCEPT(trie.io_size(), MARISA_STATE_ERROR);
|
44
|
+
|
45
|
+
marisa::Keyset keyset;
|
46
|
+
trie.build(keyset);
|
47
|
+
|
48
|
+
ASSERT(!trie.lookup(agent));
|
49
|
+
EXCEPT(trie.reverse_lookup(agent), MARISA_BOUND_ERROR);
|
50
|
+
ASSERT(!trie.common_prefix_search(agent));
|
51
|
+
ASSERT(!trie.predictive_search(agent));
|
52
|
+
|
53
|
+
ASSERT(trie.num_tries() == 1);
|
54
|
+
ASSERT(trie.num_keys() == 0);
|
55
|
+
ASSERT(trie.num_nodes() == 1);
|
56
|
+
|
57
|
+
ASSERT(trie.tail_mode() == MARISA_DEFAULT_TAIL);
|
58
|
+
ASSERT(trie.node_order() == MARISA_DEFAULT_ORDER);
|
59
|
+
|
60
|
+
ASSERT(trie.empty());
|
61
|
+
ASSERT(trie.size() == 0);
|
62
|
+
ASSERT(trie.total_size() != 0);
|
63
|
+
ASSERT(trie.io_size() != 0);
|
64
|
+
|
65
|
+
keyset.push_back("");
|
66
|
+
trie.build(keyset);
|
67
|
+
|
68
|
+
ASSERT(trie.lookup(agent));
|
69
|
+
trie.reverse_lookup(agent);
|
70
|
+
ASSERT(trie.common_prefix_search(agent));
|
71
|
+
ASSERT(!trie.common_prefix_search(agent));
|
72
|
+
ASSERT(trie.predictive_search(agent));
|
73
|
+
ASSERT(!trie.predictive_search(agent));
|
74
|
+
|
75
|
+
ASSERT(trie.num_keys() == 1);
|
76
|
+
ASSERT(trie.num_nodes() == 1);
|
77
|
+
|
78
|
+
ASSERT(!trie.empty());
|
79
|
+
ASSERT(trie.size() == 1);
|
80
|
+
ASSERT(trie.total_size() != 0);
|
81
|
+
ASSERT(trie.io_size() != 0);
|
82
|
+
|
83
|
+
TEST_END();
|
84
|
+
}
|
85
|
+
|
86
|
+
void TestTinyTrie() {
|
87
|
+
TEST_START();
|
88
|
+
|
89
|
+
marisa::Keyset keyset;
|
90
|
+
keyset.push_back("bach");
|
91
|
+
keyset.push_back("bet");
|
92
|
+
keyset.push_back("chat");
|
93
|
+
keyset.push_back("check");
|
94
|
+
keyset.push_back("check");
|
95
|
+
|
96
|
+
marisa::Trie trie;
|
97
|
+
trie.build(keyset, 1);
|
98
|
+
|
99
|
+
ASSERT(trie.num_tries() == 1);
|
100
|
+
ASSERT(trie.num_keys() == 4);
|
101
|
+
ASSERT(trie.num_nodes() == 7);
|
102
|
+
|
103
|
+
ASSERT(trie.tail_mode() == MARISA_DEFAULT_TAIL);
|
104
|
+
ASSERT(trie.node_order() == MARISA_DEFAULT_ORDER);
|
105
|
+
|
106
|
+
ASSERT(keyset[0].id() == 2);
|
107
|
+
ASSERT(keyset[1].id() == 3);
|
108
|
+
ASSERT(keyset[2].id() == 1);
|
109
|
+
ASSERT(keyset[3].id() == 0);
|
110
|
+
ASSERT(keyset[4].id() == 0);
|
111
|
+
|
112
|
+
marisa::Agent agent;
|
113
|
+
for (std::size_t i = 0; i < keyset.size(); ++i) {
|
114
|
+
agent.set_query(keyset[i].ptr(), keyset[i].length());
|
115
|
+
ASSERT(trie.lookup(agent));
|
116
|
+
ASSERT(agent.key().id() == keyset[i].id());
|
117
|
+
|
118
|
+
agent.set_query(keyset[i].id());
|
119
|
+
trie.reverse_lookup(agent);
|
120
|
+
ASSERT(agent.key().length() == keyset[i].length());
|
121
|
+
ASSERT(std::memcmp(agent.key().ptr(), keyset[i].ptr(),
|
122
|
+
agent.key().length()) == 0);
|
123
|
+
}
|
124
|
+
|
125
|
+
agent.set_query("be");
|
126
|
+
ASSERT(!trie.common_prefix_search(agent));
|
127
|
+
agent.set_query("beX");
|
128
|
+
ASSERT(!trie.common_prefix_search(agent));
|
129
|
+
agent.set_query("bet");
|
130
|
+
ASSERT(trie.common_prefix_search(agent));
|
131
|
+
ASSERT(!trie.common_prefix_search(agent));
|
132
|
+
agent.set_query("betX");
|
133
|
+
ASSERT(trie.common_prefix_search(agent));
|
134
|
+
ASSERT(!trie.common_prefix_search(agent));
|
135
|
+
|
136
|
+
agent.set_query("chatX");
|
137
|
+
ASSERT(!trie.predictive_search(agent));
|
138
|
+
agent.set_query("chat");
|
139
|
+
ASSERT(trie.predictive_search(agent));
|
140
|
+
ASSERT(agent.key().length() == 4);
|
141
|
+
ASSERT(!trie.predictive_search(agent));
|
142
|
+
|
143
|
+
agent.set_query("cha");
|
144
|
+
ASSERT(trie.predictive_search(agent));
|
145
|
+
ASSERT(agent.key().length() == 4);
|
146
|
+
ASSERT(!trie.predictive_search(agent));
|
147
|
+
|
148
|
+
agent.set_query("c");
|
149
|
+
ASSERT(trie.predictive_search(agent));
|
150
|
+
ASSERT(agent.key().length() == 5);
|
151
|
+
ASSERT(std::memcmp(agent.key().ptr(), "check", 5) == 0);
|
152
|
+
ASSERT(trie.predictive_search(agent));
|
153
|
+
ASSERT(agent.key().length() == 4);
|
154
|
+
ASSERT(std::memcmp(agent.key().ptr(), "chat", 4) == 0);
|
155
|
+
ASSERT(!trie.predictive_search(agent));
|
156
|
+
|
157
|
+
agent.set_query("ch");
|
158
|
+
ASSERT(trie.predictive_search(agent));
|
159
|
+
ASSERT(agent.key().length() == 5);
|
160
|
+
ASSERT(std::memcmp(agent.key().ptr(), "check", 5) == 0);
|
161
|
+
ASSERT(trie.predictive_search(agent));
|
162
|
+
ASSERT(agent.key().length() == 4);
|
163
|
+
ASSERT(std::memcmp(agent.key().ptr(), "chat", 4) == 0);
|
164
|
+
ASSERT(!trie.predictive_search(agent));
|
165
|
+
|
166
|
+
trie.build(keyset, 1 | MARISA_LABEL_ORDER);
|
167
|
+
|
168
|
+
ASSERT(trie.num_tries() == 1);
|
169
|
+
ASSERT(trie.num_keys() == 4);
|
170
|
+
ASSERT(trie.num_nodes() == 7);
|
171
|
+
|
172
|
+
ASSERT(trie.tail_mode() == MARISA_DEFAULT_TAIL);
|
173
|
+
ASSERT(trie.node_order() == MARISA_LABEL_ORDER);
|
174
|
+
|
175
|
+
ASSERT(keyset[0].id() == 0);
|
176
|
+
ASSERT(keyset[1].id() == 1);
|
177
|
+
ASSERT(keyset[2].id() == 2);
|
178
|
+
ASSERT(keyset[3].id() == 3);
|
179
|
+
ASSERT(keyset[4].id() == 3);
|
180
|
+
|
181
|
+
for (std::size_t i = 0; i < keyset.size(); ++i) {
|
182
|
+
agent.set_query(keyset[i].ptr(), keyset[i].length());
|
183
|
+
ASSERT(trie.lookup(agent));
|
184
|
+
ASSERT(agent.key().id() == keyset[i].id());
|
185
|
+
|
186
|
+
agent.set_query(keyset[i].id());
|
187
|
+
trie.reverse_lookup(agent);
|
188
|
+
ASSERT(agent.key().length() == keyset[i].length());
|
189
|
+
ASSERT(std::memcmp(agent.key().ptr(), keyset[i].ptr(),
|
190
|
+
agent.key().length()) == 0);
|
191
|
+
}
|
192
|
+
|
193
|
+
agent.set_query("");
|
194
|
+
for (std::size_t i = 0; i < trie.size(); ++i) {
|
195
|
+
ASSERT(trie.predictive_search(agent));
|
196
|
+
ASSERT(agent.key().id() == i);
|
197
|
+
}
|
198
|
+
ASSERT(!trie.predictive_search(agent));
|
199
|
+
|
200
|
+
TEST_END();
|
201
|
+
}
|
202
|
+
|
203
|
+
void MakeKeyset(std::size_t num_keys, marisa::TailMode tail_mode,
|
204
|
+
marisa::Keyset *keyset) {
|
205
|
+
char key_buf[16];
|
206
|
+
for (std::size_t i = 0; i < num_keys; ++i) {
|
207
|
+
const std::size_t length = std::rand() % sizeof(key_buf);
|
208
|
+
for (std::size_t j = 0; j < length; ++j) {
|
209
|
+
key_buf[j] = (char)(std::rand() % 10);
|
210
|
+
if (tail_mode == MARISA_TEXT_TAIL) {
|
211
|
+
key_buf[j] += '0';
|
212
|
+
}
|
213
|
+
}
|
214
|
+
keyset->push_back(key_buf, length);
|
215
|
+
}
|
216
|
+
}
|
217
|
+
|
218
|
+
// For every key in `keyset`: looks the key up by its bytes and checks the
// reported ID, then looks that ID up in reverse and checks that the restored
// key has the same length and bytes.
void TestLookup(const marisa::Trie &trie, const marisa::Keyset &keyset) {
  marisa::Agent agent;
  for (std::size_t i = 0; i < keyset.size(); ++i) {
    const marisa::Key &expected = keyset[i];

    // bytes -> ID
    agent.set_query(expected.ptr(), expected.length());
    ASSERT(trie.lookup(agent));
    ASSERT(agent.key().id() == expected.id());

    // ID -> bytes
    agent.set_query(expected.id());
    trie.reverse_lookup(agent);
    const marisa::Key &restored = agent.key();
    ASSERT(restored.length() == expected.length());
    ASSERT(std::memcmp(restored.ptr(), expected.ptr(),
        restored.length()) == 0);
  }
}
|
232
|
+
|
233
|
+
// Uses each key of `keyset` as a common-prefix query. At least one hit is
// required, no hit may have an ID greater than the query key's ID, and the
// last hit reported must be the query key itself.
void TestCommonPrefixSearch(const marisa::Trie &trie,
    const marisa::Keyset &keyset) {
  marisa::Agent agent;
  for (std::size_t i = 0; i < keyset.size(); ++i) {
    const marisa::Key &query = keyset[i];
    agent.set_query(query.ptr(), query.length());

    // The first search must succeed; every hit, first included, is bounded.
    ASSERT(trie.common_prefix_search(agent));
    do {
      ASSERT(agent.key().id() <= query.id());
    } while (trie.common_prefix_search(agent));

    ASSERT(agent.key().id() == query.id());
  }
}
|
246
|
+
|
247
|
+
// Uses each key of `keyset` as a predictive-search query. The first hit must
// be the query key itself, and every further hit must have a strictly
// greater ID.
void TestPredictiveSearch(const marisa::Trie &trie,
    const marisa::Keyset &keyset) {
  marisa::Agent agent;
  for (std::size_t i = 0; i < keyset.size(); ++i) {
    const marisa::Key &query = keyset[i];
    agent.set_query(query.ptr(), query.length());

    ASSERT(trie.predictive_search(agent));
    ASSERT(agent.key().id() == query.id());

    for (;;) {
      if (!trie.predictive_search(agent)) {
        break;
      }
      ASSERT(agent.key().id() > query.id());
    }
  }
}
|
259
|
+
|
260
|
+
void TestTrie(int num_tries, marisa::TailMode tail_mode,
|
261
|
+
marisa::NodeOrder node_order, marisa::Keyset &keyset) {
|
262
|
+
for (std::size_t i = 0; i < keyset.size(); ++i) {
|
263
|
+
keyset[i].set_weight(1.0F);
|
264
|
+
}
|
265
|
+
|
266
|
+
marisa::Trie trie;
|
267
|
+
trie.build(keyset, num_tries | tail_mode | node_order);
|
268
|
+
|
269
|
+
ASSERT(trie.num_tries() == (std::size_t)num_tries);
|
270
|
+
ASSERT(trie.num_keys() <= keyset.size());
|
271
|
+
|
272
|
+
ASSERT(trie.tail_mode() == tail_mode);
|
273
|
+
ASSERT(trie.node_order() == node_order);
|
274
|
+
|
275
|
+
TestLookup(trie, keyset);
|
276
|
+
TestCommonPrefixSearch(trie, keyset);
|
277
|
+
TestPredictiveSearch(trie, keyset);
|
278
|
+
|
279
|
+
trie.save("marisa-test.dat");
|
280
|
+
|
281
|
+
trie.clear();
|
282
|
+
trie.load("marisa-test.dat");
|
283
|
+
|
284
|
+
ASSERT(trie.num_tries() == (std::size_t)num_tries);
|
285
|
+
ASSERT(trie.num_keys() <= keyset.size());
|
286
|
+
|
287
|
+
ASSERT(trie.tail_mode() == tail_mode);
|
288
|
+
ASSERT(trie.node_order() == node_order);
|
289
|
+
|
290
|
+
TestLookup(trie, keyset);
|
291
|
+
|
292
|
+
{
|
293
|
+
std::FILE *file;
|
294
|
+
#ifdef _MSC_VER
|
295
|
+
ASSERT(::fopen_s(&file, "marisa-test.dat", "wb") == 0);
|
296
|
+
#else // _MSC_VER
|
297
|
+
file = std::fopen("marisa-test.dat", "wb");
|
298
|
+
ASSERT(file != NULL);
|
299
|
+
#endif // _MSC_VER
|
300
|
+
marisa::fwrite(file, trie);
|
301
|
+
std::fclose(file);
|
302
|
+
trie.clear();
|
303
|
+
#ifdef _MSC_VER
|
304
|
+
ASSERT(::fopen_s(&file, "marisa-test.dat", "rb") == 0);
|
305
|
+
#else // _MSC_VER
|
306
|
+
file = std::fopen("marisa-test.dat", "rb");
|
307
|
+
ASSERT(file != NULL);
|
308
|
+
#endif // _MSC_VER
|
309
|
+
marisa::fread(file, &trie);
|
310
|
+
std::fclose(file);
|
311
|
+
}
|
312
|
+
|
313
|
+
ASSERT(trie.num_tries() == (std::size_t)num_tries);
|
314
|
+
ASSERT(trie.num_keys() <= keyset.size());
|
315
|
+
|
316
|
+
ASSERT(trie.tail_mode() == tail_mode);
|
317
|
+
ASSERT(trie.node_order() == node_order);
|
318
|
+
|
319
|
+
TestLookup(trie, keyset);
|
320
|
+
|
321
|
+
trie.clear();
|
322
|
+
trie.mmap("marisa-test.dat");
|
323
|
+
|
324
|
+
ASSERT(trie.num_tries() == (std::size_t)num_tries);
|
325
|
+
ASSERT(trie.num_keys() <= keyset.size());
|
326
|
+
|
327
|
+
ASSERT(trie.tail_mode() == tail_mode);
|
328
|
+
ASSERT(trie.node_order() == node_order);
|
329
|
+
|
330
|
+
TestLookup(trie, keyset);
|
331
|
+
|
332
|
+
{
|
333
|
+
std::stringstream stream;
|
334
|
+
stream << trie;
|
335
|
+
trie.clear();
|
336
|
+
stream >> trie;
|
337
|
+
}
|
338
|
+
|
339
|
+
ASSERT(trie.num_tries() == (std::size_t)num_tries);
|
340
|
+
ASSERT(trie.num_keys() <= keyset.size());
|
341
|
+
|
342
|
+
ASSERT(trie.tail_mode() == tail_mode);
|
343
|
+
ASSERT(trie.node_order() == node_order);
|
344
|
+
|
345
|
+
TestLookup(trie, keyset);
|
346
|
+
}
|
347
|
+
|
348
|
+
// Prints a "<TEXT|BINARY>, <WEIGHT|LABEL>: " label for the given
// combination and runs the four-argument TestTrie() with 1, 2, 3 and 4
// tries over `keyset`.
void TestTrie(marisa::TailMode tail_mode, marisa::NodeOrder node_order,
    marisa::Keyset &keyset) {
  TEST_START();

  const char *const tail_label =
      (tail_mode == MARISA_TEXT_TAIL) ? "TEXT" : "BINARY";
  const char *const order_label =
      (node_order == MARISA_WEIGHT_ORDER) ? "WEIGHT" : "LABEL";
  std::cout << tail_label << ", " << order_label << ": ";

  for (int num_tries = 1; num_tries <= 4; ++num_tries) {
    TestTrie(num_tries, tail_mode, node_order, keyset);
  }

  TEST_END();
}
|
361
|
+
|
362
|
+
// Generates 1000 random keys suited to `tail_mode` and runs the trie tests
// over them, first in weight order and then in label order. Both runs share
// the same keyset.
void TestTrie(marisa::TailMode tail_mode) {
  marisa::Keyset keyset;
  MakeKeyset(1000, tail_mode, &keyset);

  const marisa::NodeOrder node_orders[] = {
    MARISA_WEIGHT_ORDER, MARISA_LABEL_ORDER
  };
  for (std::size_t i = 0; i < 2; ++i) {
    TestTrie(tail_mode, node_orders[i], keyset);
  }
}
|
369
|
+
|
370
|
+
void TestTrie() {
|
371
|
+
TestTrie(MARISA_TEXT_TAIL);
|
372
|
+
TestTrie(MARISA_BINARY_TAIL);
|
373
|
+
}
|
374
|
+
|
375
|
+
} // namespace
|
376
|
+
|
377
|
+
int main() try {
|
378
|
+
std::srand((unsigned int)std::time(NULL));
|
379
|
+
|
380
|
+
TestEmptyTrie();
|
381
|
+
TestTinyTrie();
|
382
|
+
TestTrie();
|
383
|
+
|
384
|
+
return 0;
|
385
|
+
} catch (const marisa::Exception &ex) {
|
386
|
+
std::cerr << ex.what() << std::endl;
|
387
|
+
throw;
|
388
|
+
}
|
@@ -0,0 +1,507 @@
|
|
1
|
+
#include <algorithm>
|
2
|
+
#include <cstring>
|
3
|
+
#include <sstream>
|
4
|
+
|
5
|
+
#include <marisa/grimoire/trie/config.h>
|
6
|
+
#include <marisa/grimoire/trie/header.h>
|
7
|
+
#include <marisa/grimoire/trie/key.h>
|
8
|
+
#include <marisa/grimoire/trie/range.h>
|
9
|
+
#include <marisa/grimoire/trie/tail.h>
|
10
|
+
#include <marisa/grimoire/trie/state.h>
|
11
|
+
|
12
|
+
#include "marisa-assert.h"
|
13
|
+
|
14
|
+
namespace {
|
15
|
+
|
16
|
+
void TestConfig() {
|
17
|
+
TEST_START();
|
18
|
+
|
19
|
+
marisa::grimoire::trie::Config config;
|
20
|
+
|
21
|
+
ASSERT(config.num_tries() == MARISA_DEFAULT_NUM_TRIES);
|
22
|
+
ASSERT(config.tail_mode() == MARISA_DEFAULT_TAIL);
|
23
|
+
ASSERT(config.node_order() == MARISA_DEFAULT_ORDER);
|
24
|
+
ASSERT(config.cache_level() == MARISA_DEFAULT_CACHE);
|
25
|
+
|
26
|
+
config.parse(10 | MARISA_BINARY_TAIL | MARISA_LABEL_ORDER |
|
27
|
+
MARISA_TINY_CACHE);
|
28
|
+
|
29
|
+
ASSERT(config.num_tries() == 10);
|
30
|
+
ASSERT(config.tail_mode() == MARISA_BINARY_TAIL);
|
31
|
+
ASSERT(config.node_order() == MARISA_LABEL_ORDER);
|
32
|
+
ASSERT(config.cache_level() == MARISA_TINY_CACHE);
|
33
|
+
|
34
|
+
config.parse(0);
|
35
|
+
|
36
|
+
ASSERT(config.num_tries() == MARISA_DEFAULT_NUM_TRIES);
|
37
|
+
ASSERT(config.tail_mode() == MARISA_DEFAULT_TAIL);
|
38
|
+
ASSERT(config.node_order() == MARISA_DEFAULT_ORDER);
|
39
|
+
ASSERT(config.cache_level() == MARISA_DEFAULT_CACHE);
|
40
|
+
|
41
|
+
TEST_END();
|
42
|
+
}
|
43
|
+
|
44
|
+
void TestHeader() {
|
45
|
+
TEST_START();
|
46
|
+
|
47
|
+
marisa::grimoire::trie::Header header;
|
48
|
+
|
49
|
+
{
|
50
|
+
marisa::grimoire::Writer writer;
|
51
|
+
writer.open("trie-test.dat");
|
52
|
+
header.write(writer);
|
53
|
+
}
|
54
|
+
|
55
|
+
{
|
56
|
+
marisa::grimoire::Mapper mapper;
|
57
|
+
mapper.open("trie-test.dat");
|
58
|
+
header.map(mapper);
|
59
|
+
}
|
60
|
+
|
61
|
+
{
|
62
|
+
marisa::grimoire::Reader reader;
|
63
|
+
reader.open("trie-test.dat");
|
64
|
+
header.read(reader);
|
65
|
+
}
|
66
|
+
|
67
|
+
TEST_END();
|
68
|
+
}
|
69
|
+
|
70
|
+
void TestKey() {
|
71
|
+
TEST_START();
|
72
|
+
|
73
|
+
marisa::grimoire::trie::Key key;
|
74
|
+
|
75
|
+
ASSERT(key.ptr() == NULL);
|
76
|
+
ASSERT(key.length() == 0);
|
77
|
+
ASSERT(key.id() == 0);
|
78
|
+
ASSERT(key.terminal() == 0);
|
79
|
+
|
80
|
+
const char *str = "xyz";
|
81
|
+
|
82
|
+
key.set_str(str, 3);
|
83
|
+
key.set_weight(10.0F);
|
84
|
+
key.set_id(20);
|
85
|
+
|
86
|
+
|
87
|
+
ASSERT(key.ptr() == str);
|
88
|
+
ASSERT(key.length() == 3);
|
89
|
+
ASSERT(key[0] == 'x');
|
90
|
+
ASSERT(key[1] == 'y');
|
91
|
+
ASSERT(key[2] == 'z');
|
92
|
+
ASSERT(key.weight() == 10.0F);
|
93
|
+
ASSERT(key.id() == 20);
|
94
|
+
|
95
|
+
key.set_terminal(30);
|
96
|
+
ASSERT(key.terminal() == 30);
|
97
|
+
|
98
|
+
key.substr(1, 2);
|
99
|
+
|
100
|
+
ASSERT(key.ptr() == str + 1);
|
101
|
+
ASSERT(key.length() == 2);
|
102
|
+
ASSERT(key[0] == 'y');
|
103
|
+
ASSERT(key[1] == 'z');
|
104
|
+
|
105
|
+
marisa::grimoire::trie::Key key2;
|
106
|
+
key2.set_str("abc", 3);
|
107
|
+
|
108
|
+
ASSERT(key == key);
|
109
|
+
ASSERT(key != key2);
|
110
|
+
ASSERT(key > key2);
|
111
|
+
ASSERT(key2 < key);
|
112
|
+
|
113
|
+
marisa::grimoire::trie::ReverseKey r_key;
|
114
|
+
|
115
|
+
ASSERT(r_key.ptr() == NULL);
|
116
|
+
ASSERT(r_key.length() == 0);
|
117
|
+
ASSERT(r_key.id() == 0);
|
118
|
+
ASSERT(r_key.terminal() == 0);
|
119
|
+
|
120
|
+
r_key.set_str(str, 3);
|
121
|
+
r_key.set_weight(100.0F);
|
122
|
+
r_key.set_id(200);
|
123
|
+
|
124
|
+
ASSERT(r_key.ptr() == str);
|
125
|
+
ASSERT(r_key.length() == 3);
|
126
|
+
ASSERT(r_key[0] == 'z');
|
127
|
+
ASSERT(r_key[1] == 'y');
|
128
|
+
ASSERT(r_key[2] == 'x');
|
129
|
+
ASSERT(r_key.weight() == 100.0F);
|
130
|
+
ASSERT(r_key.id() == 200);
|
131
|
+
|
132
|
+
r_key.set_terminal(300);
|
133
|
+
ASSERT(r_key.terminal() == 300);
|
134
|
+
|
135
|
+
r_key.substr(1, 2);
|
136
|
+
|
137
|
+
ASSERT(r_key.ptr() == str);
|
138
|
+
ASSERT(r_key.length() == 2);
|
139
|
+
ASSERT(r_key[0] == 'y');
|
140
|
+
ASSERT(r_key[1] == 'x');
|
141
|
+
|
142
|
+
marisa::grimoire::trie::ReverseKey r_key2;
|
143
|
+
r_key2.set_str("abc", 3);
|
144
|
+
|
145
|
+
ASSERT(r_key == r_key);
|
146
|
+
ASSERT(r_key != r_key2);
|
147
|
+
ASSERT(r_key > r_key2);
|
148
|
+
ASSERT(r_key2 < r_key);
|
149
|
+
|
150
|
+
TEST_END();
|
151
|
+
}
|
152
|
+
|
153
|
+
void TestRange() {
|
154
|
+
TEST_START();
|
155
|
+
|
156
|
+
marisa::grimoire::trie::Range range;
|
157
|
+
|
158
|
+
ASSERT(range.begin() == 0);
|
159
|
+
ASSERT(range.end() == 0);
|
160
|
+
ASSERT(range.key_pos() == 0);
|
161
|
+
|
162
|
+
range.set_begin(1);
|
163
|
+
range.set_end(2);
|
164
|
+
range.set_key_pos(3);
|
165
|
+
|
166
|
+
ASSERT(range.begin() == 1);
|
167
|
+
ASSERT(range.end() == 2);
|
168
|
+
ASSERT(range.key_pos() == 3);
|
169
|
+
|
170
|
+
range = marisa::grimoire::trie::make_range(10, 20, 30);
|
171
|
+
|
172
|
+
ASSERT(range.begin() == 10);
|
173
|
+
ASSERT(range.end() == 20);
|
174
|
+
ASSERT(range.key_pos() == 30);
|
175
|
+
|
176
|
+
marisa::grimoire::trie::WeightedRange w_range;
|
177
|
+
|
178
|
+
ASSERT(w_range.begin() == 0);
|
179
|
+
ASSERT(w_range.end() == 0);
|
180
|
+
ASSERT(w_range.key_pos() == 0);
|
181
|
+
ASSERT(w_range.weight() == 0.0F);
|
182
|
+
|
183
|
+
w_range.set_begin(10);
|
184
|
+
w_range.set_end(20);
|
185
|
+
w_range.set_key_pos(30);
|
186
|
+
w_range.set_weight(40.0F);
|
187
|
+
|
188
|
+
ASSERT(w_range.begin() == 10);
|
189
|
+
ASSERT(w_range.end() == 20);
|
190
|
+
ASSERT(w_range.key_pos() == 30);
|
191
|
+
ASSERT(w_range.weight() == 40.0F);
|
192
|
+
|
193
|
+
marisa::grimoire::trie::WeightedRange w_range2 =
|
194
|
+
marisa::grimoire::trie::make_weighted_range(100, 200, 300, 400.0F);
|
195
|
+
|
196
|
+
ASSERT(w_range2.begin() == 100);
|
197
|
+
ASSERT(w_range2.end() == 200);
|
198
|
+
ASSERT(w_range2.key_pos() == 300);
|
199
|
+
ASSERT(w_range2.weight() == 400.0F);
|
200
|
+
|
201
|
+
ASSERT(w_range < w_range2);
|
202
|
+
ASSERT(w_range2 > w_range);
|
203
|
+
|
204
|
+
TEST_END();
|
205
|
+
}
|
206
|
+
|
207
|
+
void TestEntry() {
|
208
|
+
TEST_START();
|
209
|
+
|
210
|
+
marisa::grimoire::trie::Entry entry;
|
211
|
+
|
212
|
+
ASSERT(entry.ptr() == NULL);
|
213
|
+
ASSERT(entry.length() == 0);
|
214
|
+
ASSERT(entry.id() == 0);
|
215
|
+
|
216
|
+
const char *str = "XYZ";
|
217
|
+
|
218
|
+
entry.set_str(str, 3);
|
219
|
+
entry.set_id(123);
|
220
|
+
|
221
|
+
ASSERT(entry.ptr() == str);
|
222
|
+
ASSERT(entry.length() == 3);
|
223
|
+
ASSERT(entry[0] == 'Z');
|
224
|
+
ASSERT(entry[1] == 'Y');
|
225
|
+
ASSERT(entry[2] == 'X');
|
226
|
+
ASSERT(entry.id() == 123);
|
227
|
+
|
228
|
+
TEST_END();
|
229
|
+
}
|
230
|
+
|
231
|
+
void TestTextTail() {
|
232
|
+
TEST_START();
|
233
|
+
|
234
|
+
marisa::grimoire::trie::Tail tail;
|
235
|
+
marisa::grimoire::Vector<marisa::grimoire::trie::Entry> entries;
|
236
|
+
marisa::grimoire::Vector<marisa::UInt32> offsets;
|
237
|
+
tail.build(entries, &offsets, MARISA_TEXT_TAIL);
|
238
|
+
|
239
|
+
ASSERT(tail.mode() == MARISA_TEXT_TAIL);
|
240
|
+
ASSERT(tail.size() == 0);
|
241
|
+
ASSERT(tail.empty());
|
242
|
+
ASSERT(tail.total_size() == tail.size());
|
243
|
+
ASSERT(tail.io_size() == (sizeof(marisa::UInt64) * 6));
|
244
|
+
|
245
|
+
ASSERT(offsets.empty());
|
246
|
+
|
247
|
+
marisa::grimoire::trie::Entry entry;
|
248
|
+
entry.set_str("X", 1);
|
249
|
+
entries.push_back(entry);
|
250
|
+
|
251
|
+
tail.build(entries, &offsets, MARISA_TEXT_TAIL);
|
252
|
+
|
253
|
+
ASSERT(tail.mode() == MARISA_TEXT_TAIL);
|
254
|
+
ASSERT(tail.size() == 2);
|
255
|
+
ASSERT(!tail.empty());
|
256
|
+
ASSERT(tail.total_size() == tail.size());
|
257
|
+
ASSERT(tail.io_size() == (sizeof(marisa::UInt64) * 7));
|
258
|
+
|
259
|
+
ASSERT(offsets.size() == entries.size());
|
260
|
+
ASSERT(offsets[0] == 0);
|
261
|
+
ASSERT(tail[offsets[0]] == 'X');
|
262
|
+
ASSERT(tail[offsets[0] + 1] == '\0');
|
263
|
+
|
264
|
+
entries.clear();
|
265
|
+
entry.set_str("abc", 3);
|
266
|
+
entries.push_back(entry);
|
267
|
+
entry.set_str("bc", 2);
|
268
|
+
entries.push_back(entry);
|
269
|
+
entry.set_str("abc", 3);
|
270
|
+
entries.push_back(entry);
|
271
|
+
entry.set_str("c", 1);
|
272
|
+
entries.push_back(entry);
|
273
|
+
entry.set_str("ABC", 3);
|
274
|
+
entries.push_back(entry);
|
275
|
+
entry.set_str("AB", 2);
|
276
|
+
entries.push_back(entry);
|
277
|
+
|
278
|
+
tail.build(entries, &offsets, MARISA_TEXT_TAIL);
|
279
|
+
std::sort(entries.begin(), entries.end(),
|
280
|
+
marisa::grimoire::trie::Entry::IDComparer());
|
281
|
+
|
282
|
+
ASSERT(tail.size() == 11);
|
283
|
+
ASSERT(offsets.size() == entries.size());
|
284
|
+
for (std::size_t i = 0; i < entries.size(); ++i) {
|
285
|
+
const char * const ptr = &tail[offsets[i]];
|
286
|
+
ASSERT(std::strlen(ptr) == entries[i].length());
|
287
|
+
ASSERT(std::strcmp(ptr, entries[i].ptr()) == 0);
|
288
|
+
}
|
289
|
+
|
290
|
+
{
|
291
|
+
marisa::grimoire::Writer writer;
|
292
|
+
writer.open("trie-test.dat");
|
293
|
+
tail.write(writer);
|
294
|
+
}
|
295
|
+
|
296
|
+
tail.clear();
|
297
|
+
|
298
|
+
ASSERT(tail.size() == 0);
|
299
|
+
ASSERT(tail.total_size() == tail.size());
|
300
|
+
|
301
|
+
{
|
302
|
+
marisa::grimoire::Mapper mapper;
|
303
|
+
mapper.open("trie-test.dat");
|
304
|
+
tail.map(mapper);
|
305
|
+
|
306
|
+
ASSERT(tail.mode() == MARISA_TEXT_TAIL);
|
307
|
+
ASSERT(tail.size() == 11);
|
308
|
+
for (std::size_t i = 0; i < entries.size(); ++i) {
|
309
|
+
const char * const ptr = &tail[offsets[i]];
|
310
|
+
ASSERT(std::strlen(ptr) == entries[i].length());
|
311
|
+
ASSERT(std::strcmp(ptr, entries[i].ptr()) == 0);
|
312
|
+
}
|
313
|
+
tail.clear();
|
314
|
+
}
|
315
|
+
|
316
|
+
{
|
317
|
+
marisa::grimoire::Reader reader;
|
318
|
+
reader.open("trie-test.dat");
|
319
|
+
tail.read(reader);
|
320
|
+
}
|
321
|
+
|
322
|
+
ASSERT(tail.size() == 11);
|
323
|
+
ASSERT(offsets.size() == entries.size());
|
324
|
+
for (std::size_t i = 0; i < entries.size(); ++i) {
|
325
|
+
const char * const ptr = &tail[offsets[i]];
|
326
|
+
ASSERT(std::strlen(ptr) == entries[i].length());
|
327
|
+
ASSERT(std::strcmp(ptr, entries[i].ptr()) == 0);
|
328
|
+
}
|
329
|
+
|
330
|
+
{
|
331
|
+
std::stringstream stream;
|
332
|
+
marisa::grimoire::Writer writer;
|
333
|
+
writer.open(stream);
|
334
|
+
tail.write(writer);
|
335
|
+
tail.clear();
|
336
|
+
marisa::grimoire::Reader reader;
|
337
|
+
reader.open(stream);
|
338
|
+
tail.read(reader);
|
339
|
+
}
|
340
|
+
|
341
|
+
ASSERT(tail.size() == 11);
|
342
|
+
ASSERT(offsets.size() == entries.size());
|
343
|
+
for (std::size_t i = 0; i < entries.size(); ++i) {
|
344
|
+
const char * const ptr = &tail[offsets[i]];
|
345
|
+
ASSERT(std::strlen(ptr) == entries[i].length());
|
346
|
+
ASSERT(std::strcmp(ptr, entries[i].ptr()) == 0);
|
347
|
+
}
|
348
|
+
|
349
|
+
TEST_END();
|
350
|
+
}
|
351
|
+
|
352
|
+
void TestBinaryTail() {
|
353
|
+
TEST_START();
|
354
|
+
|
355
|
+
marisa::grimoire::trie::Tail tail;
|
356
|
+
marisa::grimoire::Vector<marisa::grimoire::trie::Entry> entries;
|
357
|
+
marisa::grimoire::Vector<marisa::UInt32> offsets;
|
358
|
+
tail.build(entries, &offsets, MARISA_BINARY_TAIL);
|
359
|
+
|
360
|
+
ASSERT(tail.mode() == MARISA_TEXT_TAIL);
|
361
|
+
ASSERT(tail.size() == 0);
|
362
|
+
ASSERT(tail.empty());
|
363
|
+
ASSERT(tail.total_size() == tail.size());
|
364
|
+
ASSERT(tail.io_size() == (sizeof(marisa::UInt64) * 6));
|
365
|
+
|
366
|
+
ASSERT(offsets.empty());
|
367
|
+
|
368
|
+
marisa::grimoire::trie::Entry entry;
|
369
|
+
entry.set_str("X", 1);
|
370
|
+
entries.push_back(entry);
|
371
|
+
|
372
|
+
tail.build(entries, &offsets, MARISA_BINARY_TAIL);
|
373
|
+
|
374
|
+
ASSERT(tail.mode() == MARISA_BINARY_TAIL);
|
375
|
+
ASSERT(tail.size() == 1);
|
376
|
+
ASSERT(!tail.empty());
|
377
|
+
ASSERT(tail.total_size() == (tail.size() + sizeof(marisa::UInt64)));
|
378
|
+
ASSERT(tail.io_size() == (sizeof(marisa::UInt64) * 8));
|
379
|
+
|
380
|
+
ASSERT(offsets.size() == entries.size());
|
381
|
+
ASSERT(offsets[0] == 0);
|
382
|
+
|
383
|
+
const char binary_entry[] = { 'N', 'P', '\0', 'T', 'r', 'i', 'e' };
|
384
|
+
entries[0].set_str(binary_entry, sizeof(binary_entry));
|
385
|
+
|
386
|
+
tail.build(entries, &offsets, MARISA_TEXT_TAIL);
|
387
|
+
|
388
|
+
ASSERT(tail.mode() == MARISA_BINARY_TAIL);
|
389
|
+
ASSERT(tail.size() == entries[0].length());
|
390
|
+
|
391
|
+
ASSERT(offsets.size() == entries.size());
|
392
|
+
ASSERT(offsets[0] == 0);
|
393
|
+
|
394
|
+
entries.clear();
|
395
|
+
entry.set_str("abc", 3);
|
396
|
+
entries.push_back(entry);
|
397
|
+
entry.set_str("bc", 2);
|
398
|
+
entries.push_back(entry);
|
399
|
+
entry.set_str("abc", 3);
|
400
|
+
entries.push_back(entry);
|
401
|
+
entry.set_str("c", 1);
|
402
|
+
entries.push_back(entry);
|
403
|
+
entry.set_str("ABC", 3);
|
404
|
+
entries.push_back(entry);
|
405
|
+
entry.set_str("AB", 2);
|
406
|
+
entries.push_back(entry);
|
407
|
+
|
408
|
+
tail.build(entries, &offsets, MARISA_BINARY_TAIL);
|
409
|
+
std::sort(entries.begin(), entries.end(),
|
410
|
+
marisa::grimoire::trie::Entry::IDComparer());
|
411
|
+
|
412
|
+
ASSERT(tail.mode() == MARISA_BINARY_TAIL);
|
413
|
+
ASSERT(tail.size() == 8);
|
414
|
+
ASSERT(offsets.size() == entries.size());
|
415
|
+
for (std::size_t i = 0; i < entries.size(); ++i) {
|
416
|
+
const char * const ptr = &tail[offsets[i]];
|
417
|
+
ASSERT(std::memcmp(ptr, entries[i].ptr(), entries[i].length()) == 0);
|
418
|
+
}
|
419
|
+
|
420
|
+
TEST_END();
|
421
|
+
}
|
422
|
+
|
423
|
+
void TestHistory() {
|
424
|
+
TEST_START();
|
425
|
+
|
426
|
+
marisa::grimoire::trie::History history;
|
427
|
+
|
428
|
+
ASSERT(history.node_id() == 0);
|
429
|
+
ASSERT(history.louds_pos() == 0);
|
430
|
+
ASSERT(history.key_pos() == 0);
|
431
|
+
ASSERT(history.link_id() == MARISA_INVALID_LINK_ID);
|
432
|
+
ASSERT(history.key_id() == MARISA_INVALID_KEY_ID);
|
433
|
+
|
434
|
+
history.set_node_id(100);
|
435
|
+
history.set_louds_pos(200);
|
436
|
+
history.set_key_pos(300);
|
437
|
+
history.set_link_id(400);
|
438
|
+
history.set_key_id(500);
|
439
|
+
|
440
|
+
ASSERT(history.node_id() == 100);
|
441
|
+
ASSERT(history.louds_pos() == 200);
|
442
|
+
ASSERT(history.key_pos() == 300);
|
443
|
+
ASSERT(history.link_id() == 400);
|
444
|
+
ASSERT(history.key_id() == 500);
|
445
|
+
|
446
|
+
TEST_END();
|
447
|
+
}
|
448
|
+
|
449
|
+
void TestState() {
|
450
|
+
TEST_START();
|
451
|
+
|
452
|
+
marisa::grimoire::trie::State state;
|
453
|
+
|
454
|
+
ASSERT(state.key_buf().empty());
|
455
|
+
ASSERT(state.history().empty());
|
456
|
+
ASSERT(state.node_id() == 0);
|
457
|
+
ASSERT(state.query_pos() == 0);
|
458
|
+
ASSERT(state.history_pos() == 0);
|
459
|
+
ASSERT(state.status_code() == marisa::grimoire::trie::MARISA_READY_TO_ALL);
|
460
|
+
|
461
|
+
state.set_node_id(10);
|
462
|
+
state.set_query_pos(100);
|
463
|
+
state.set_history_pos(1000);
|
464
|
+
state.set_status_code(
|
465
|
+
marisa::grimoire::trie::MARISA_END_OF_PREDICTIVE_SEARCH);
|
466
|
+
|
467
|
+
ASSERT(state.node_id() == 10);
|
468
|
+
ASSERT(state.query_pos() == 100);
|
469
|
+
ASSERT(state.history_pos() == 1000);
|
470
|
+
ASSERT(state.status_code() ==
|
471
|
+
marisa::grimoire::trie::MARISA_END_OF_PREDICTIVE_SEARCH);
|
472
|
+
|
473
|
+
state.lookup_init();
|
474
|
+
ASSERT(state.status_code() == marisa::grimoire::trie::MARISA_READY_TO_ALL);
|
475
|
+
|
476
|
+
state.reverse_lookup_init();
|
477
|
+
ASSERT(state.status_code() == marisa::grimoire::trie::MARISA_READY_TO_ALL);
|
478
|
+
|
479
|
+
state.common_prefix_search_init();
|
480
|
+
ASSERT(state.status_code() ==
|
481
|
+
marisa::grimoire::trie::MARISA_READY_TO_COMMON_PREFIX_SEARCH);
|
482
|
+
|
483
|
+
state.predictive_search_init();
|
484
|
+
ASSERT(state.status_code() ==
|
485
|
+
marisa::grimoire::trie::MARISA_READY_TO_PREDICTIVE_SEARCH);
|
486
|
+
|
487
|
+
TEST_END();
|
488
|
+
}
|
489
|
+
|
490
|
+
} // namespace
|
491
|
+
|
492
|
+
int main() try {
|
493
|
+
TestConfig();
|
494
|
+
TestHeader();
|
495
|
+
TestKey();
|
496
|
+
TestRange();
|
497
|
+
TestEntry();
|
498
|
+
TestTextTail();
|
499
|
+
TestBinaryTail();
|
500
|
+
TestHistory();
|
501
|
+
TestState();
|
502
|
+
|
503
|
+
return 0;
|
504
|
+
} catch (const marisa::Exception &ex) {
|
505
|
+
std::cerr << ex.what() << std::endl;
|
506
|
+
throw;
|
507
|
+
}
|