melisa 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (88) hide show
  1. data/README.md +11 -0
  2. data/ext/marisa/bindings/marisa-swig.cxx +253 -0
  3. data/ext/marisa/bindings/marisa-swig.h +183 -0
  4. data/ext/marisa/bindings/perl/marisa-swig.cxx +253 -0
  5. data/ext/marisa/bindings/perl/marisa-swig.h +183 -0
  6. data/ext/marisa/bindings/perl/marisa-swig_wrap.cxx +5160 -0
  7. data/ext/marisa/bindings/python/marisa-swig.cxx +253 -0
  8. data/ext/marisa/bindings/python/marisa-swig.h +183 -0
  9. data/ext/marisa/bindings/python/marisa-swig_wrap.cxx +6090 -0
  10. data/ext/marisa/bindings/ruby/extconf.rb +5 -0
  11. data/ext/marisa/bindings/ruby/marisa-swig.cxx +253 -0
  12. data/ext/marisa/bindings/ruby/marisa-swig.h +183 -0
  13. data/ext/marisa/bindings/ruby/marisa-swig_wrap.cxx +4708 -0
  14. data/ext/marisa/lib/marisa.h +14 -0
  15. data/ext/marisa/lib/marisa/agent.cc +51 -0
  16. data/ext/marisa/lib/marisa/agent.h +73 -0
  17. data/ext/marisa/lib/marisa/base.h +193 -0
  18. data/ext/marisa/lib/marisa/exception.h +82 -0
  19. data/ext/marisa/lib/marisa/grimoire/algorithm.h +26 -0
  20. data/ext/marisa/lib/marisa/grimoire/algorithm/sort.h +196 -0
  21. data/ext/marisa/lib/marisa/grimoire/intrin.h +115 -0
  22. data/ext/marisa/lib/marisa/grimoire/io.h +18 -0
  23. data/ext/marisa/lib/marisa/grimoire/io/mapper.cc +163 -0
  24. data/ext/marisa/lib/marisa/grimoire/io/mapper.h +67 -0
  25. data/ext/marisa/lib/marisa/grimoire/io/reader.cc +147 -0
  26. data/ext/marisa/lib/marisa/grimoire/io/reader.h +66 -0
  27. data/ext/marisa/lib/marisa/grimoire/io/writer.cc +148 -0
  28. data/ext/marisa/lib/marisa/grimoire/io/writer.h +65 -0
  29. data/ext/marisa/lib/marisa/grimoire/trie.h +16 -0
  30. data/ext/marisa/lib/marisa/grimoire/trie/cache.h +81 -0
  31. data/ext/marisa/lib/marisa/grimoire/trie/config.h +155 -0
  32. data/ext/marisa/lib/marisa/grimoire/trie/entry.h +82 -0
  33. data/ext/marisa/lib/marisa/grimoire/trie/header.h +61 -0
  34. data/ext/marisa/lib/marisa/grimoire/trie/history.h +65 -0
  35. data/ext/marisa/lib/marisa/grimoire/trie/key.h +228 -0
  36. data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.cc +876 -0
  37. data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.h +134 -0
  38. data/ext/marisa/lib/marisa/grimoire/trie/range.h +115 -0
  39. data/ext/marisa/lib/marisa/grimoire/trie/state.h +117 -0
  40. data/ext/marisa/lib/marisa/grimoire/trie/tail.cc +218 -0
  41. data/ext/marisa/lib/marisa/grimoire/trie/tail.h +72 -0
  42. data/ext/marisa/lib/marisa/grimoire/vector.h +18 -0
  43. data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.cc +826 -0
  44. data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.h +179 -0
  45. data/ext/marisa/lib/marisa/grimoire/vector/flat-vector.h +205 -0
  46. data/ext/marisa/lib/marisa/grimoire/vector/pop-count.h +110 -0
  47. data/ext/marisa/lib/marisa/grimoire/vector/rank-index.h +82 -0
  48. data/ext/marisa/lib/marisa/grimoire/vector/vector.h +256 -0
  49. data/ext/marisa/lib/marisa/iostream.h +18 -0
  50. data/ext/marisa/lib/marisa/key.h +85 -0
  51. data/ext/marisa/lib/marisa/keyset.cc +181 -0
  52. data/ext/marisa/lib/marisa/keyset.h +80 -0
  53. data/ext/marisa/lib/marisa/query.h +71 -0
  54. data/ext/marisa/lib/marisa/scoped-array.h +48 -0
  55. data/ext/marisa/lib/marisa/scoped-ptr.h +52 -0
  56. data/ext/marisa/lib/marisa/stdio.h +15 -0
  57. data/ext/marisa/lib/marisa/trie.cc +249 -0
  58. data/ext/marisa/lib/marisa/trie.h +64 -0
  59. data/ext/marisa/tests/base-test.cc +309 -0
  60. data/ext/marisa/tests/io-test.cc +252 -0
  61. data/ext/marisa/tests/marisa-assert.h +26 -0
  62. data/ext/marisa/tests/marisa-test.cc +388 -0
  63. data/ext/marisa/tests/trie-test.cc +507 -0
  64. data/ext/marisa/tests/vector-test.cc +466 -0
  65. data/ext/marisa/tools/cmdopt.cc +298 -0
  66. data/ext/marisa/tools/cmdopt.h +58 -0
  67. data/ext/marisa/tools/marisa-benchmark.cc +418 -0
  68. data/ext/marisa/tools/marisa-build.cc +206 -0
  69. data/ext/marisa/tools/marisa-common-prefix-search.cc +143 -0
  70. data/ext/marisa/tools/marisa-dump.cc +151 -0
  71. data/ext/marisa/tools/marisa-lookup.cc +110 -0
  72. data/ext/marisa/tools/marisa-predictive-search.cc +143 -0
  73. data/ext/marisa/tools/marisa-reverse-lookup.cc +110 -0
  74. data/lib/melisa.rb +7 -0
  75. data/lib/melisa/base_config_flags.rb +76 -0
  76. data/lib/melisa/bytes_trie.rb +55 -0
  77. data/lib/melisa/int_trie.rb +14 -0
  78. data/lib/melisa/search.rb +55 -0
  79. data/lib/melisa/trie.rb +96 -0
  80. data/lib/melisa/version.rb +3 -0
  81. data/melisa.gemspec +36 -0
  82. data/spec/base_config_flags_spec.rb +73 -0
  83. data/spec/bytes_trie_spec.rb +16 -0
  84. data/spec/int_trie_spec.rb +16 -0
  85. data/spec/search_spec.rb +29 -0
  86. data/spec/spec_helper.rb +1 -0
  87. data/spec/trie_spec.rb +30 -0
  88. metadata +207 -0
data/README.md ADDED
@@ -0,0 +1,11 @@
1
+ Features
2
+ --------
3
+ - fast search for exact strings and prefixes
4
+ - has a BytesTrie that can be used to store binary data
5
+ - has an IntTrie that can be used to store integer values easily
6
+
7
+ TODO
8
+ ----
9
+ - Ruby bindings aren't yet built into the gem
10
+
11
+ License: MIT
@@ -0,0 +1,253 @@
1
+ #include <cstring>
2
+ #include <new>
3
+
4
+ #include "marisa-swig.h"
5
+
6
+ namespace marisa_swig {
7
+
8
+ void Key::str(const char **ptr_out, size_t *length_out) const {
9
+ *ptr_out = key_.ptr();
10
+ *length_out = key_.length();
11
+ }
12
+
13
+ size_t Key::id() const {
14
+ return key_.id();
15
+ }
16
+
17
+ float Key::weight() const {
18
+ return key_.weight();
19
+ }
20
+
21
+ void Query::str(const char **ptr_out, size_t *length_out) const {
22
+ *ptr_out = query_.ptr();
23
+ *length_out = query_.length();
24
+ }
25
+
26
+ size_t Query::id() const {
27
+ return query_.id();
28
+ }
29
+
30
+ Keyset::Keyset() : keyset_(new (std::nothrow) marisa::Keyset) {
31
+ MARISA_THROW_IF(keyset_ == NULL, ::MARISA_MEMORY_ERROR);
32
+ }
33
+
34
+ Keyset::~Keyset() {
35
+ delete keyset_;
36
+ }
37
+
38
+ void Keyset::push_back(const marisa::Key &key) {
39
+ keyset_->push_back(key);
40
+ }
41
+
42
+ void Keyset::push_back(const char *ptr, size_t length, float weight) {
43
+ keyset_->push_back(ptr, length, weight);
44
+ }
45
+
46
+ const Key &Keyset::key(size_t i) const {
47
+ return reinterpret_cast<const Key &>((*keyset_)[i]);
48
+ }
49
+
50
+ void Keyset::key_str(size_t i,
51
+ const char **ptr_out, size_t *length_out) const {
52
+ *ptr_out = (*keyset_)[i].ptr();
53
+ *length_out = (*keyset_)[i].length();
54
+ }
55
+
56
+ size_t Keyset::key_id(size_t i) const {
57
+ return (*keyset_)[i].id();
58
+ }
59
+
60
+ size_t Keyset::num_keys() const {
61
+ return keyset_->num_keys();
62
+ }
63
+
64
+ bool Keyset::empty() const {
65
+ return keyset_->empty();
66
+ }
67
+
68
+ size_t Keyset::size() const {
69
+ return keyset_->size();
70
+ }
71
+
72
+ size_t Keyset::total_length() const {
73
+ return keyset_->total_length();
74
+ }
75
+
76
+ void Keyset::reset() {
77
+ keyset_->reset();
78
+ }
79
+
80
+ void Keyset::clear() {
81
+ keyset_->clear();
82
+ }
83
+
84
+ Agent::Agent()
85
+ : agent_(new (std::nothrow) marisa::Agent), buf_(NULL), buf_size_(0) {
86
+ MARISA_THROW_IF(agent_ == NULL, ::MARISA_MEMORY_ERROR);
87
+ }
88
+
89
+ Agent::~Agent() {
90
+ delete agent_;
91
+ delete [] buf_;
92
+ }
93
+
94
+ void Agent::set_query(const char *ptr, size_t length) {
95
+ if (length > buf_size_) {
96
+ size_t new_buf_size = (buf_size_ != 0) ? buf_size_ : 1;
97
+ if (length >= (MARISA_SIZE_MAX / 2)) {
98
+ new_buf_size = MARISA_SIZE_MAX;
99
+ } else {
100
+ while (new_buf_size < length) {
101
+ new_buf_size *= 2;
102
+ }
103
+ }
104
+ char *new_buf = new (std::nothrow) char[new_buf_size];
105
+ MARISA_THROW_IF(new_buf == NULL, MARISA_MEMORY_ERROR);
106
+ delete [] buf_;
107
+ buf_ = new_buf;
108
+ buf_size_ = new_buf_size;
109
+ }
110
+ std::memcpy(buf_, ptr, length);
111
+ agent_->set_query(buf_, length);
112
+ }
113
+
114
+ void Agent::set_query(size_t id) {
115
+ agent_->set_query(id);
116
+ }
117
+
118
+ const Key &Agent::key() const {
119
+ return reinterpret_cast<const Key &>(agent_->key());
120
+ }
121
+
122
+ const Query &Agent::query() const {
123
+ return reinterpret_cast<const Query &>(agent_->query());
124
+ }
125
+
126
+ void Agent::key_str(const char **ptr_out, size_t *length_out) const {
127
+ *ptr_out = agent_->key().ptr();
128
+ *length_out = agent_->key().length();
129
+ }
130
+
131
+ size_t Agent::key_id() const {
132
+ return agent_->key().id();
133
+ }
134
+
135
+ void Agent::query_str(const char **ptr_out, size_t *length_out) const {
136
+ *ptr_out = agent_->query().ptr();
137
+ *length_out = agent_->query().length();
138
+ }
139
+
140
+ size_t Agent::query_id() const {
141
+ return agent_->query().id();
142
+ }
143
+
144
+ Trie::Trie() : trie_(new (std::nothrow) marisa::Trie) {
145
+ MARISA_THROW_IF(trie_ == NULL, ::MARISA_MEMORY_ERROR);
146
+ }
147
+
148
+ Trie::~Trie() {
149
+ delete trie_;
150
+ }
151
+
152
+ void Trie::build(Keyset &keyset, int config_flags) {
153
+ trie_->build(*keyset.keyset_, config_flags);
154
+ }
155
+
156
+ void Trie::mmap(const char *filename) {
157
+ trie_->mmap(filename);
158
+ }
159
+
160
+ void Trie::load(const char *filename) {
161
+ trie_->load(filename);
162
+ }
163
+
164
+ void Trie::save(const char *filename) const {
165
+ trie_->save(filename);
166
+ }
167
+
168
+ bool Trie::lookup(Agent &agent) const {
169
+ return trie_->lookup(*agent.agent_);
170
+ }
171
+
172
+ void Trie::reverse_lookup(Agent &agent) const {
173
+ trie_->reverse_lookup(*agent.agent_);
174
+ }
175
+
176
+ bool Trie::common_prefix_search(Agent &agent) const {
177
+ return trie_->common_prefix_search(*agent.agent_);
178
+ }
179
+
180
+ bool Trie::predictive_search(Agent &agent) const {
181
+ return trie_->predictive_search(*agent.agent_);
182
+ }
183
+
184
+ size_t Trie::lookup(const char *ptr, size_t length) const {
185
+ marisa::Agent agent;
186
+ agent.set_query(ptr, length);
187
+ if (!trie_->lookup(agent)) {
188
+ return MARISA_INVALID_KEY_ID;
189
+ }
190
+ return agent.key().id();
191
+ }
192
+
193
+ void Trie::reverse_lookup(size_t id,
194
+ const char **ptr_out_to_be_deleted, size_t *length_out) const {
195
+ marisa::Agent agent;
196
+ agent.set_query(id);
197
+ trie_->reverse_lookup(agent);
198
+ char * const buf = new (std::nothrow) char[agent.key().length()];
199
+ MARISA_THROW_IF(buf == NULL, MARISA_MEMORY_ERROR);
200
+ std::memcpy(buf, agent.key().ptr(), agent.key().length());
201
+ *ptr_out_to_be_deleted = buf;
202
+ *length_out = agent.key().length();
203
+ }
204
+
205
+ size_t Trie::num_tries() const {
206
+ return trie_->num_tries();
207
+ }
208
+
209
+ size_t Trie::num_keys() const {
210
+ return trie_->num_keys();
211
+ }
212
+
213
+ size_t Trie::num_nodes() const {
214
+ return trie_->num_nodes();
215
+ }
216
+
217
+ TailMode Trie::tail_mode() const {
218
+ if (trie_->tail_mode() == ::MARISA_TEXT_TAIL) {
219
+ return TEXT_TAIL;
220
+ } else {
221
+ return BINARY_TAIL;
222
+ }
223
+ }
224
+
225
+ NodeOrder Trie::node_order() const {
226
+ if (trie_->node_order() == ::MARISA_LABEL_ORDER) {
227
+ return LABEL_ORDER;
228
+ } else {
229
+ return WEIGHT_ORDER;
230
+ }
231
+ }
232
+
233
+ bool Trie::empty() const {
234
+ return trie_->empty();
235
+ }
236
+
237
+ size_t Trie::size() const {
238
+ return trie_->size();
239
+ }
240
+
241
+ size_t Trie::total_size() const {
242
+ return trie_->total_size();
243
+ }
244
+
245
+ size_t Trie::io_size() const {
246
+ return trie_->io_size();
247
+ }
248
+
249
+ void Trie::clear() {
250
+ trie_->clear();
251
+ }
252
+
253
+ } // namespace marisa_swig
@@ -0,0 +1,183 @@
1
+ #ifndef MARISA_SWIG_H_
2
+ #define MARISA_SWIG_H_
3
+
4
+ #include <marisa.h>
5
+
6
+ namespace marisa_swig {
7
+
8
+ #define MARISA_SWIG_ENUM_COPY(name) name = MARISA_ ## name
9
+
10
+ enum ErrorCode {
11
+ MARISA_SWIG_ENUM_COPY(OK),
12
+ MARISA_SWIG_ENUM_COPY(STATE_ERROR),
13
+ MARISA_SWIG_ENUM_COPY(NULL_ERROR),
14
+ MARISA_SWIG_ENUM_COPY(BOUND_ERROR),
15
+ MARISA_SWIG_ENUM_COPY(RANGE_ERROR),
16
+ MARISA_SWIG_ENUM_COPY(CODE_ERROR),
17
+ MARISA_SWIG_ENUM_COPY(RESET_ERROR),
18
+ MARISA_SWIG_ENUM_COPY(SIZE_ERROR),
19
+ MARISA_SWIG_ENUM_COPY(MEMORY_ERROR),
20
+ MARISA_SWIG_ENUM_COPY(IO_ERROR),
21
+ MARISA_SWIG_ENUM_COPY(FORMAT_ERROR)
22
+ };
23
+
24
+ enum NumTries {
25
+ MARISA_SWIG_ENUM_COPY(MIN_NUM_TRIES),
26
+ MARISA_SWIG_ENUM_COPY(MAX_NUM_TRIES),
27
+ MARISA_SWIG_ENUM_COPY(DEFAULT_NUM_TRIES)
28
+ };
29
+
30
+ enum CacheLevel {
31
+ MARISA_SWIG_ENUM_COPY(HUGE_CACHE),
32
+ MARISA_SWIG_ENUM_COPY(LARGE_CACHE),
33
+ MARISA_SWIG_ENUM_COPY(NORMAL_CACHE),
34
+ MARISA_SWIG_ENUM_COPY(SMALL_CACHE),
35
+ MARISA_SWIG_ENUM_COPY(TINY_CACHE),
36
+ MARISA_SWIG_ENUM_COPY(DEFAULT_CACHE)
37
+ };
38
+
39
+ enum TailMode {
40
+ MARISA_SWIG_ENUM_COPY(TEXT_TAIL),
41
+ MARISA_SWIG_ENUM_COPY(BINARY_TAIL),
42
+ MARISA_SWIG_ENUM_COPY(DEFAULT_TAIL)
43
+ };
44
+
45
+ enum NodeOrder {
46
+ MARISA_SWIG_ENUM_COPY(LABEL_ORDER),
47
+ MARISA_SWIG_ENUM_COPY(WEIGHT_ORDER),
48
+ MARISA_SWIG_ENUM_COPY(DEFAULT_ORDER)
49
+ };
50
+
51
+ #undef MARISA_SWIG_ENUM_COPY
52
+
53
+ class Key {
54
+ public:
55
+ void str(const char **ptr_out, std::size_t *length_out) const;
56
+ std::size_t id() const;
57
+ float weight() const;
58
+
59
+ private:
60
+ const marisa::Key key_;
61
+
62
+ Key();
63
+ Key(const Key &key);
64
+ Key &operator=(const Key &);
65
+ };
66
+
67
+ class Query {
68
+ public:
69
+ void str(const char **ptr_out, std::size_t *length_out) const;
70
+ std::size_t id() const;
71
+
72
+ private:
73
+ const marisa::Query query_;
74
+
75
+ Query();
76
+ Query(const Query &query);
77
+ Query &operator=(const Query &);
78
+ };
79
+
80
+ class Keyset {
81
+ friend class Trie;
82
+
83
+ public:
84
+ Keyset();
85
+ ~Keyset();
86
+
87
+ void push_back(const marisa::Key &key);
88
+ void push_back(const char *ptr, std::size_t length, float weight = 1.0);
89
+
90
+ const Key &key(std::size_t i) const;
91
+
92
+ void key_str(std::size_t i,
93
+ const char **ptr_out, std::size_t *length_out) const;
94
+ std::size_t key_id(std::size_t i) const;
95
+
96
+ std::size_t num_keys() const;
97
+
98
+ bool empty() const;
99
+ std::size_t size() const;
100
+ std::size_t total_length() const;
101
+
102
+ void reset();
103
+ void clear();
104
+
105
+ private:
106
+ marisa::Keyset *keyset_;
107
+
108
+ Keyset(const Keyset &);
109
+ Keyset &operator=(const Keyset &);
110
+ };
111
+
112
+ class Agent {
113
+ friend class Trie;
114
+
115
+ public:
116
+ Agent();
117
+ ~Agent();
118
+
119
+ void set_query(const char *ptr, std::size_t length);
120
+ void set_query(std::size_t id);
121
+
122
+ const Key &key() const;
123
+ const Query &query() const;
124
+
125
+ void key_str(const char **ptr_out, std::size_t *length_out) const;
126
+ std::size_t key_id() const;
127
+
128
+ void query_str(const char **ptr_out, std::size_t *length_out) const;
129
+ std::size_t query_id() const;
130
+
131
+ private:
132
+ marisa::Agent *agent_;
133
+ char *buf_;
134
+ std::size_t buf_size_;
135
+
136
+ Agent(const Agent &);
137
+ Agent &operator=(const Agent &);
138
+ };
139
+
140
+ class Trie {
141
+ public:
142
+ Trie();
143
+ ~Trie();
144
+
145
+ void build(Keyset &keyset, int config_flags = 0);
146
+
147
+ void mmap(const char *filename);
148
+ void load(const char *filename);
149
+ void save(const char *filename) const;
150
+
151
+ bool lookup(Agent &agent) const;
152
+ void reverse_lookup(Agent &agent) const;
153
+ bool common_prefix_search(Agent &agent) const;
154
+ bool predictive_search(Agent &agent) const;
155
+
156
+ std::size_t lookup(const char *ptr, std::size_t length) const;
157
+ void reverse_lookup(std::size_t id,
158
+ const char **ptr_out_to_be_deleted, std::size_t *length_out) const;
159
+
160
+ std::size_t num_tries() const;
161
+ std::size_t num_keys() const;
162
+ std::size_t num_nodes() const;
163
+
164
+ TailMode tail_mode() const;
165
+ NodeOrder node_order() const;
166
+
167
+ bool empty() const;
168
+ std::size_t size() const;
169
+ std::size_t total_size() const;
170
+ std::size_t io_size() const;
171
+
172
+ void clear();
173
+
174
+ private:
175
+ marisa::Trie *trie_;
176
+
177
+ Trie(const Trie &);
178
+ Trie &operator=(const Trie &);
179
+ };
180
+
181
+ } // namespace marisa_swig
182
+
183
+ #endif // MARISA_SWIG_H_
@@ -0,0 +1,253 @@
1
+ #include <cstring>
2
+ #include <new>
3
+
4
+ #include "marisa-swig.h"
5
+
6
+ namespace marisa_swig {
7
+
8
+ void Key::str(const char **ptr_out, size_t *length_out) const {
9
+ *ptr_out = key_.ptr();
10
+ *length_out = key_.length();
11
+ }
12
+
13
+ size_t Key::id() const {
14
+ return key_.id();
15
+ }
16
+
17
+ float Key::weight() const {
18
+ return key_.weight();
19
+ }
20
+
21
+ void Query::str(const char **ptr_out, size_t *length_out) const {
22
+ *ptr_out = query_.ptr();
23
+ *length_out = query_.length();
24
+ }
25
+
26
+ size_t Query::id() const {
27
+ return query_.id();
28
+ }
29
+
30
+ Keyset::Keyset() : keyset_(new (std::nothrow) marisa::Keyset) {
31
+ MARISA_THROW_IF(keyset_ == NULL, ::MARISA_MEMORY_ERROR);
32
+ }
33
+
34
+ Keyset::~Keyset() {
35
+ delete keyset_;
36
+ }
37
+
38
+ void Keyset::push_back(const marisa::Key &key) {
39
+ keyset_->push_back(key);
40
+ }
41
+
42
+ void Keyset::push_back(const char *ptr, size_t length, float weight) {
43
+ keyset_->push_back(ptr, length, weight);
44
+ }
45
+
46
+ const Key &Keyset::key(size_t i) const {
47
+ return reinterpret_cast<const Key &>((*keyset_)[i]);
48
+ }
49
+
50
+ void Keyset::key_str(size_t i,
51
+ const char **ptr_out, size_t *length_out) const {
52
+ *ptr_out = (*keyset_)[i].ptr();
53
+ *length_out = (*keyset_)[i].length();
54
+ }
55
+
56
+ size_t Keyset::key_id(size_t i) const {
57
+ return (*keyset_)[i].id();
58
+ }
59
+
60
+ size_t Keyset::num_keys() const {
61
+ return keyset_->num_keys();
62
+ }
63
+
64
+ bool Keyset::empty() const {
65
+ return keyset_->empty();
66
+ }
67
+
68
+ size_t Keyset::size() const {
69
+ return keyset_->size();
70
+ }
71
+
72
+ size_t Keyset::total_length() const {
73
+ return keyset_->total_length();
74
+ }
75
+
76
+ void Keyset::reset() {
77
+ keyset_->reset();
78
+ }
79
+
80
+ void Keyset::clear() {
81
+ keyset_->clear();
82
+ }
83
+
84
+ Agent::Agent()
85
+ : agent_(new (std::nothrow) marisa::Agent), buf_(NULL), buf_size_(0) {
86
+ MARISA_THROW_IF(agent_ == NULL, ::MARISA_MEMORY_ERROR);
87
+ }
88
+
89
+ Agent::~Agent() {
90
+ delete agent_;
91
+ delete [] buf_;
92
+ }
93
+
94
+ void Agent::set_query(const char *ptr, size_t length) {
95
+ if (length > buf_size_) {
96
+ size_t new_buf_size = (buf_size_ != 0) ? buf_size_ : 1;
97
+ if (length >= (MARISA_SIZE_MAX / 2)) {
98
+ new_buf_size = MARISA_SIZE_MAX;
99
+ } else {
100
+ while (new_buf_size < length) {
101
+ new_buf_size *= 2;
102
+ }
103
+ }
104
+ char *new_buf = new (std::nothrow) char[new_buf_size];
105
+ MARISA_THROW_IF(new_buf == NULL, MARISA_MEMORY_ERROR);
106
+ delete [] buf_;
107
+ buf_ = new_buf;
108
+ buf_size_ = new_buf_size;
109
+ }
110
+ std::memcpy(buf_, ptr, length);
111
+ agent_->set_query(buf_, length);
112
+ }
113
+
114
+ void Agent::set_query(size_t id) {
115
+ agent_->set_query(id);
116
+ }
117
+
118
+ const Key &Agent::key() const {
119
+ return reinterpret_cast<const Key &>(agent_->key());
120
+ }
121
+
122
+ const Query &Agent::query() const {
123
+ return reinterpret_cast<const Query &>(agent_->query());
124
+ }
125
+
126
+ void Agent::key_str(const char **ptr_out, size_t *length_out) const {
127
+ *ptr_out = agent_->key().ptr();
128
+ *length_out = agent_->key().length();
129
+ }
130
+
131
+ size_t Agent::key_id() const {
132
+ return agent_->key().id();
133
+ }
134
+
135
+ void Agent::query_str(const char **ptr_out, size_t *length_out) const {
136
+ *ptr_out = agent_->query().ptr();
137
+ *length_out = agent_->query().length();
138
+ }
139
+
140
+ size_t Agent::query_id() const {
141
+ return agent_->query().id();
142
+ }
143
+
144
+ Trie::Trie() : trie_(new (std::nothrow) marisa::Trie) {
145
+ MARISA_THROW_IF(trie_ == NULL, ::MARISA_MEMORY_ERROR);
146
+ }
147
+
148
+ Trie::~Trie() {
149
+ delete trie_;
150
+ }
151
+
152
+ void Trie::build(Keyset &keyset, int config_flags) {
153
+ trie_->build(*keyset.keyset_, config_flags);
154
+ }
155
+
156
+ void Trie::mmap(const char *filename) {
157
+ trie_->mmap(filename);
158
+ }
159
+
160
+ void Trie::load(const char *filename) {
161
+ trie_->load(filename);
162
+ }
163
+
164
+ void Trie::save(const char *filename) const {
165
+ trie_->save(filename);
166
+ }
167
+
168
+ bool Trie::lookup(Agent &agent) const {
169
+ return trie_->lookup(*agent.agent_);
170
+ }
171
+
172
+ void Trie::reverse_lookup(Agent &agent) const {
173
+ trie_->reverse_lookup(*agent.agent_);
174
+ }
175
+
176
+ bool Trie::common_prefix_search(Agent &agent) const {
177
+ return trie_->common_prefix_search(*agent.agent_);
178
+ }
179
+
180
+ bool Trie::predictive_search(Agent &agent) const {
181
+ return trie_->predictive_search(*agent.agent_);
182
+ }
183
+
184
+ size_t Trie::lookup(const char *ptr, size_t length) const {
185
+ marisa::Agent agent;
186
+ agent.set_query(ptr, length);
187
+ if (!trie_->lookup(agent)) {
188
+ return MARISA_INVALID_KEY_ID;
189
+ }
190
+ return agent.key().id();
191
+ }
192
+
193
+ void Trie::reverse_lookup(size_t id,
194
+ const char **ptr_out_to_be_deleted, size_t *length_out) const {
195
+ marisa::Agent agent;
196
+ agent.set_query(id);
197
+ trie_->reverse_lookup(agent);
198
+ char * const buf = new (std::nothrow) char[agent.key().length()];
199
+ MARISA_THROW_IF(buf == NULL, MARISA_MEMORY_ERROR);
200
+ std::memcpy(buf, agent.key().ptr(), agent.key().length());
201
+ *ptr_out_to_be_deleted = buf;
202
+ *length_out = agent.key().length();
203
+ }
204
+
205
+ size_t Trie::num_tries() const {
206
+ return trie_->num_tries();
207
+ }
208
+
209
+ size_t Trie::num_keys() const {
210
+ return trie_->num_keys();
211
+ }
212
+
213
+ size_t Trie::num_nodes() const {
214
+ return trie_->num_nodes();
215
+ }
216
+
217
+ TailMode Trie::tail_mode() const {
218
+ if (trie_->tail_mode() == ::MARISA_TEXT_TAIL) {
219
+ return TEXT_TAIL;
220
+ } else {
221
+ return BINARY_TAIL;
222
+ }
223
+ }
224
+
225
+ NodeOrder Trie::node_order() const {
226
+ if (trie_->node_order() == ::MARISA_LABEL_ORDER) {
227
+ return LABEL_ORDER;
228
+ } else {
229
+ return WEIGHT_ORDER;
230
+ }
231
+ }
232
+
233
+ bool Trie::empty() const {
234
+ return trie_->empty();
235
+ }
236
+
237
+ size_t Trie::size() const {
238
+ return trie_->size();
239
+ }
240
+
241
+ size_t Trie::total_size() const {
242
+ return trie_->total_size();
243
+ }
244
+
245
+ size_t Trie::io_size() const {
246
+ return trie_->io_size();
247
+ }
248
+
249
+ void Trie::clear() {
250
+ trie_->clear();
251
+ }
252
+
253
+ } // namespace marisa_swig