melisa 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. data/README.md +11 -0
  2. data/ext/marisa/bindings/marisa-swig.cxx +253 -0
  3. data/ext/marisa/bindings/marisa-swig.h +183 -0
  4. data/ext/marisa/bindings/perl/marisa-swig.cxx +253 -0
  5. data/ext/marisa/bindings/perl/marisa-swig.h +183 -0
  6. data/ext/marisa/bindings/perl/marisa-swig_wrap.cxx +5160 -0
  7. data/ext/marisa/bindings/python/marisa-swig.cxx +253 -0
  8. data/ext/marisa/bindings/python/marisa-swig.h +183 -0
  9. data/ext/marisa/bindings/python/marisa-swig_wrap.cxx +6090 -0
  10. data/ext/marisa/bindings/ruby/extconf.rb +5 -0
  11. data/ext/marisa/bindings/ruby/marisa-swig.cxx +253 -0
  12. data/ext/marisa/bindings/ruby/marisa-swig.h +183 -0
  13. data/ext/marisa/bindings/ruby/marisa-swig_wrap.cxx +4708 -0
  14. data/ext/marisa/lib/marisa.h +14 -0
  15. data/ext/marisa/lib/marisa/agent.cc +51 -0
  16. data/ext/marisa/lib/marisa/agent.h +73 -0
  17. data/ext/marisa/lib/marisa/base.h +193 -0
  18. data/ext/marisa/lib/marisa/exception.h +82 -0
  19. data/ext/marisa/lib/marisa/grimoire/algorithm.h +26 -0
  20. data/ext/marisa/lib/marisa/grimoire/algorithm/sort.h +196 -0
  21. data/ext/marisa/lib/marisa/grimoire/intrin.h +115 -0
  22. data/ext/marisa/lib/marisa/grimoire/io.h +18 -0
  23. data/ext/marisa/lib/marisa/grimoire/io/mapper.cc +163 -0
  24. data/ext/marisa/lib/marisa/grimoire/io/mapper.h +67 -0
  25. data/ext/marisa/lib/marisa/grimoire/io/reader.cc +147 -0
  26. data/ext/marisa/lib/marisa/grimoire/io/reader.h +66 -0
  27. data/ext/marisa/lib/marisa/grimoire/io/writer.cc +148 -0
  28. data/ext/marisa/lib/marisa/grimoire/io/writer.h +65 -0
  29. data/ext/marisa/lib/marisa/grimoire/trie.h +16 -0
  30. data/ext/marisa/lib/marisa/grimoire/trie/cache.h +81 -0
  31. data/ext/marisa/lib/marisa/grimoire/trie/config.h +155 -0
  32. data/ext/marisa/lib/marisa/grimoire/trie/entry.h +82 -0
  33. data/ext/marisa/lib/marisa/grimoire/trie/header.h +61 -0
  34. data/ext/marisa/lib/marisa/grimoire/trie/history.h +65 -0
  35. data/ext/marisa/lib/marisa/grimoire/trie/key.h +228 -0
  36. data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.cc +876 -0
  37. data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.h +134 -0
  38. data/ext/marisa/lib/marisa/grimoire/trie/range.h +115 -0
  39. data/ext/marisa/lib/marisa/grimoire/trie/state.h +117 -0
  40. data/ext/marisa/lib/marisa/grimoire/trie/tail.cc +218 -0
  41. data/ext/marisa/lib/marisa/grimoire/trie/tail.h +72 -0
  42. data/ext/marisa/lib/marisa/grimoire/vector.h +18 -0
  43. data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.cc +826 -0
  44. data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.h +179 -0
  45. data/ext/marisa/lib/marisa/grimoire/vector/flat-vector.h +205 -0
  46. data/ext/marisa/lib/marisa/grimoire/vector/pop-count.h +110 -0
  47. data/ext/marisa/lib/marisa/grimoire/vector/rank-index.h +82 -0
  48. data/ext/marisa/lib/marisa/grimoire/vector/vector.h +256 -0
  49. data/ext/marisa/lib/marisa/iostream.h +18 -0
  50. data/ext/marisa/lib/marisa/key.h +85 -0
  51. data/ext/marisa/lib/marisa/keyset.cc +181 -0
  52. data/ext/marisa/lib/marisa/keyset.h +80 -0
  53. data/ext/marisa/lib/marisa/query.h +71 -0
  54. data/ext/marisa/lib/marisa/scoped-array.h +48 -0
  55. data/ext/marisa/lib/marisa/scoped-ptr.h +52 -0
  56. data/ext/marisa/lib/marisa/stdio.h +15 -0
  57. data/ext/marisa/lib/marisa/trie.cc +249 -0
  58. data/ext/marisa/lib/marisa/trie.h +64 -0
  59. data/ext/marisa/tests/base-test.cc +309 -0
  60. data/ext/marisa/tests/io-test.cc +252 -0
  61. data/ext/marisa/tests/marisa-assert.h +26 -0
  62. data/ext/marisa/tests/marisa-test.cc +388 -0
  63. data/ext/marisa/tests/trie-test.cc +507 -0
  64. data/ext/marisa/tests/vector-test.cc +466 -0
  65. data/ext/marisa/tools/cmdopt.cc +298 -0
  66. data/ext/marisa/tools/cmdopt.h +58 -0
  67. data/ext/marisa/tools/marisa-benchmark.cc +418 -0
  68. data/ext/marisa/tools/marisa-build.cc +206 -0
  69. data/ext/marisa/tools/marisa-common-prefix-search.cc +143 -0
  70. data/ext/marisa/tools/marisa-dump.cc +151 -0
  71. data/ext/marisa/tools/marisa-lookup.cc +110 -0
  72. data/ext/marisa/tools/marisa-predictive-search.cc +143 -0
  73. data/ext/marisa/tools/marisa-reverse-lookup.cc +110 -0
  74. data/lib/melisa.rb +7 -0
  75. data/lib/melisa/base_config_flags.rb +76 -0
  76. data/lib/melisa/bytes_trie.rb +55 -0
  77. data/lib/melisa/int_trie.rb +14 -0
  78. data/lib/melisa/search.rb +55 -0
  79. data/lib/melisa/trie.rb +96 -0
  80. data/lib/melisa/version.rb +3 -0
  81. data/melisa.gemspec +36 -0
  82. data/spec/base_config_flags_spec.rb +73 -0
  83. data/spec/bytes_trie_spec.rb +16 -0
  84. data/spec/int_trie_spec.rb +16 -0
  85. data/spec/search_spec.rb +29 -0
  86. data/spec/spec_helper.rb +1 -0
  87. data/spec/trie_spec.rb +30 -0
  88. metadata +207 -0
data/README.md ADDED
@@ -0,0 +1,11 @@
1
+ Features
2
+ --------
3
+ - fast search for exact strings and prefixes
4
+ - has a BytesTrie that can be used to store binary data
5
+ - has an IntTrie that can be used to store integer values easily
6
+
7
+ TODO
8
+ ----
9
+ - Ruby bindings aren't yet built into the gem
10
+
11
+ License: MIT
@@ -0,0 +1,253 @@
1
+ #include <cstring>
2
+ #include <new>
3
+
4
+ #include "marisa-swig.h"
5
+
6
+ namespace marisa_swig {
7
+
8
+ void Key::str(const char **ptr_out, size_t *length_out) const {
9
+ *ptr_out = key_.ptr();
10
+ *length_out = key_.length();
11
+ }
12
+
13
+ size_t Key::id() const {
14
+ return key_.id();
15
+ }
16
+
17
+ float Key::weight() const {
18
+ return key_.weight();
19
+ }
20
+
21
+ void Query::str(const char **ptr_out, size_t *length_out) const {
22
+ *ptr_out = query_.ptr();
23
+ *length_out = query_.length();
24
+ }
25
+
26
+ size_t Query::id() const {
27
+ return query_.id();
28
+ }
29
+
30
+ Keyset::Keyset() : keyset_(new (std::nothrow) marisa::Keyset) {
31
+ MARISA_THROW_IF(keyset_ == NULL, ::MARISA_MEMORY_ERROR);
32
+ }
33
+
34
+ Keyset::~Keyset() {
35
+ delete keyset_;
36
+ }
37
+
38
+ void Keyset::push_back(const marisa::Key &key) {
39
+ keyset_->push_back(key);
40
+ }
41
+
42
+ void Keyset::push_back(const char *ptr, size_t length, float weight) {
43
+ keyset_->push_back(ptr, length, weight);
44
+ }
45
+
46
+ const Key &Keyset::key(size_t i) const {
47
+ return reinterpret_cast<const Key &>((*keyset_)[i]);
48
+ }
49
+
50
+ void Keyset::key_str(size_t i,
51
+ const char **ptr_out, size_t *length_out) const {
52
+ *ptr_out = (*keyset_)[i].ptr();
53
+ *length_out = (*keyset_)[i].length();
54
+ }
55
+
56
+ size_t Keyset::key_id(size_t i) const {
57
+ return (*keyset_)[i].id();
58
+ }
59
+
60
+ size_t Keyset::num_keys() const {
61
+ return keyset_->num_keys();
62
+ }
63
+
64
+ bool Keyset::empty() const {
65
+ return keyset_->empty();
66
+ }
67
+
68
+ size_t Keyset::size() const {
69
+ return keyset_->size();
70
+ }
71
+
72
+ size_t Keyset::total_length() const {
73
+ return keyset_->total_length();
74
+ }
75
+
76
+ void Keyset::reset() {
77
+ keyset_->reset();
78
+ }
79
+
80
+ void Keyset::clear() {
81
+ keyset_->clear();
82
+ }
83
+
84
+ Agent::Agent()
85
+ : agent_(new (std::nothrow) marisa::Agent), buf_(NULL), buf_size_(0) {
86
+ MARISA_THROW_IF(agent_ == NULL, ::MARISA_MEMORY_ERROR);
87
+ }
88
+
89
+ Agent::~Agent() {
90
+ delete agent_;
91
+ delete [] buf_;
92
+ }
93
+
94
+ void Agent::set_query(const char *ptr, size_t length) {
95
+ if (length > buf_size_) {
96
+ size_t new_buf_size = (buf_size_ != 0) ? buf_size_ : 1;
97
+ if (length >= (MARISA_SIZE_MAX / 2)) {
98
+ new_buf_size = MARISA_SIZE_MAX;
99
+ } else {
100
+ while (new_buf_size < length) {
101
+ new_buf_size *= 2;
102
+ }
103
+ }
104
+ char *new_buf = new (std::nothrow) char[new_buf_size];
105
+ MARISA_THROW_IF(new_buf == NULL, MARISA_MEMORY_ERROR);
106
+ delete [] buf_;
107
+ buf_ = new_buf;
108
+ buf_size_ = new_buf_size;
109
+ }
110
+ std::memcpy(buf_, ptr, length);
111
+ agent_->set_query(buf_, length);
112
+ }
113
+
114
+ void Agent::set_query(size_t id) {
115
+ agent_->set_query(id);
116
+ }
117
+
118
+ const Key &Agent::key() const {
119
+ return reinterpret_cast<const Key &>(agent_->key());
120
+ }
121
+
122
+ const Query &Agent::query() const {
123
+ return reinterpret_cast<const Query &>(agent_->query());
124
+ }
125
+
126
+ void Agent::key_str(const char **ptr_out, size_t *length_out) const {
127
+ *ptr_out = agent_->key().ptr();
128
+ *length_out = agent_->key().length();
129
+ }
130
+
131
+ size_t Agent::key_id() const {
132
+ return agent_->key().id();
133
+ }
134
+
135
+ void Agent::query_str(const char **ptr_out, size_t *length_out) const {
136
+ *ptr_out = agent_->query().ptr();
137
+ *length_out = agent_->query().length();
138
+ }
139
+
140
+ size_t Agent::query_id() const {
141
+ return agent_->query().id();
142
+ }
143
+
144
+ Trie::Trie() : trie_(new (std::nothrow) marisa::Trie) {
145
+ MARISA_THROW_IF(trie_ == NULL, ::MARISA_MEMORY_ERROR);
146
+ }
147
+
148
+ Trie::~Trie() {
149
+ delete trie_;
150
+ }
151
+
152
+ void Trie::build(Keyset &keyset, int config_flags) {
153
+ trie_->build(*keyset.keyset_, config_flags);
154
+ }
155
+
156
+ void Trie::mmap(const char *filename) {
157
+ trie_->mmap(filename);
158
+ }
159
+
160
+ void Trie::load(const char *filename) {
161
+ trie_->load(filename);
162
+ }
163
+
164
+ void Trie::save(const char *filename) const {
165
+ trie_->save(filename);
166
+ }
167
+
168
+ bool Trie::lookup(Agent &agent) const {
169
+ return trie_->lookup(*agent.agent_);
170
+ }
171
+
172
+ void Trie::reverse_lookup(Agent &agent) const {
173
+ trie_->reverse_lookup(*agent.agent_);
174
+ }
175
+
176
+ bool Trie::common_prefix_search(Agent &agent) const {
177
+ return trie_->common_prefix_search(*agent.agent_);
178
+ }
179
+
180
+ bool Trie::predictive_search(Agent &agent) const {
181
+ return trie_->predictive_search(*agent.agent_);
182
+ }
183
+
184
+ size_t Trie::lookup(const char *ptr, size_t length) const {
185
+ marisa::Agent agent;
186
+ agent.set_query(ptr, length);
187
+ if (!trie_->lookup(agent)) {
188
+ return MARISA_INVALID_KEY_ID;
189
+ }
190
+ return agent.key().id();
191
+ }
192
+
193
+ void Trie::reverse_lookup(size_t id,
194
+ const char **ptr_out_to_be_deleted, size_t *length_out) const {
195
+ marisa::Agent agent;
196
+ agent.set_query(id);
197
+ trie_->reverse_lookup(agent);
198
+ char * const buf = new (std::nothrow) char[agent.key().length()];
199
+ MARISA_THROW_IF(buf == NULL, MARISA_MEMORY_ERROR);
200
+ std::memcpy(buf, agent.key().ptr(), agent.key().length());
201
+ *ptr_out_to_be_deleted = buf;
202
+ *length_out = agent.key().length();
203
+ }
204
+
205
+ size_t Trie::num_tries() const {
206
+ return trie_->num_tries();
207
+ }
208
+
209
+ size_t Trie::num_keys() const {
210
+ return trie_->num_keys();
211
+ }
212
+
213
+ size_t Trie::num_nodes() const {
214
+ return trie_->num_nodes();
215
+ }
216
+
217
+ TailMode Trie::tail_mode() const {
218
+ if (trie_->tail_mode() == ::MARISA_TEXT_TAIL) {
219
+ return TEXT_TAIL;
220
+ } else {
221
+ return BINARY_TAIL;
222
+ }
223
+ }
224
+
225
+ NodeOrder Trie::node_order() const {
226
+ if (trie_->node_order() == ::MARISA_LABEL_ORDER) {
227
+ return LABEL_ORDER;
228
+ } else {
229
+ return WEIGHT_ORDER;
230
+ }
231
+ }
232
+
233
+ bool Trie::empty() const {
234
+ return trie_->empty();
235
+ }
236
+
237
+ size_t Trie::size() const {
238
+ return trie_->size();
239
+ }
240
+
241
+ size_t Trie::total_size() const {
242
+ return trie_->total_size();
243
+ }
244
+
245
+ size_t Trie::io_size() const {
246
+ return trie_->io_size();
247
+ }
248
+
249
+ void Trie::clear() {
250
+ trie_->clear();
251
+ }
252
+
253
+ } // namespace marisa_swig
@@ -0,0 +1,183 @@
1
+ #ifndef MARISA_SWIG_H_
2
+ #define MARISA_SWIG_H_
3
+
4
+ #include <marisa.h>
5
+
6
+ namespace marisa_swig {
7
+
8
+ #define MARISA_SWIG_ENUM_COPY(name) name = MARISA_ ## name
9
+
10
+ enum ErrorCode {
11
+ MARISA_SWIG_ENUM_COPY(OK),
12
+ MARISA_SWIG_ENUM_COPY(STATE_ERROR),
13
+ MARISA_SWIG_ENUM_COPY(NULL_ERROR),
14
+ MARISA_SWIG_ENUM_COPY(BOUND_ERROR),
15
+ MARISA_SWIG_ENUM_COPY(RANGE_ERROR),
16
+ MARISA_SWIG_ENUM_COPY(CODE_ERROR),
17
+ MARISA_SWIG_ENUM_COPY(RESET_ERROR),
18
+ MARISA_SWIG_ENUM_COPY(SIZE_ERROR),
19
+ MARISA_SWIG_ENUM_COPY(MEMORY_ERROR),
20
+ MARISA_SWIG_ENUM_COPY(IO_ERROR),
21
+ MARISA_SWIG_ENUM_COPY(FORMAT_ERROR)
22
+ };
23
+
24
+ enum NumTries {
25
+ MARISA_SWIG_ENUM_COPY(MIN_NUM_TRIES),
26
+ MARISA_SWIG_ENUM_COPY(MAX_NUM_TRIES),
27
+ MARISA_SWIG_ENUM_COPY(DEFAULT_NUM_TRIES)
28
+ };
29
+
30
+ enum CacheLevel {
31
+ MARISA_SWIG_ENUM_COPY(HUGE_CACHE),
32
+ MARISA_SWIG_ENUM_COPY(LARGE_CACHE),
33
+ MARISA_SWIG_ENUM_COPY(NORMAL_CACHE),
34
+ MARISA_SWIG_ENUM_COPY(SMALL_CACHE),
35
+ MARISA_SWIG_ENUM_COPY(TINY_CACHE),
36
+ MARISA_SWIG_ENUM_COPY(DEFAULT_CACHE)
37
+ };
38
+
39
+ enum TailMode {
40
+ MARISA_SWIG_ENUM_COPY(TEXT_TAIL),
41
+ MARISA_SWIG_ENUM_COPY(BINARY_TAIL),
42
+ MARISA_SWIG_ENUM_COPY(DEFAULT_TAIL)
43
+ };
44
+
45
+ enum NodeOrder {
46
+ MARISA_SWIG_ENUM_COPY(LABEL_ORDER),
47
+ MARISA_SWIG_ENUM_COPY(WEIGHT_ORDER),
48
+ MARISA_SWIG_ENUM_COPY(DEFAULT_ORDER)
49
+ };
50
+
51
+ #undef MARISA_SWIG_ENUM_COPY
52
+
53
+ class Key {
54
+ public:
55
+ void str(const char **ptr_out, std::size_t *length_out) const;
56
+ std::size_t id() const;
57
+ float weight() const;
58
+
59
+ private:
60
+ const marisa::Key key_;
61
+
62
+ Key();
63
+ Key(const Key &key);
64
+ Key &operator=(const Key &);
65
+ };
66
+
67
+ class Query {
68
+ public:
69
+ void str(const char **ptr_out, std::size_t *length_out) const;
70
+ std::size_t id() const;
71
+
72
+ private:
73
+ const marisa::Query query_;
74
+
75
+ Query();
76
+ Query(const Query &query);
77
+ Query &operator=(const Query &);
78
+ };
79
+
80
+ class Keyset {
81
+ friend class Trie;
82
+
83
+ public:
84
+ Keyset();
85
+ ~Keyset();
86
+
87
+ void push_back(const marisa::Key &key);
88
+ void push_back(const char *ptr, std::size_t length, float weight = 1.0);
89
+
90
+ const Key &key(std::size_t i) const;
91
+
92
+ void key_str(std::size_t i,
93
+ const char **ptr_out, std::size_t *length_out) const;
94
+ std::size_t key_id(std::size_t i) const;
95
+
96
+ std::size_t num_keys() const;
97
+
98
+ bool empty() const;
99
+ std::size_t size() const;
100
+ std::size_t total_length() const;
101
+
102
+ void reset();
103
+ void clear();
104
+
105
+ private:
106
+ marisa::Keyset *keyset_;
107
+
108
+ Keyset(const Keyset &);
109
+ Keyset &operator=(const Keyset &);
110
+ };
111
+
112
+ class Agent {
113
+ friend class Trie;
114
+
115
+ public:
116
+ Agent();
117
+ ~Agent();
118
+
119
+ void set_query(const char *ptr, std::size_t length);
120
+ void set_query(std::size_t id);
121
+
122
+ const Key &key() const;
123
+ const Query &query() const;
124
+
125
+ void key_str(const char **ptr_out, std::size_t *length_out) const;
126
+ std::size_t key_id() const;
127
+
128
+ void query_str(const char **ptr_out, std::size_t *length_out) const;
129
+ std::size_t query_id() const;
130
+
131
+ private:
132
+ marisa::Agent *agent_;
133
+ char *buf_;
134
+ std::size_t buf_size_;
135
+
136
+ Agent(const Agent &);
137
+ Agent &operator=(const Agent &);
138
+ };
139
+
140
+ class Trie {
141
+ public:
142
+ Trie();
143
+ ~Trie();
144
+
145
+ void build(Keyset &keyset, int config_flags = 0);
146
+
147
+ void mmap(const char *filename);
148
+ void load(const char *filename);
149
+ void save(const char *filename) const;
150
+
151
+ bool lookup(Agent &agent) const;
152
+ void reverse_lookup(Agent &agent) const;
153
+ bool common_prefix_search(Agent &agent) const;
154
+ bool predictive_search(Agent &agent) const;
155
+
156
+ std::size_t lookup(const char *ptr, std::size_t length) const;
157
+ void reverse_lookup(std::size_t id,
158
+ const char **ptr_out_to_be_deleted, std::size_t *length_out) const;
159
+
160
+ std::size_t num_tries() const;
161
+ std::size_t num_keys() const;
162
+ std::size_t num_nodes() const;
163
+
164
+ TailMode tail_mode() const;
165
+ NodeOrder node_order() const;
166
+
167
+ bool empty() const;
168
+ std::size_t size() const;
169
+ std::size_t total_size() const;
170
+ std::size_t io_size() const;
171
+
172
+ void clear();
173
+
174
+ private:
175
+ marisa::Trie *trie_;
176
+
177
+ Trie(const Trie &);
178
+ Trie &operator=(const Trie &);
179
+ };
180
+
181
+ } // namespace marisa_swig
182
+
183
+ #endif // MARISA_SWIG_H_
@@ -0,0 +1,253 @@
1
+ #include <cstring>
2
+ #include <new>
3
+
4
+ #include "marisa-swig.h"
5
+
6
+ namespace marisa_swig {
7
+
8
+ void Key::str(const char **ptr_out, size_t *length_out) const {
9
+ *ptr_out = key_.ptr();
10
+ *length_out = key_.length();
11
+ }
12
+
13
+ size_t Key::id() const {
14
+ return key_.id();
15
+ }
16
+
17
+ float Key::weight() const {
18
+ return key_.weight();
19
+ }
20
+
21
+ void Query::str(const char **ptr_out, size_t *length_out) const {
22
+ *ptr_out = query_.ptr();
23
+ *length_out = query_.length();
24
+ }
25
+
26
+ size_t Query::id() const {
27
+ return query_.id();
28
+ }
29
+
30
+ Keyset::Keyset() : keyset_(new (std::nothrow) marisa::Keyset) {
31
+ MARISA_THROW_IF(keyset_ == NULL, ::MARISA_MEMORY_ERROR);
32
+ }
33
+
34
+ Keyset::~Keyset() {
35
+ delete keyset_;
36
+ }
37
+
38
+ void Keyset::push_back(const marisa::Key &key) {
39
+ keyset_->push_back(key);
40
+ }
41
+
42
+ void Keyset::push_back(const char *ptr, size_t length, float weight) {
43
+ keyset_->push_back(ptr, length, weight);
44
+ }
45
+
46
+ const Key &Keyset::key(size_t i) const {
47
+ return reinterpret_cast<const Key &>((*keyset_)[i]);
48
+ }
49
+
50
+ void Keyset::key_str(size_t i,
51
+ const char **ptr_out, size_t *length_out) const {
52
+ *ptr_out = (*keyset_)[i].ptr();
53
+ *length_out = (*keyset_)[i].length();
54
+ }
55
+
56
+ size_t Keyset::key_id(size_t i) const {
57
+ return (*keyset_)[i].id();
58
+ }
59
+
60
+ size_t Keyset::num_keys() const {
61
+ return keyset_->num_keys();
62
+ }
63
+
64
+ bool Keyset::empty() const {
65
+ return keyset_->empty();
66
+ }
67
+
68
+ size_t Keyset::size() const {
69
+ return keyset_->size();
70
+ }
71
+
72
+ size_t Keyset::total_length() const {
73
+ return keyset_->total_length();
74
+ }
75
+
76
+ void Keyset::reset() {
77
+ keyset_->reset();
78
+ }
79
+
80
+ void Keyset::clear() {
81
+ keyset_->clear();
82
+ }
83
+
84
+ Agent::Agent()
85
+ : agent_(new (std::nothrow) marisa::Agent), buf_(NULL), buf_size_(0) {
86
+ MARISA_THROW_IF(agent_ == NULL, ::MARISA_MEMORY_ERROR);
87
+ }
88
+
89
+ Agent::~Agent() {
90
+ delete agent_;
91
+ delete [] buf_;
92
+ }
93
+
94
+ void Agent::set_query(const char *ptr, size_t length) {
95
+ if (length > buf_size_) {
96
+ size_t new_buf_size = (buf_size_ != 0) ? buf_size_ : 1;
97
+ if (length >= (MARISA_SIZE_MAX / 2)) {
98
+ new_buf_size = MARISA_SIZE_MAX;
99
+ } else {
100
+ while (new_buf_size < length) {
101
+ new_buf_size *= 2;
102
+ }
103
+ }
104
+ char *new_buf = new (std::nothrow) char[new_buf_size];
105
+ MARISA_THROW_IF(new_buf == NULL, MARISA_MEMORY_ERROR);
106
+ delete [] buf_;
107
+ buf_ = new_buf;
108
+ buf_size_ = new_buf_size;
109
+ }
110
+ std::memcpy(buf_, ptr, length);
111
+ agent_->set_query(buf_, length);
112
+ }
113
+
114
+ void Agent::set_query(size_t id) {
115
+ agent_->set_query(id);
116
+ }
117
+
118
+ const Key &Agent::key() const {
119
+ return reinterpret_cast<const Key &>(agent_->key());
120
+ }
121
+
122
+ const Query &Agent::query() const {
123
+ return reinterpret_cast<const Query &>(agent_->query());
124
+ }
125
+
126
+ void Agent::key_str(const char **ptr_out, size_t *length_out) const {
127
+ *ptr_out = agent_->key().ptr();
128
+ *length_out = agent_->key().length();
129
+ }
130
+
131
+ size_t Agent::key_id() const {
132
+ return agent_->key().id();
133
+ }
134
+
135
+ void Agent::query_str(const char **ptr_out, size_t *length_out) const {
136
+ *ptr_out = agent_->query().ptr();
137
+ *length_out = agent_->query().length();
138
+ }
139
+
140
+ size_t Agent::query_id() const {
141
+ return agent_->query().id();
142
+ }
143
+
144
+ Trie::Trie() : trie_(new (std::nothrow) marisa::Trie) {
145
+ MARISA_THROW_IF(trie_ == NULL, ::MARISA_MEMORY_ERROR);
146
+ }
147
+
148
+ Trie::~Trie() {
149
+ delete trie_;
150
+ }
151
+
152
+ void Trie::build(Keyset &keyset, int config_flags) {
153
+ trie_->build(*keyset.keyset_, config_flags);
154
+ }
155
+
156
+ void Trie::mmap(const char *filename) {
157
+ trie_->mmap(filename);
158
+ }
159
+
160
+ void Trie::load(const char *filename) {
161
+ trie_->load(filename);
162
+ }
163
+
164
+ void Trie::save(const char *filename) const {
165
+ trie_->save(filename);
166
+ }
167
+
168
+ bool Trie::lookup(Agent &agent) const {
169
+ return trie_->lookup(*agent.agent_);
170
+ }
171
+
172
+ void Trie::reverse_lookup(Agent &agent) const {
173
+ trie_->reverse_lookup(*agent.agent_);
174
+ }
175
+
176
+ bool Trie::common_prefix_search(Agent &agent) const {
177
+ return trie_->common_prefix_search(*agent.agent_);
178
+ }
179
+
180
+ bool Trie::predictive_search(Agent &agent) const {
181
+ return trie_->predictive_search(*agent.agent_);
182
+ }
183
+
184
+ size_t Trie::lookup(const char *ptr, size_t length) const {
185
+ marisa::Agent agent;
186
+ agent.set_query(ptr, length);
187
+ if (!trie_->lookup(agent)) {
188
+ return MARISA_INVALID_KEY_ID;
189
+ }
190
+ return agent.key().id();
191
+ }
192
+
193
+ void Trie::reverse_lookup(size_t id,
194
+ const char **ptr_out_to_be_deleted, size_t *length_out) const {
195
+ marisa::Agent agent;
196
+ agent.set_query(id);
197
+ trie_->reverse_lookup(agent);
198
+ char * const buf = new (std::nothrow) char[agent.key().length()];
199
+ MARISA_THROW_IF(buf == NULL, MARISA_MEMORY_ERROR);
200
+ std::memcpy(buf, agent.key().ptr(), agent.key().length());
201
+ *ptr_out_to_be_deleted = buf;
202
+ *length_out = agent.key().length();
203
+ }
204
+
205
+ size_t Trie::num_tries() const {
206
+ return trie_->num_tries();
207
+ }
208
+
209
+ size_t Trie::num_keys() const {
210
+ return trie_->num_keys();
211
+ }
212
+
213
+ size_t Trie::num_nodes() const {
214
+ return trie_->num_nodes();
215
+ }
216
+
217
+ TailMode Trie::tail_mode() const {
218
+ if (trie_->tail_mode() == ::MARISA_TEXT_TAIL) {
219
+ return TEXT_TAIL;
220
+ } else {
221
+ return BINARY_TAIL;
222
+ }
223
+ }
224
+
225
+ NodeOrder Trie::node_order() const {
226
+ if (trie_->node_order() == ::MARISA_LABEL_ORDER) {
227
+ return LABEL_ORDER;
228
+ } else {
229
+ return WEIGHT_ORDER;
230
+ }
231
+ }
232
+
233
+ bool Trie::empty() const {
234
+ return trie_->empty();
235
+ }
236
+
237
+ size_t Trie::size() const {
238
+ return trie_->size();
239
+ }
240
+
241
+ size_t Trie::total_size() const {
242
+ return trie_->total_size();
243
+ }
244
+
245
+ size_t Trie::io_size() const {
246
+ return trie_->io_size();
247
+ }
248
+
249
+ void Trie::clear() {
250
+ trie_->clear();
251
+ }
252
+
253
+ } // namespace marisa_swig