melisa 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +11 -0
- data/ext/marisa/bindings/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/marisa-swig.h +183 -0
- data/ext/marisa/bindings/perl/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/perl/marisa-swig.h +183 -0
- data/ext/marisa/bindings/perl/marisa-swig_wrap.cxx +5160 -0
- data/ext/marisa/bindings/python/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/python/marisa-swig.h +183 -0
- data/ext/marisa/bindings/python/marisa-swig_wrap.cxx +6090 -0
- data/ext/marisa/bindings/ruby/extconf.rb +5 -0
- data/ext/marisa/bindings/ruby/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/ruby/marisa-swig.h +183 -0
- data/ext/marisa/bindings/ruby/marisa-swig_wrap.cxx +4708 -0
- data/ext/marisa/lib/marisa.h +14 -0
- data/ext/marisa/lib/marisa/agent.cc +51 -0
- data/ext/marisa/lib/marisa/agent.h +73 -0
- data/ext/marisa/lib/marisa/base.h +193 -0
- data/ext/marisa/lib/marisa/exception.h +82 -0
- data/ext/marisa/lib/marisa/grimoire/algorithm.h +26 -0
- data/ext/marisa/lib/marisa/grimoire/algorithm/sort.h +196 -0
- data/ext/marisa/lib/marisa/grimoire/intrin.h +115 -0
- data/ext/marisa/lib/marisa/grimoire/io.h +18 -0
- data/ext/marisa/lib/marisa/grimoire/io/mapper.cc +163 -0
- data/ext/marisa/lib/marisa/grimoire/io/mapper.h +67 -0
- data/ext/marisa/lib/marisa/grimoire/io/reader.cc +147 -0
- data/ext/marisa/lib/marisa/grimoire/io/reader.h +66 -0
- data/ext/marisa/lib/marisa/grimoire/io/writer.cc +148 -0
- data/ext/marisa/lib/marisa/grimoire/io/writer.h +65 -0
- data/ext/marisa/lib/marisa/grimoire/trie.h +16 -0
- data/ext/marisa/lib/marisa/grimoire/trie/cache.h +81 -0
- data/ext/marisa/lib/marisa/grimoire/trie/config.h +155 -0
- data/ext/marisa/lib/marisa/grimoire/trie/entry.h +82 -0
- data/ext/marisa/lib/marisa/grimoire/trie/header.h +61 -0
- data/ext/marisa/lib/marisa/grimoire/trie/history.h +65 -0
- data/ext/marisa/lib/marisa/grimoire/trie/key.h +228 -0
- data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.cc +876 -0
- data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.h +134 -0
- data/ext/marisa/lib/marisa/grimoire/trie/range.h +115 -0
- data/ext/marisa/lib/marisa/grimoire/trie/state.h +117 -0
- data/ext/marisa/lib/marisa/grimoire/trie/tail.cc +218 -0
- data/ext/marisa/lib/marisa/grimoire/trie/tail.h +72 -0
- data/ext/marisa/lib/marisa/grimoire/vector.h +18 -0
- data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.cc +826 -0
- data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.h +179 -0
- data/ext/marisa/lib/marisa/grimoire/vector/flat-vector.h +205 -0
- data/ext/marisa/lib/marisa/grimoire/vector/pop-count.h +110 -0
- data/ext/marisa/lib/marisa/grimoire/vector/rank-index.h +82 -0
- data/ext/marisa/lib/marisa/grimoire/vector/vector.h +256 -0
- data/ext/marisa/lib/marisa/iostream.h +18 -0
- data/ext/marisa/lib/marisa/key.h +85 -0
- data/ext/marisa/lib/marisa/keyset.cc +181 -0
- data/ext/marisa/lib/marisa/keyset.h +80 -0
- data/ext/marisa/lib/marisa/query.h +71 -0
- data/ext/marisa/lib/marisa/scoped-array.h +48 -0
- data/ext/marisa/lib/marisa/scoped-ptr.h +52 -0
- data/ext/marisa/lib/marisa/stdio.h +15 -0
- data/ext/marisa/lib/marisa/trie.cc +249 -0
- data/ext/marisa/lib/marisa/trie.h +64 -0
- data/ext/marisa/tests/base-test.cc +309 -0
- data/ext/marisa/tests/io-test.cc +252 -0
- data/ext/marisa/tests/marisa-assert.h +26 -0
- data/ext/marisa/tests/marisa-test.cc +388 -0
- data/ext/marisa/tests/trie-test.cc +507 -0
- data/ext/marisa/tests/vector-test.cc +466 -0
- data/ext/marisa/tools/cmdopt.cc +298 -0
- data/ext/marisa/tools/cmdopt.h +58 -0
- data/ext/marisa/tools/marisa-benchmark.cc +418 -0
- data/ext/marisa/tools/marisa-build.cc +206 -0
- data/ext/marisa/tools/marisa-common-prefix-search.cc +143 -0
- data/ext/marisa/tools/marisa-dump.cc +151 -0
- data/ext/marisa/tools/marisa-lookup.cc +110 -0
- data/ext/marisa/tools/marisa-predictive-search.cc +143 -0
- data/ext/marisa/tools/marisa-reverse-lookup.cc +110 -0
- data/lib/melisa.rb +7 -0
- data/lib/melisa/base_config_flags.rb +76 -0
- data/lib/melisa/bytes_trie.rb +55 -0
- data/lib/melisa/int_trie.rb +14 -0
- data/lib/melisa/search.rb +55 -0
- data/lib/melisa/trie.rb +96 -0
- data/lib/melisa/version.rb +3 -0
- data/melisa.gemspec +36 -0
- data/spec/base_config_flags_spec.rb +73 -0
- data/spec/bytes_trie_spec.rb +16 -0
- data/spec/int_trie_spec.rb +16 -0
- data/spec/search_spec.rb +29 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/trie_spec.rb +30 -0
- metadata +207 -0
@@ -0,0 +1,82 @@
|
|
1
|
+
#ifndef MARISA_GRIMOIRE_VECTOR_RANK_INDEX_H_
|
2
|
+
#define MARISA_GRIMOIRE_VECTOR_RANK_INDEX_H_
|
3
|
+
|
4
|
+
#include "marisa/base.h"
|
5
|
+
|
6
|
+
namespace marisa {
|
7
|
+
namespace grimoire {
|
8
|
+
namespace vector {
|
9
|
+
|
10
|
+
class RankIndex {
|
11
|
+
public:
|
12
|
+
RankIndex() : abs_(0), rel_lo_(0), rel_hi_(0) {}
|
13
|
+
|
14
|
+
void set_abs(std::size_t value) {
|
15
|
+
MARISA_DEBUG_IF(value > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
|
16
|
+
abs_ = (UInt32)value;
|
17
|
+
}
|
18
|
+
void set_rel1(std::size_t value) {
|
19
|
+
MARISA_DEBUG_IF(value > 64, MARISA_RANGE_ERROR);
|
20
|
+
rel_lo_ = (UInt32)((rel_lo_ & ~0x7FU) | (value & 0x7FU));
|
21
|
+
}
|
22
|
+
void set_rel2(std::size_t value) {
|
23
|
+
MARISA_DEBUG_IF(value > 128, MARISA_RANGE_ERROR);
|
24
|
+
rel_lo_ = (UInt32)((rel_lo_ & ~(0xFFU << 7)) | ((value & 0xFFU) << 7));
|
25
|
+
}
|
26
|
+
void set_rel3(std::size_t value) {
|
27
|
+
MARISA_DEBUG_IF(value > 192, MARISA_RANGE_ERROR);
|
28
|
+
rel_lo_ = (UInt32)((rel_lo_ & ~(0xFFU << 15)) | ((value & 0xFFU) << 15));
|
29
|
+
}
|
30
|
+
void set_rel4(std::size_t value) {
|
31
|
+
MARISA_DEBUG_IF(value > 256, MARISA_RANGE_ERROR);
|
32
|
+
rel_lo_ = (UInt32)((rel_lo_ & ~(0x1FFU << 23)) | ((value & 0x1FFU) << 23));
|
33
|
+
}
|
34
|
+
void set_rel5(std::size_t value) {
|
35
|
+
MARISA_DEBUG_IF(value > 320, MARISA_RANGE_ERROR);
|
36
|
+
rel_hi_ = (UInt32)((rel_hi_ & ~0x1FFU) | (value & 0x1FFU));
|
37
|
+
}
|
38
|
+
void set_rel6(std::size_t value) {
|
39
|
+
MARISA_DEBUG_IF(value > 384, MARISA_RANGE_ERROR);
|
40
|
+
rel_hi_ = (UInt32)((rel_hi_ & ~(0x1FFU << 9)) | ((value & 0x1FFU) << 9));
|
41
|
+
}
|
42
|
+
void set_rel7(std::size_t value) {
|
43
|
+
MARISA_DEBUG_IF(value > 448, MARISA_RANGE_ERROR);
|
44
|
+
rel_hi_ = (UInt32)((rel_hi_ & ~(0x1FFU << 18)) | ((value & 0x1FFU) << 18));
|
45
|
+
}
|
46
|
+
|
47
|
+
std::size_t abs() const {
|
48
|
+
return abs_;
|
49
|
+
}
|
50
|
+
std::size_t rel1() const {
|
51
|
+
return rel_lo_ & 0x7FU;
|
52
|
+
}
|
53
|
+
std::size_t rel2() const {
|
54
|
+
return (rel_lo_ >> 7) & 0xFFU;
|
55
|
+
}
|
56
|
+
std::size_t rel3() const {
|
57
|
+
return (rel_lo_ >> 15) & 0xFFU;
|
58
|
+
}
|
59
|
+
std::size_t rel4() const {
|
60
|
+
return (rel_lo_ >> 23) & 0x1FFU;
|
61
|
+
}
|
62
|
+
std::size_t rel5() const {
|
63
|
+
return rel_hi_ & 0x1FFU;
|
64
|
+
}
|
65
|
+
std::size_t rel6() const {
|
66
|
+
return (rel_hi_ >> 9) & 0x1FFU;
|
67
|
+
}
|
68
|
+
std::size_t rel7() const {
|
69
|
+
return (rel_hi_ >> 18) & 0x1FFU;
|
70
|
+
}
|
71
|
+
|
72
|
+
private:
|
73
|
+
UInt32 abs_;
|
74
|
+
UInt32 rel_lo_;
|
75
|
+
UInt32 rel_hi_;
|
76
|
+
};
|
77
|
+
|
78
|
+
} // namespace vector
|
79
|
+
} // namespace grimoire
|
80
|
+
} // namespace marisa
|
81
|
+
|
82
|
+
#endif // MARISA_GRIMOIRE_VECTOR_RANK_INDEX_H_
|
@@ -0,0 +1,256 @@
|
|
1
|
+
#ifndef MARISA_GRIMOIRE_VECTOR_VECTOR_H_
|
2
|
+
#define MARISA_GRIMOIRE_VECTOR_VECTOR_H_
|
3
|
+
|
4
|
+
#include <new>
|
5
|
+
|
6
|
+
#include "marisa/grimoire/io.h"
|
7
|
+
|
8
|
+
namespace marisa {
|
9
|
+
namespace grimoire {
|
10
|
+
namespace vector {
|
11
|
+
|
12
|
+
template <typename T>
|
13
|
+
class Vector {
|
14
|
+
public:
|
15
|
+
Vector()
|
16
|
+
: buf_(), objs_(NULL), const_objs_(NULL),
|
17
|
+
size_(0), capacity_(0), fixed_(false) {}
|
18
|
+
~Vector() {
|
19
|
+
if (objs_ != NULL) {
|
20
|
+
for (std::size_t i = 0; i < size_; ++i) {
|
21
|
+
objs_[i].~T();
|
22
|
+
}
|
23
|
+
}
|
24
|
+
}
|
25
|
+
|
26
|
+
void map(Mapper &mapper) {
|
27
|
+
Vector temp;
|
28
|
+
temp.map_(mapper);
|
29
|
+
swap(temp);
|
30
|
+
}
|
31
|
+
|
32
|
+
void read(Reader &reader) {
|
33
|
+
Vector temp;
|
34
|
+
temp.read_(reader);
|
35
|
+
swap(temp);
|
36
|
+
}
|
37
|
+
|
38
|
+
void write(Writer &writer) const {
|
39
|
+
write_(writer);
|
40
|
+
}
|
41
|
+
|
42
|
+
void push_back(const T &x) {
|
43
|
+
MARISA_DEBUG_IF(fixed_, MARISA_STATE_ERROR);
|
44
|
+
MARISA_DEBUG_IF(size_ == max_size(), MARISA_SIZE_ERROR);
|
45
|
+
reserve(size_ + 1);
|
46
|
+
new (&objs_[size_]) T(x);
|
47
|
+
++size_;
|
48
|
+
}
|
49
|
+
|
50
|
+
void pop_back() {
|
51
|
+
MARISA_DEBUG_IF(fixed_, MARISA_STATE_ERROR);
|
52
|
+
MARISA_DEBUG_IF(size_ == 0, MARISA_STATE_ERROR);
|
53
|
+
objs_[--size_].~T();
|
54
|
+
}
|
55
|
+
|
56
|
+
// resize() assumes that T's placement new does not throw an exception.
|
57
|
+
void resize(std::size_t size) {
|
58
|
+
MARISA_DEBUG_IF(fixed_, MARISA_STATE_ERROR);
|
59
|
+
reserve(size);
|
60
|
+
for (std::size_t i = size_; i < size; ++i) {
|
61
|
+
new (&objs_[i]) T;
|
62
|
+
}
|
63
|
+
for (std::size_t i = size; i < size_; ++i) {
|
64
|
+
objs_[i].~T();
|
65
|
+
}
|
66
|
+
size_ = size;
|
67
|
+
}
|
68
|
+
|
69
|
+
// resize() assumes that T's placement new does not throw an exception.
|
70
|
+
void resize(std::size_t size, const T &x) {
|
71
|
+
MARISA_DEBUG_IF(fixed_, MARISA_STATE_ERROR);
|
72
|
+
reserve(size);
|
73
|
+
for (std::size_t i = size_; i < size; ++i) {
|
74
|
+
new (&objs_[i]) T(x);
|
75
|
+
}
|
76
|
+
for (std::size_t i = size; i < size_; ++i) {
|
77
|
+
objs_[i].~T();
|
78
|
+
}
|
79
|
+
size_ = size;
|
80
|
+
}
|
81
|
+
|
82
|
+
void reserve(std::size_t capacity) {
|
83
|
+
MARISA_DEBUG_IF(fixed_, MARISA_STATE_ERROR);
|
84
|
+
if (capacity <= capacity_) {
|
85
|
+
return;
|
86
|
+
}
|
87
|
+
MARISA_DEBUG_IF(capacity > max_size(), MARISA_SIZE_ERROR);
|
88
|
+
std::size_t new_capacity = capacity;
|
89
|
+
if (capacity_ > (capacity / 2)) {
|
90
|
+
if (capacity_ > (max_size() / 2)) {
|
91
|
+
new_capacity = max_size();
|
92
|
+
} else {
|
93
|
+
new_capacity = capacity_ * 2;
|
94
|
+
}
|
95
|
+
}
|
96
|
+
realloc(new_capacity);
|
97
|
+
}
|
98
|
+
|
99
|
+
void shrink() {
|
100
|
+
MARISA_THROW_IF(fixed_, MARISA_STATE_ERROR);
|
101
|
+
if (size_ != capacity_) {
|
102
|
+
realloc(size_);
|
103
|
+
}
|
104
|
+
}
|
105
|
+
|
106
|
+
void fix() {
|
107
|
+
MARISA_THROW_IF(fixed_, MARISA_STATE_ERROR);
|
108
|
+
fixed_ = true;
|
109
|
+
}
|
110
|
+
|
111
|
+
const T *begin() const {
|
112
|
+
return const_objs_;
|
113
|
+
}
|
114
|
+
const T *end() const {
|
115
|
+
return const_objs_ + size_;
|
116
|
+
}
|
117
|
+
const T &operator[](std::size_t i) const {
|
118
|
+
MARISA_DEBUG_IF(i >= size_, MARISA_BOUND_ERROR);
|
119
|
+
return const_objs_[i];
|
120
|
+
}
|
121
|
+
const T &front() const {
|
122
|
+
MARISA_DEBUG_IF(size_ == 0, MARISA_STATE_ERROR);
|
123
|
+
return const_objs_[0];
|
124
|
+
}
|
125
|
+
const T &back() const {
|
126
|
+
MARISA_DEBUG_IF(size_ == 0, MARISA_STATE_ERROR);
|
127
|
+
return const_objs_[size_ - 1];
|
128
|
+
}
|
129
|
+
|
130
|
+
T *begin() {
|
131
|
+
MARISA_DEBUG_IF(fixed_, MARISA_STATE_ERROR);
|
132
|
+
return objs_;
|
133
|
+
}
|
134
|
+
T *end() {
|
135
|
+
MARISA_DEBUG_IF(fixed_, MARISA_STATE_ERROR);
|
136
|
+
return objs_ + size_;
|
137
|
+
}
|
138
|
+
T &operator[](std::size_t i) {
|
139
|
+
MARISA_DEBUG_IF(fixed_, MARISA_STATE_ERROR);
|
140
|
+
MARISA_DEBUG_IF(i >= size_, MARISA_BOUND_ERROR);
|
141
|
+
return objs_[i];
|
142
|
+
}
|
143
|
+
T &front() {
|
144
|
+
MARISA_DEBUG_IF(fixed_, MARISA_STATE_ERROR);
|
145
|
+
MARISA_DEBUG_IF(size_ == 0, MARISA_STATE_ERROR);
|
146
|
+
return objs_[0];
|
147
|
+
}
|
148
|
+
T &back() {
|
149
|
+
MARISA_DEBUG_IF(fixed_, MARISA_STATE_ERROR);
|
150
|
+
MARISA_DEBUG_IF(size_ == 0, MARISA_STATE_ERROR);
|
151
|
+
return objs_[size_ - 1];
|
152
|
+
}
|
153
|
+
|
154
|
+
std::size_t size() const {
|
155
|
+
return size_;
|
156
|
+
}
|
157
|
+
std::size_t capacity() const {
|
158
|
+
return capacity_;
|
159
|
+
}
|
160
|
+
bool fixed() const {
|
161
|
+
return fixed_;
|
162
|
+
}
|
163
|
+
|
164
|
+
bool empty() const {
|
165
|
+
return size_ == 0;
|
166
|
+
}
|
167
|
+
std::size_t total_size() const {
|
168
|
+
return sizeof(T) * size_;
|
169
|
+
}
|
170
|
+
std::size_t io_size() const {
|
171
|
+
return sizeof(UInt64) + ((total_size() + 7) & ~(std::size_t)0x07);
|
172
|
+
}
|
173
|
+
|
174
|
+
void clear() {
|
175
|
+
Vector().swap(*this);
|
176
|
+
}
|
177
|
+
void swap(Vector &rhs) {
|
178
|
+
buf_.swap(rhs.buf_);
|
179
|
+
marisa::swap(objs_, rhs.objs_);
|
180
|
+
marisa::swap(const_objs_, rhs.const_objs_);
|
181
|
+
marisa::swap(size_, rhs.size_);
|
182
|
+
marisa::swap(capacity_, rhs.capacity_);
|
183
|
+
marisa::swap(fixed_, rhs.fixed_);
|
184
|
+
}
|
185
|
+
|
186
|
+
static std::size_t max_size() {
|
187
|
+
return MARISA_SIZE_MAX / sizeof(T);
|
188
|
+
}
|
189
|
+
|
190
|
+
private:
|
191
|
+
scoped_array<char> buf_;
|
192
|
+
T *objs_;
|
193
|
+
const T *const_objs_;
|
194
|
+
std::size_t size_;
|
195
|
+
std::size_t capacity_;
|
196
|
+
bool fixed_;
|
197
|
+
|
198
|
+
void map_(Mapper &mapper) {
|
199
|
+
UInt64 total_size;
|
200
|
+
mapper.map(&total_size);
|
201
|
+
MARISA_THROW_IF(total_size > MARISA_SIZE_MAX, MARISA_SIZE_ERROR);
|
202
|
+
MARISA_THROW_IF((total_size % sizeof(T)) != 0, MARISA_FORMAT_ERROR);
|
203
|
+
const std::size_t size = (std::size_t)(total_size / sizeof(T));
|
204
|
+
mapper.map(&const_objs_, size);
|
205
|
+
mapper.seek((std::size_t)((8 - (total_size % 8)) % 8));
|
206
|
+
size_ = size;
|
207
|
+
fix();
|
208
|
+
}
|
209
|
+
void read_(Reader &reader) {
|
210
|
+
UInt64 total_size;
|
211
|
+
reader.read(&total_size);
|
212
|
+
MARISA_THROW_IF(total_size > MARISA_SIZE_MAX, MARISA_SIZE_ERROR);
|
213
|
+
MARISA_THROW_IF((total_size % sizeof(T)) != 0, MARISA_FORMAT_ERROR);
|
214
|
+
const std::size_t size = (std::size_t)(total_size / sizeof(T));
|
215
|
+
resize(size);
|
216
|
+
reader.read(objs_, size);
|
217
|
+
reader.seek((std::size_t)((8 - (total_size % 8)) % 8));
|
218
|
+
}
|
219
|
+
void write_(Writer &writer) const {
|
220
|
+
writer.write((UInt64)total_size());
|
221
|
+
writer.write(const_objs_, size_);
|
222
|
+
writer.seek((8 - (total_size() % 8)) % 8);
|
223
|
+
}
|
224
|
+
|
225
|
+
// realloc() assumes that T's placement new does not throw an exception.
|
226
|
+
void realloc(std::size_t new_capacity) {
|
227
|
+
MARISA_DEBUG_IF(new_capacity > max_size(), MARISA_SIZE_ERROR);
|
228
|
+
|
229
|
+
scoped_array<char> new_buf(
|
230
|
+
new (std::nothrow) char[sizeof(T) * new_capacity]);
|
231
|
+
MARISA_DEBUG_IF(new_buf.get() == NULL, MARISA_MEMORY_ERROR);
|
232
|
+
T *new_objs = reinterpret_cast<T *>(new_buf.get());
|
233
|
+
|
234
|
+
for (std::size_t i = 0; i < size_; ++i) {
|
235
|
+
new (&new_objs[i]) T(objs_[i]);
|
236
|
+
}
|
237
|
+
for (std::size_t i = 0; i < size_; ++i) {
|
238
|
+
objs_[i].~T();
|
239
|
+
}
|
240
|
+
|
241
|
+
buf_.swap(new_buf);
|
242
|
+
objs_ = new_objs;
|
243
|
+
const_objs_ = new_objs;
|
244
|
+
capacity_ = new_capacity;
|
245
|
+
}
|
246
|
+
|
247
|
+
// Disallows copy and assignment.
|
248
|
+
Vector(const Vector &);
|
249
|
+
Vector &operator=(const Vector &);
|
250
|
+
};
|
251
|
+
|
252
|
+
} // namespace vector
|
253
|
+
} // namespace grimoire
|
254
|
+
} // namespace marisa
|
255
|
+
|
256
|
+
#endif // MARISA_GRIMOIRE_VECTOR_VECTOR_H_
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#ifndef MARISA_IOSTREAM_H_
|
2
|
+
#define MARISA_IOSTREAM_H_
|
3
|
+
|
4
|
+
#include <iosfwd>
|
5
|
+
|
6
|
+
namespace marisa {
|
7
|
+
|
8
|
+
// Forward declaration; this header only needs Trie by pointer and reference.
class Trie;
|
9
|
+
|
10
|
+
// Stream I/O entry points for Trie. Only the declarations are visible here;
// presumably read()/operator>> load a trie from the stream and
// write()/operator<< store one -- confirm against the definitions.
// Note that read() takes the trie by pointer while operator>> takes it by
// reference.
std::istream &read(std::istream &stream, Trie *trie);
|
11
|
+
std::ostream &write(std::ostream &stream, const Trie &trie);
|
12
|
+
|
13
|
+
std::istream &operator>>(std::istream &stream, Trie &trie);
|
14
|
+
std::ostream &operator<<(std::ostream &stream, const Trie &trie);
|
15
|
+
|
16
|
+
} // namespace marisa
|
17
|
+
|
18
|
+
#endif // MARISA_IOSTREAM_H_
|
@@ -0,0 +1,85 @@
|
|
1
|
+
#ifndef MARISA_KEY_H_
|
2
|
+
#define MARISA_KEY_H_
|
3
|
+
|
4
|
+
#include "marisa/base.h"
|
5
|
+
|
6
|
+
namespace marisa {
|
7
|
+
|
8
|
+
class Key {
|
9
|
+
public:
|
10
|
+
Key() : ptr_(NULL), length_(0), union_() {
|
11
|
+
union_.id = 0;
|
12
|
+
}
|
13
|
+
Key(const Key &key)
|
14
|
+
: ptr_(key.ptr_), length_(key.length_), union_(key.union_) {}
|
15
|
+
|
16
|
+
Key &operator=(const Key &key) {
|
17
|
+
ptr_ = key.ptr_;
|
18
|
+
length_ = key.length_;
|
19
|
+
union_ = key.union_;
|
20
|
+
return *this;
|
21
|
+
}
|
22
|
+
|
23
|
+
char operator[](std::size_t i) const {
|
24
|
+
MARISA_DEBUG_IF(i >= length_, MARISA_BOUND_ERROR);
|
25
|
+
return ptr_[i];
|
26
|
+
}
|
27
|
+
|
28
|
+
void set_str(const char *str) {
|
29
|
+
MARISA_DEBUG_IF(str == NULL, MARISA_NULL_ERROR);
|
30
|
+
std::size_t length = 0;
|
31
|
+
while (str[length] != '\0') {
|
32
|
+
++length;
|
33
|
+
}
|
34
|
+
MARISA_DEBUG_IF(length > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
|
35
|
+
ptr_ = str;
|
36
|
+
length_ = (UInt32)length;
|
37
|
+
}
|
38
|
+
void set_str(const char *ptr, std::size_t length) {
|
39
|
+
MARISA_DEBUG_IF((ptr == NULL) && (length != 0), MARISA_NULL_ERROR);
|
40
|
+
MARISA_DEBUG_IF(length > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
|
41
|
+
ptr_ = ptr;
|
42
|
+
length_ = (UInt32)length;
|
43
|
+
}
|
44
|
+
void set_id(std::size_t id) {
|
45
|
+
MARISA_DEBUG_IF(id > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
|
46
|
+
union_.id = (UInt32)id;
|
47
|
+
}
|
48
|
+
void set_weight(float weight) {
|
49
|
+
union_.weight = weight;
|
50
|
+
}
|
51
|
+
|
52
|
+
const char *ptr() const {
|
53
|
+
return ptr_;
|
54
|
+
}
|
55
|
+
std::size_t length() const {
|
56
|
+
return length_;
|
57
|
+
}
|
58
|
+
std::size_t id() const {
|
59
|
+
return union_.id;
|
60
|
+
}
|
61
|
+
float weight() const {
|
62
|
+
return union_.weight;
|
63
|
+
}
|
64
|
+
|
65
|
+
void clear() {
|
66
|
+
Key().swap(*this);
|
67
|
+
}
|
68
|
+
void swap(Key &rhs) {
|
69
|
+
marisa::swap(ptr_, rhs.ptr_);
|
70
|
+
marisa::swap(length_, rhs.length_);
|
71
|
+
marisa::swap(union_.id, rhs.union_.id);
|
72
|
+
}
|
73
|
+
|
74
|
+
private:
|
75
|
+
const char *ptr_;
|
76
|
+
UInt32 length_;
|
77
|
+
union Union {
|
78
|
+
UInt32 id;
|
79
|
+
float weight;
|
80
|
+
} union_;
|
81
|
+
};
|
82
|
+
|
83
|
+
} // namespace marisa
|
84
|
+
|
85
|
+
#endif // MARISA_KEY_H_
|
@@ -0,0 +1,181 @@
|
|
1
|
+
#include <new>
|
2
|
+
|
3
|
+
#include "marisa/keyset.h"
|
4
|
+
|
5
|
+
namespace marisa {
|
6
|
+
|
7
|
+
// Creates an empty keyset: no blocks are allocated and all counters are zero.
Keyset::Keyset()
    : base_blocks_(),
      base_blocks_size_(0),
      base_blocks_capacity_(0),
      extra_blocks_(),
      extra_blocks_size_(0),
      extra_blocks_capacity_(0),
      key_blocks_(),
      key_blocks_size_(0),
      key_blocks_capacity_(0),
      ptr_(NULL),
      avail_(0),
      size_(0),
      total_length_(0) {}
|
12
|
+
|
13
|
+
void Keyset::push_back(const Key &key) {
|
14
|
+
MARISA_DEBUG_IF(size_ == MARISA_SIZE_MAX, MARISA_SIZE_ERROR);
|
15
|
+
|
16
|
+
char * const key_ptr = reserve(key.length());
|
17
|
+
for (std::size_t i = 0; i < key.length(); ++i) {
|
18
|
+
key_ptr[i] = key[i];
|
19
|
+
}
|
20
|
+
|
21
|
+
Key &new_key = key_blocks_[size_ / KEY_BLOCK_SIZE][size_ % KEY_BLOCK_SIZE];
|
22
|
+
new_key.set_str(key_ptr, key.length());
|
23
|
+
new_key.set_id(key.id());
|
24
|
+
++size_;
|
25
|
+
total_length_ += new_key.length();
|
26
|
+
}
|
27
|
+
|
28
|
+
void Keyset::push_back(const Key &key, char end_marker) {
|
29
|
+
MARISA_DEBUG_IF(size_ == MARISA_SIZE_MAX, MARISA_SIZE_ERROR);
|
30
|
+
|
31
|
+
if ((size_ / KEY_BLOCK_SIZE) == key_blocks_size_) {
|
32
|
+
append_key_block();
|
33
|
+
}
|
34
|
+
|
35
|
+
char * const key_ptr = reserve(key.length() + 1);
|
36
|
+
for (std::size_t i = 0; i < key.length(); ++i) {
|
37
|
+
key_ptr[i] = key[i];
|
38
|
+
}
|
39
|
+
key_ptr[key.length()] = end_marker;
|
40
|
+
|
41
|
+
Key &new_key = key_blocks_[size_ / KEY_BLOCK_SIZE][size_ % KEY_BLOCK_SIZE];
|
42
|
+
new_key.set_str(key_ptr, key.length());
|
43
|
+
new_key.set_id(key.id());
|
44
|
+
++size_;
|
45
|
+
total_length_ += new_key.length();
|
46
|
+
}
|
47
|
+
|
48
|
+
void Keyset::push_back(const char *str) {
|
49
|
+
MARISA_DEBUG_IF(size_ == MARISA_SIZE_MAX, MARISA_SIZE_ERROR);
|
50
|
+
MARISA_THROW_IF(str == NULL, MARISA_NULL_ERROR);
|
51
|
+
|
52
|
+
std::size_t length = 0;
|
53
|
+
while (str[length] != '\0') {
|
54
|
+
++length;
|
55
|
+
}
|
56
|
+
push_back(str, length);
|
57
|
+
}
|
58
|
+
|
59
|
+
void Keyset::push_back(const char *ptr, std::size_t length, float weight) {
|
60
|
+
MARISA_DEBUG_IF(size_ == MARISA_SIZE_MAX, MARISA_SIZE_ERROR);
|
61
|
+
MARISA_THROW_IF((ptr == NULL) && (length != 0), MARISA_NULL_ERROR);
|
62
|
+
MARISA_THROW_IF(length > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
|
63
|
+
|
64
|
+
char * const key_ptr = reserve(length);
|
65
|
+
for (std::size_t i = 0; i < length; ++i) {
|
66
|
+
key_ptr[i] = ptr[i];
|
67
|
+
}
|
68
|
+
|
69
|
+
Key &key = key_blocks_[size_ / KEY_BLOCK_SIZE][size_ % KEY_BLOCK_SIZE];
|
70
|
+
key.set_str(key_ptr, length);
|
71
|
+
key.set_weight(weight);
|
72
|
+
++size_;
|
73
|
+
total_length_ += length;
|
74
|
+
}
|
75
|
+
|
76
|
+
// Empties the keyset without releasing the block arrays: the block counters
// for base and extra blocks, the write cursor and the key counters are
// zeroed, while the capacities and key_blocks_size_ are left as they are.
void Keyset::reset() {
  // Key bookkeeping.
  size_ = 0;
  total_length_ = 0;

  // Character storage bookkeeping.
  ptr_ = NULL;
  avail_ = 0;
  base_blocks_size_ = 0;
  extra_blocks_size_ = 0;
}
|
84
|
+
|
85
|
+
void Keyset::clear() {
|
86
|
+
Keyset().swap(*this);
|
87
|
+
}
|
88
|
+
|
89
|
+
// Exchanges every member with `rhs`.
void Keyset::swap(Keyset &rhs) {
  // Block arrays.
  base_blocks_.swap(rhs.base_blocks_);
  extra_blocks_.swap(rhs.extra_blocks_);
  key_blocks_.swap(rhs.key_blocks_);

  // Block counters and capacities.
  marisa::swap(base_blocks_size_, rhs.base_blocks_size_);
  marisa::swap(base_blocks_capacity_, rhs.base_blocks_capacity_);
  marisa::swap(extra_blocks_size_, rhs.extra_blocks_size_);
  marisa::swap(extra_blocks_capacity_, rhs.extra_blocks_capacity_);
  marisa::swap(key_blocks_size_, rhs.key_blocks_size_);
  marisa::swap(key_blocks_capacity_, rhs.key_blocks_capacity_);

  // Write cursor and key bookkeeping.
  marisa::swap(ptr_, rhs.ptr_);
  marisa::swap(avail_, rhs.avail_);
  marisa::swap(size_, rhs.size_);
  marisa::swap(total_length_, rhs.total_length_);
}
|
104
|
+
|
105
|
+
char *Keyset::reserve(std::size_t size) {
|
106
|
+
if ((size_ / KEY_BLOCK_SIZE) == key_blocks_size_) {
|
107
|
+
append_key_block();
|
108
|
+
}
|
109
|
+
|
110
|
+
if (size > EXTRA_BLOCK_SIZE) {
|
111
|
+
append_extra_block(size);
|
112
|
+
return extra_blocks_[extra_blocks_size_ - 1].get();
|
113
|
+
} else {
|
114
|
+
if (size > avail_) {
|
115
|
+
append_base_block();
|
116
|
+
}
|
117
|
+
ptr_ += size;
|
118
|
+
avail_ -= size;
|
119
|
+
return ptr_ - size;
|
120
|
+
}
|
121
|
+
}
|
122
|
+
|
123
|
+
void Keyset::append_base_block() {
|
124
|
+
if (base_blocks_size_ == base_blocks_capacity_) {
|
125
|
+
const std::size_t new_capacity =
|
126
|
+
(base_blocks_size_ != 0) ? (base_blocks_size_ * 2) : 1;
|
127
|
+
scoped_array<scoped_array<char> > new_blocks(
|
128
|
+
new (std::nothrow) scoped_array<char>[new_capacity]);
|
129
|
+
MARISA_THROW_IF(new_blocks.get() == NULL, MARISA_MEMORY_ERROR);
|
130
|
+
for (std::size_t i = 0; i < base_blocks_size_; ++i) {
|
131
|
+
base_blocks_[i].swap(new_blocks[i]);
|
132
|
+
}
|
133
|
+
base_blocks_.swap(new_blocks);
|
134
|
+
base_blocks_capacity_ = new_capacity;
|
135
|
+
}
|
136
|
+
if (base_blocks_[base_blocks_size_].get() == NULL) {
|
137
|
+
scoped_array<char> new_block(new (std::nothrow) char[BASE_BLOCK_SIZE]);
|
138
|
+
MARISA_THROW_IF(new_block.get() == NULL, MARISA_MEMORY_ERROR);
|
139
|
+
base_blocks_[base_blocks_size_].swap(new_block);
|
140
|
+
}
|
141
|
+
ptr_ = base_blocks_[base_blocks_size_++].get();
|
142
|
+
avail_ = BASE_BLOCK_SIZE;
|
143
|
+
}
|
144
|
+
|
145
|
+
void Keyset::append_extra_block(std::size_t size) {
|
146
|
+
if (extra_blocks_size_ == extra_blocks_capacity_) {
|
147
|
+
const std::size_t new_capacity =
|
148
|
+
(extra_blocks_size_ != 0) ? (extra_blocks_size_ * 2) : 1;
|
149
|
+
scoped_array<scoped_array<char> > new_blocks(
|
150
|
+
new (std::nothrow) scoped_array<char>[new_capacity]);
|
151
|
+
MARISA_THROW_IF(new_blocks.get() == NULL, MARISA_MEMORY_ERROR);
|
152
|
+
for (std::size_t i = 0; i < extra_blocks_size_; ++i) {
|
153
|
+
extra_blocks_[i].swap(new_blocks[i]);
|
154
|
+
}
|
155
|
+
extra_blocks_.swap(new_blocks);
|
156
|
+
extra_blocks_capacity_ = new_capacity;
|
157
|
+
}
|
158
|
+
scoped_array<char> new_block(new (std::nothrow) char[size]);
|
159
|
+
MARISA_THROW_IF(new_block.get() == NULL, MARISA_MEMORY_ERROR);
|
160
|
+
extra_blocks_[extra_blocks_size_++].swap(new_block);
|
161
|
+
}
|
162
|
+
|
163
|
+
void Keyset::append_key_block() {
|
164
|
+
if (key_blocks_size_ == key_blocks_capacity_) {
|
165
|
+
const std::size_t new_capacity =
|
166
|
+
(key_blocks_size_ != 0) ? (key_blocks_size_ * 2) : 1;
|
167
|
+
scoped_array<scoped_array<Key> > new_blocks(
|
168
|
+
new (std::nothrow) scoped_array<Key>[new_capacity]);
|
169
|
+
MARISA_THROW_IF(new_blocks.get() == NULL, MARISA_MEMORY_ERROR);
|
170
|
+
for (std::size_t i = 0; i < key_blocks_size_; ++i) {
|
171
|
+
key_blocks_[i].swap(new_blocks[i]);
|
172
|
+
}
|
173
|
+
key_blocks_.swap(new_blocks);
|
174
|
+
key_blocks_capacity_ = new_capacity;
|
175
|
+
}
|
176
|
+
scoped_array<Key> new_block(new (std::nothrow) Key[KEY_BLOCK_SIZE]);
|
177
|
+
MARISA_THROW_IF(new_block.get() == NULL, MARISA_MEMORY_ERROR);
|
178
|
+
key_blocks_[key_blocks_size_++].swap(new_block);
|
179
|
+
}
|
180
|
+
|
181
|
+
} // namespace marisa
|