melisa 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (88) hide show
  1. data/README.md +11 -0
  2. data/ext/marisa/bindings/marisa-swig.cxx +253 -0
  3. data/ext/marisa/bindings/marisa-swig.h +183 -0
  4. data/ext/marisa/bindings/perl/marisa-swig.cxx +253 -0
  5. data/ext/marisa/bindings/perl/marisa-swig.h +183 -0
  6. data/ext/marisa/bindings/perl/marisa-swig_wrap.cxx +5160 -0
  7. data/ext/marisa/bindings/python/marisa-swig.cxx +253 -0
  8. data/ext/marisa/bindings/python/marisa-swig.h +183 -0
  9. data/ext/marisa/bindings/python/marisa-swig_wrap.cxx +6090 -0
  10. data/ext/marisa/bindings/ruby/extconf.rb +5 -0
  11. data/ext/marisa/bindings/ruby/marisa-swig.cxx +253 -0
  12. data/ext/marisa/bindings/ruby/marisa-swig.h +183 -0
  13. data/ext/marisa/bindings/ruby/marisa-swig_wrap.cxx +4708 -0
  14. data/ext/marisa/lib/marisa.h +14 -0
  15. data/ext/marisa/lib/marisa/agent.cc +51 -0
  16. data/ext/marisa/lib/marisa/agent.h +73 -0
  17. data/ext/marisa/lib/marisa/base.h +193 -0
  18. data/ext/marisa/lib/marisa/exception.h +82 -0
  19. data/ext/marisa/lib/marisa/grimoire/algorithm.h +26 -0
  20. data/ext/marisa/lib/marisa/grimoire/algorithm/sort.h +196 -0
  21. data/ext/marisa/lib/marisa/grimoire/intrin.h +115 -0
  22. data/ext/marisa/lib/marisa/grimoire/io.h +18 -0
  23. data/ext/marisa/lib/marisa/grimoire/io/mapper.cc +163 -0
  24. data/ext/marisa/lib/marisa/grimoire/io/mapper.h +67 -0
  25. data/ext/marisa/lib/marisa/grimoire/io/reader.cc +147 -0
  26. data/ext/marisa/lib/marisa/grimoire/io/reader.h +66 -0
  27. data/ext/marisa/lib/marisa/grimoire/io/writer.cc +148 -0
  28. data/ext/marisa/lib/marisa/grimoire/io/writer.h +65 -0
  29. data/ext/marisa/lib/marisa/grimoire/trie.h +16 -0
  30. data/ext/marisa/lib/marisa/grimoire/trie/cache.h +81 -0
  31. data/ext/marisa/lib/marisa/grimoire/trie/config.h +155 -0
  32. data/ext/marisa/lib/marisa/grimoire/trie/entry.h +82 -0
  33. data/ext/marisa/lib/marisa/grimoire/trie/header.h +61 -0
  34. data/ext/marisa/lib/marisa/grimoire/trie/history.h +65 -0
  35. data/ext/marisa/lib/marisa/grimoire/trie/key.h +228 -0
  36. data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.cc +876 -0
  37. data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.h +134 -0
  38. data/ext/marisa/lib/marisa/grimoire/trie/range.h +115 -0
  39. data/ext/marisa/lib/marisa/grimoire/trie/state.h +117 -0
  40. data/ext/marisa/lib/marisa/grimoire/trie/tail.cc +218 -0
  41. data/ext/marisa/lib/marisa/grimoire/trie/tail.h +72 -0
  42. data/ext/marisa/lib/marisa/grimoire/vector.h +18 -0
  43. data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.cc +826 -0
  44. data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.h +179 -0
  45. data/ext/marisa/lib/marisa/grimoire/vector/flat-vector.h +205 -0
  46. data/ext/marisa/lib/marisa/grimoire/vector/pop-count.h +110 -0
  47. data/ext/marisa/lib/marisa/grimoire/vector/rank-index.h +82 -0
  48. data/ext/marisa/lib/marisa/grimoire/vector/vector.h +256 -0
  49. data/ext/marisa/lib/marisa/iostream.h +18 -0
  50. data/ext/marisa/lib/marisa/key.h +85 -0
  51. data/ext/marisa/lib/marisa/keyset.cc +181 -0
  52. data/ext/marisa/lib/marisa/keyset.h +80 -0
  53. data/ext/marisa/lib/marisa/query.h +71 -0
  54. data/ext/marisa/lib/marisa/scoped-array.h +48 -0
  55. data/ext/marisa/lib/marisa/scoped-ptr.h +52 -0
  56. data/ext/marisa/lib/marisa/stdio.h +15 -0
  57. data/ext/marisa/lib/marisa/trie.cc +249 -0
  58. data/ext/marisa/lib/marisa/trie.h +64 -0
  59. data/ext/marisa/tests/base-test.cc +309 -0
  60. data/ext/marisa/tests/io-test.cc +252 -0
  61. data/ext/marisa/tests/marisa-assert.h +26 -0
  62. data/ext/marisa/tests/marisa-test.cc +388 -0
  63. data/ext/marisa/tests/trie-test.cc +507 -0
  64. data/ext/marisa/tests/vector-test.cc +466 -0
  65. data/ext/marisa/tools/cmdopt.cc +298 -0
  66. data/ext/marisa/tools/cmdopt.h +58 -0
  67. data/ext/marisa/tools/marisa-benchmark.cc +418 -0
  68. data/ext/marisa/tools/marisa-build.cc +206 -0
  69. data/ext/marisa/tools/marisa-common-prefix-search.cc +143 -0
  70. data/ext/marisa/tools/marisa-dump.cc +151 -0
  71. data/ext/marisa/tools/marisa-lookup.cc +110 -0
  72. data/ext/marisa/tools/marisa-predictive-search.cc +143 -0
  73. data/ext/marisa/tools/marisa-reverse-lookup.cc +110 -0
  74. data/lib/melisa.rb +7 -0
  75. data/lib/melisa/base_config_flags.rb +76 -0
  76. data/lib/melisa/bytes_trie.rb +55 -0
  77. data/lib/melisa/int_trie.rb +14 -0
  78. data/lib/melisa/search.rb +55 -0
  79. data/lib/melisa/trie.rb +96 -0
  80. data/lib/melisa/version.rb +3 -0
  81. data/melisa.gemspec +36 -0
  82. data/spec/base_config_flags_spec.rb +73 -0
  83. data/spec/bytes_trie_spec.rb +16 -0
  84. data/spec/int_trie_spec.rb +16 -0
  85. data/spec/search_spec.rb +29 -0
  86. data/spec/spec_helper.rb +1 -0
  87. data/spec/trie_spec.rb +30 -0
  88. metadata +207 -0
@@ -0,0 +1,82 @@
1
+ #ifndef MARISA_GRIMOIRE_VECTOR_RANK_INDEX_H_
2
+ #define MARISA_GRIMOIRE_VECTOR_RANK_INDEX_H_
3
+
4
+ #include "marisa/base.h"
5
+
6
+ namespace marisa {
7
+ namespace grimoire {
8
+ namespace vector {
9
+
10
+ class RankIndex {
11
+ public:
12
+ RankIndex() : abs_(0), rel_lo_(0), rel_hi_(0) {}
13
+
14
+ void set_abs(std::size_t value) {
15
+ MARISA_DEBUG_IF(value > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
16
+ abs_ = (UInt32)value;
17
+ }
18
+ void set_rel1(std::size_t value) {
19
+ MARISA_DEBUG_IF(value > 64, MARISA_RANGE_ERROR);
20
+ rel_lo_ = (UInt32)((rel_lo_ & ~0x7FU) | (value & 0x7FU));
21
+ }
22
+ void set_rel2(std::size_t value) {
23
+ MARISA_DEBUG_IF(value > 128, MARISA_RANGE_ERROR);
24
+ rel_lo_ = (UInt32)((rel_lo_ & ~(0xFFU << 7)) | ((value & 0xFFU) << 7));
25
+ }
26
+ void set_rel3(std::size_t value) {
27
+ MARISA_DEBUG_IF(value > 192, MARISA_RANGE_ERROR);
28
+ rel_lo_ = (UInt32)((rel_lo_ & ~(0xFFU << 15)) | ((value & 0xFFU) << 15));
29
+ }
30
+ void set_rel4(std::size_t value) {
31
+ MARISA_DEBUG_IF(value > 256, MARISA_RANGE_ERROR);
32
+ rel_lo_ = (UInt32)((rel_lo_ & ~(0x1FFU << 23)) | ((value & 0x1FFU) << 23));
33
+ }
34
+ void set_rel5(std::size_t value) {
35
+ MARISA_DEBUG_IF(value > 320, MARISA_RANGE_ERROR);
36
+ rel_hi_ = (UInt32)((rel_hi_ & ~0x1FFU) | (value & 0x1FFU));
37
+ }
38
+ void set_rel6(std::size_t value) {
39
+ MARISA_DEBUG_IF(value > 384, MARISA_RANGE_ERROR);
40
+ rel_hi_ = (UInt32)((rel_hi_ & ~(0x1FFU << 9)) | ((value & 0x1FFU) << 9));
41
+ }
42
+ void set_rel7(std::size_t value) {
43
+ MARISA_DEBUG_IF(value > 448, MARISA_RANGE_ERROR);
44
+ rel_hi_ = (UInt32)((rel_hi_ & ~(0x1FFU << 18)) | ((value & 0x1FFU) << 18));
45
+ }
46
+
47
+ std::size_t abs() const {
48
+ return abs_;
49
+ }
50
+ std::size_t rel1() const {
51
+ return rel_lo_ & 0x7FU;
52
+ }
53
+ std::size_t rel2() const {
54
+ return (rel_lo_ >> 7) & 0xFFU;
55
+ }
56
+ std::size_t rel3() const {
57
+ return (rel_lo_ >> 15) & 0xFFU;
58
+ }
59
+ std::size_t rel4() const {
60
+ return (rel_lo_ >> 23) & 0x1FFU;
61
+ }
62
+ std::size_t rel5() const {
63
+ return rel_hi_ & 0x1FFU;
64
+ }
65
+ std::size_t rel6() const {
66
+ return (rel_hi_ >> 9) & 0x1FFU;
67
+ }
68
+ std::size_t rel7() const {
69
+ return (rel_hi_ >> 18) & 0x1FFU;
70
+ }
71
+
72
+ private:
73
+ UInt32 abs_;
74
+ UInt32 rel_lo_;
75
+ UInt32 rel_hi_;
76
+ };
77
+
78
+ } // namespace vector
79
+ } // namespace grimoire
80
+ } // namespace marisa
81
+
82
+ #endif // MARISA_GRIMOIRE_VECTOR_RANK_INDEX_H_
@@ -0,0 +1,256 @@
1
+ #ifndef MARISA_GRIMOIRE_VECTOR_VECTOR_H_
2
+ #define MARISA_GRIMOIRE_VECTOR_VECTOR_H_
3
+
4
+ #include <new>
5
+
6
+ #include "marisa/grimoire/io.h"
7
+
8
+ namespace marisa {
9
+ namespace grimoire {
10
+ namespace vector {
11
+
12
+ template <typename T>
13
+ class Vector {
14
+ public:
15
+ Vector()
16
+ : buf_(), objs_(NULL), const_objs_(NULL),
17
+ size_(0), capacity_(0), fixed_(false) {}
18
+ ~Vector() {
19
+ if (objs_ != NULL) {
20
+ for (std::size_t i = 0; i < size_; ++i) {
21
+ objs_[i].~T();
22
+ }
23
+ }
24
+ }
25
+
26
+ void map(Mapper &mapper) {
27
+ Vector temp;
28
+ temp.map_(mapper);
29
+ swap(temp);
30
+ }
31
+
32
+ void read(Reader &reader) {
33
+ Vector temp;
34
+ temp.read_(reader);
35
+ swap(temp);
36
+ }
37
+
38
+ void write(Writer &writer) const {
39
+ write_(writer);
40
+ }
41
+
42
+ void push_back(const T &x) {
43
+ MARISA_DEBUG_IF(fixed_, MARISA_STATE_ERROR);
44
+ MARISA_DEBUG_IF(size_ == max_size(), MARISA_SIZE_ERROR);
45
+ reserve(size_ + 1);
46
+ new (&objs_[size_]) T(x);
47
+ ++size_;
48
+ }
49
+
50
+ void pop_back() {
51
+ MARISA_DEBUG_IF(fixed_, MARISA_STATE_ERROR);
52
+ MARISA_DEBUG_IF(size_ == 0, MARISA_STATE_ERROR);
53
+ objs_[--size_].~T();
54
+ }
55
+
56
+ // resize() assumes that T's placement new does not throw an exception.
57
+ void resize(std::size_t size) {
58
+ MARISA_DEBUG_IF(fixed_, MARISA_STATE_ERROR);
59
+ reserve(size);
60
+ for (std::size_t i = size_; i < size; ++i) {
61
+ new (&objs_[i]) T;
62
+ }
63
+ for (std::size_t i = size; i < size_; ++i) {
64
+ objs_[i].~T();
65
+ }
66
+ size_ = size;
67
+ }
68
+
69
+ // resize() assumes that T's placement new does not throw an exception.
70
+ void resize(std::size_t size, const T &x) {
71
+ MARISA_DEBUG_IF(fixed_, MARISA_STATE_ERROR);
72
+ reserve(size);
73
+ for (std::size_t i = size_; i < size; ++i) {
74
+ new (&objs_[i]) T(x);
75
+ }
76
+ for (std::size_t i = size; i < size_; ++i) {
77
+ objs_[i].~T();
78
+ }
79
+ size_ = size;
80
+ }
81
+
82
+ void reserve(std::size_t capacity) {
83
+ MARISA_DEBUG_IF(fixed_, MARISA_STATE_ERROR);
84
+ if (capacity <= capacity_) {
85
+ return;
86
+ }
87
+ MARISA_DEBUG_IF(capacity > max_size(), MARISA_SIZE_ERROR);
88
+ std::size_t new_capacity = capacity;
89
+ if (capacity_ > (capacity / 2)) {
90
+ if (capacity_ > (max_size() / 2)) {
91
+ new_capacity = max_size();
92
+ } else {
93
+ new_capacity = capacity_ * 2;
94
+ }
95
+ }
96
+ realloc(new_capacity);
97
+ }
98
+
99
+ void shrink() {
100
+ MARISA_THROW_IF(fixed_, MARISA_STATE_ERROR);
101
+ if (size_ != capacity_) {
102
+ realloc(size_);
103
+ }
104
+ }
105
+
106
+ void fix() {
107
+ MARISA_THROW_IF(fixed_, MARISA_STATE_ERROR);
108
+ fixed_ = true;
109
+ }
110
+
111
+ const T *begin() const {
112
+ return const_objs_;
113
+ }
114
+ const T *end() const {
115
+ return const_objs_ + size_;
116
+ }
117
+ const T &operator[](std::size_t i) const {
118
+ MARISA_DEBUG_IF(i >= size_, MARISA_BOUND_ERROR);
119
+ return const_objs_[i];
120
+ }
121
+ const T &front() const {
122
+ MARISA_DEBUG_IF(size_ == 0, MARISA_STATE_ERROR);
123
+ return const_objs_[0];
124
+ }
125
+ const T &back() const {
126
+ MARISA_DEBUG_IF(size_ == 0, MARISA_STATE_ERROR);
127
+ return const_objs_[size_ - 1];
128
+ }
129
+
130
+ T *begin() {
131
+ MARISA_DEBUG_IF(fixed_, MARISA_STATE_ERROR);
132
+ return objs_;
133
+ }
134
+ T *end() {
135
+ MARISA_DEBUG_IF(fixed_, MARISA_STATE_ERROR);
136
+ return objs_ + size_;
137
+ }
138
+ T &operator[](std::size_t i) {
139
+ MARISA_DEBUG_IF(fixed_, MARISA_STATE_ERROR);
140
+ MARISA_DEBUG_IF(i >= size_, MARISA_BOUND_ERROR);
141
+ return objs_[i];
142
+ }
143
+ T &front() {
144
+ MARISA_DEBUG_IF(fixed_, MARISA_STATE_ERROR);
145
+ MARISA_DEBUG_IF(size_ == 0, MARISA_STATE_ERROR);
146
+ return objs_[0];
147
+ }
148
+ T &back() {
149
+ MARISA_DEBUG_IF(fixed_, MARISA_STATE_ERROR);
150
+ MARISA_DEBUG_IF(size_ == 0, MARISA_STATE_ERROR);
151
+ return objs_[size_ - 1];
152
+ }
153
+
154
+ std::size_t size() const {
155
+ return size_;
156
+ }
157
+ std::size_t capacity() const {
158
+ return capacity_;
159
+ }
160
+ bool fixed() const {
161
+ return fixed_;
162
+ }
163
+
164
+ bool empty() const {
165
+ return size_ == 0;
166
+ }
167
+ std::size_t total_size() const {
168
+ return sizeof(T) * size_;
169
+ }
170
+ std::size_t io_size() const {
171
+ return sizeof(UInt64) + ((total_size() + 7) & ~(std::size_t)0x07);
172
+ }
173
+
174
+ void clear() {
175
+ Vector().swap(*this);
176
+ }
177
+ void swap(Vector &rhs) {
178
+ buf_.swap(rhs.buf_);
179
+ marisa::swap(objs_, rhs.objs_);
180
+ marisa::swap(const_objs_, rhs.const_objs_);
181
+ marisa::swap(size_, rhs.size_);
182
+ marisa::swap(capacity_, rhs.capacity_);
183
+ marisa::swap(fixed_, rhs.fixed_);
184
+ }
185
+
186
+ static std::size_t max_size() {
187
+ return MARISA_SIZE_MAX / sizeof(T);
188
+ }
189
+
190
+ private:
191
+ scoped_array<char> buf_;
192
+ T *objs_;
193
+ const T *const_objs_;
194
+ std::size_t size_;
195
+ std::size_t capacity_;
196
+ bool fixed_;
197
+
198
+ void map_(Mapper &mapper) {
199
+ UInt64 total_size;
200
+ mapper.map(&total_size);
201
+ MARISA_THROW_IF(total_size > MARISA_SIZE_MAX, MARISA_SIZE_ERROR);
202
+ MARISA_THROW_IF((total_size % sizeof(T)) != 0, MARISA_FORMAT_ERROR);
203
+ const std::size_t size = (std::size_t)(total_size / sizeof(T));
204
+ mapper.map(&const_objs_, size);
205
+ mapper.seek((std::size_t)((8 - (total_size % 8)) % 8));
206
+ size_ = size;
207
+ fix();
208
+ }
209
+ void read_(Reader &reader) {
210
+ UInt64 total_size;
211
+ reader.read(&total_size);
212
+ MARISA_THROW_IF(total_size > MARISA_SIZE_MAX, MARISA_SIZE_ERROR);
213
+ MARISA_THROW_IF((total_size % sizeof(T)) != 0, MARISA_FORMAT_ERROR);
214
+ const std::size_t size = (std::size_t)(total_size / sizeof(T));
215
+ resize(size);
216
+ reader.read(objs_, size);
217
+ reader.seek((std::size_t)((8 - (total_size % 8)) % 8));
218
+ }
219
+ void write_(Writer &writer) const {
220
+ writer.write((UInt64)total_size());
221
+ writer.write(const_objs_, size_);
222
+ writer.seek((8 - (total_size() % 8)) % 8);
223
+ }
224
+
225
+ // realloc() assumes that T's placement new does not throw an exception.
226
+ void realloc(std::size_t new_capacity) {
227
+ MARISA_DEBUG_IF(new_capacity > max_size(), MARISA_SIZE_ERROR);
228
+
229
+ scoped_array<char> new_buf(
230
+ new (std::nothrow) char[sizeof(T) * new_capacity]);
231
+ MARISA_DEBUG_IF(new_buf.get() == NULL, MARISA_MEMORY_ERROR);
232
+ T *new_objs = reinterpret_cast<T *>(new_buf.get());
233
+
234
+ for (std::size_t i = 0; i < size_; ++i) {
235
+ new (&new_objs[i]) T(objs_[i]);
236
+ }
237
+ for (std::size_t i = 0; i < size_; ++i) {
238
+ objs_[i].~T();
239
+ }
240
+
241
+ buf_.swap(new_buf);
242
+ objs_ = new_objs;
243
+ const_objs_ = new_objs;
244
+ capacity_ = new_capacity;
245
+ }
246
+
247
+ // Disallows copy and assignment.
248
+ Vector(const Vector &);
249
+ Vector &operator=(const Vector &);
250
+ };
251
+
252
+ } // namespace vector
253
+ } // namespace grimoire
254
+ } // namespace marisa
255
+
256
+ #endif // MARISA_GRIMOIRE_VECTOR_VECTOR_H_
@@ -0,0 +1,18 @@
1
+ #ifndef MARISA_IOSTREAM_H_
2
+ #define MARISA_IOSTREAM_H_
3
+
4
+ #include <iosfwd>
5
+
6
+ namespace marisa {
7
+
8
+ class Trie;
9
+
10
+ std::istream &read(std::istream &stream, Trie *trie);
11
+ std::ostream &write(std::ostream &stream, const Trie &trie);
12
+
13
+ std::istream &operator>>(std::istream &stream, Trie &trie);
14
+ std::ostream &operator<<(std::ostream &stream, const Trie &trie);
15
+
16
+ } // namespace marisa
17
+
18
+ #endif // MARISA_IOSTREAM_H_
@@ -0,0 +1,85 @@
1
+ #ifndef MARISA_KEY_H_
2
+ #define MARISA_KEY_H_
3
+
4
+ #include "marisa/base.h"
5
+
6
+ namespace marisa {
7
+
8
+ class Key {
9
+ public:
10
+ Key() : ptr_(NULL), length_(0), union_() {
11
+ union_.id = 0;
12
+ }
13
+ Key(const Key &key)
14
+ : ptr_(key.ptr_), length_(key.length_), union_(key.union_) {}
15
+
16
+ Key &operator=(const Key &key) {
17
+ ptr_ = key.ptr_;
18
+ length_ = key.length_;
19
+ union_ = key.union_;
20
+ return *this;
21
+ }
22
+
23
+ char operator[](std::size_t i) const {
24
+ MARISA_DEBUG_IF(i >= length_, MARISA_BOUND_ERROR);
25
+ return ptr_[i];
26
+ }
27
+
28
+ void set_str(const char *str) {
29
+ MARISA_DEBUG_IF(str == NULL, MARISA_NULL_ERROR);
30
+ std::size_t length = 0;
31
+ while (str[length] != '\0') {
32
+ ++length;
33
+ }
34
+ MARISA_DEBUG_IF(length > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
35
+ ptr_ = str;
36
+ length_ = (UInt32)length;
37
+ }
38
+ void set_str(const char *ptr, std::size_t length) {
39
+ MARISA_DEBUG_IF((ptr == NULL) && (length != 0), MARISA_NULL_ERROR);
40
+ MARISA_DEBUG_IF(length > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
41
+ ptr_ = ptr;
42
+ length_ = (UInt32)length;
43
+ }
44
+ void set_id(std::size_t id) {
45
+ MARISA_DEBUG_IF(id > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
46
+ union_.id = (UInt32)id;
47
+ }
48
+ void set_weight(float weight) {
49
+ union_.weight = weight;
50
+ }
51
+
52
+ const char *ptr() const {
53
+ return ptr_;
54
+ }
55
+ std::size_t length() const {
56
+ return length_;
57
+ }
58
+ std::size_t id() const {
59
+ return union_.id;
60
+ }
61
+ float weight() const {
62
+ return union_.weight;
63
+ }
64
+
65
+ void clear() {
66
+ Key().swap(*this);
67
+ }
68
+ void swap(Key &rhs) {
69
+ marisa::swap(ptr_, rhs.ptr_);
70
+ marisa::swap(length_, rhs.length_);
71
+ marisa::swap(union_.id, rhs.union_.id);
72
+ }
73
+
74
+ private:
75
+ const char *ptr_;
76
+ UInt32 length_;
77
+ union Union {
78
+ UInt32 id;
79
+ float weight;
80
+ } union_;
81
+ };
82
+
83
+ } // namespace marisa
84
+
85
+ #endif // MARISA_KEY_H_
@@ -0,0 +1,181 @@
1
+ #include <new>
2
+
3
+ #include "marisa/keyset.h"
4
+
5
+ namespace marisa {
6
+
7
+ Keyset::Keyset()
8
+ : base_blocks_(), base_blocks_size_(0), base_blocks_capacity_(0),
9
+ extra_blocks_(), extra_blocks_size_(0), extra_blocks_capacity_(0),
10
+ key_blocks_(), key_blocks_size_(0), key_blocks_capacity_(0),
11
+ ptr_(NULL), avail_(0), size_(0), total_length_(0) {}
12
+
13
+ void Keyset::push_back(const Key &key) {
14
+ MARISA_DEBUG_IF(size_ == MARISA_SIZE_MAX, MARISA_SIZE_ERROR);
15
+
16
+ char * const key_ptr = reserve(key.length());
17
+ for (std::size_t i = 0; i < key.length(); ++i) {
18
+ key_ptr[i] = key[i];
19
+ }
20
+
21
+ Key &new_key = key_blocks_[size_ / KEY_BLOCK_SIZE][size_ % KEY_BLOCK_SIZE];
22
+ new_key.set_str(key_ptr, key.length());
23
+ new_key.set_id(key.id());
24
+ ++size_;
25
+ total_length_ += new_key.length();
26
+ }
27
+
28
+ void Keyset::push_back(const Key &key, char end_marker) {
29
+ MARISA_DEBUG_IF(size_ == MARISA_SIZE_MAX, MARISA_SIZE_ERROR);
30
+
31
+ if ((size_ / KEY_BLOCK_SIZE) == key_blocks_size_) {
32
+ append_key_block();
33
+ }
34
+
35
+ char * const key_ptr = reserve(key.length() + 1);
36
+ for (std::size_t i = 0; i < key.length(); ++i) {
37
+ key_ptr[i] = key[i];
38
+ }
39
+ key_ptr[key.length()] = end_marker;
40
+
41
+ Key &new_key = key_blocks_[size_ / KEY_BLOCK_SIZE][size_ % KEY_BLOCK_SIZE];
42
+ new_key.set_str(key_ptr, key.length());
43
+ new_key.set_id(key.id());
44
+ ++size_;
45
+ total_length_ += new_key.length();
46
+ }
47
+
48
+ void Keyset::push_back(const char *str) {
49
+ MARISA_DEBUG_IF(size_ == MARISA_SIZE_MAX, MARISA_SIZE_ERROR);
50
+ MARISA_THROW_IF(str == NULL, MARISA_NULL_ERROR);
51
+
52
+ std::size_t length = 0;
53
+ while (str[length] != '\0') {
54
+ ++length;
55
+ }
56
+ push_back(str, length);
57
+ }
58
+
59
+ void Keyset::push_back(const char *ptr, std::size_t length, float weight) {
60
+ MARISA_DEBUG_IF(size_ == MARISA_SIZE_MAX, MARISA_SIZE_ERROR);
61
+ MARISA_THROW_IF((ptr == NULL) && (length != 0), MARISA_NULL_ERROR);
62
+ MARISA_THROW_IF(length > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
63
+
64
+ char * const key_ptr = reserve(length);
65
+ for (std::size_t i = 0; i < length; ++i) {
66
+ key_ptr[i] = ptr[i];
67
+ }
68
+
69
+ Key &key = key_blocks_[size_ / KEY_BLOCK_SIZE][size_ % KEY_BLOCK_SIZE];
70
+ key.set_str(key_ptr, length);
71
+ key.set_weight(weight);
72
+ ++size_;
73
+ total_length_ += length;
74
+ }
75
+
76
+ void Keyset::reset() {
77
+ base_blocks_size_ = 0;
78
+ extra_blocks_size_ = 0;
79
+ ptr_ = NULL;
80
+ avail_ = 0;
81
+ size_ = 0;
82
+ total_length_ = 0;
83
+ }
84
+
85
+ void Keyset::clear() {
86
+ Keyset().swap(*this);
87
+ }
88
+
89
+ void Keyset::swap(Keyset &rhs) {
90
+ base_blocks_.swap(rhs.base_blocks_);
91
+ marisa::swap(base_blocks_size_, rhs.base_blocks_size_);
92
+ marisa::swap(base_blocks_capacity_, rhs.base_blocks_capacity_);
93
+ extra_blocks_.swap(rhs.extra_blocks_);
94
+ marisa::swap(extra_blocks_size_, rhs.extra_blocks_size_);
95
+ marisa::swap(extra_blocks_capacity_, rhs.extra_blocks_capacity_);
96
+ key_blocks_.swap(rhs.key_blocks_);
97
+ marisa::swap(key_blocks_size_, rhs.key_blocks_size_);
98
+ marisa::swap(key_blocks_capacity_, rhs.key_blocks_capacity_);
99
+ marisa::swap(ptr_, rhs.ptr_);
100
+ marisa::swap(avail_, rhs.avail_);
101
+ marisa::swap(size_, rhs.size_);
102
+ marisa::swap(total_length_, rhs.total_length_);
103
+ }
104
+
105
+ char *Keyset::reserve(std::size_t size) {
106
+ if ((size_ / KEY_BLOCK_SIZE) == key_blocks_size_) {
107
+ append_key_block();
108
+ }
109
+
110
+ if (size > EXTRA_BLOCK_SIZE) {
111
+ append_extra_block(size);
112
+ return extra_blocks_[extra_blocks_size_ - 1].get();
113
+ } else {
114
+ if (size > avail_) {
115
+ append_base_block();
116
+ }
117
+ ptr_ += size;
118
+ avail_ -= size;
119
+ return ptr_ - size;
120
+ }
121
+ }
122
+
123
+ void Keyset::append_base_block() {
124
+ if (base_blocks_size_ == base_blocks_capacity_) {
125
+ const std::size_t new_capacity =
126
+ (base_blocks_size_ != 0) ? (base_blocks_size_ * 2) : 1;
127
+ scoped_array<scoped_array<char> > new_blocks(
128
+ new (std::nothrow) scoped_array<char>[new_capacity]);
129
+ MARISA_THROW_IF(new_blocks.get() == NULL, MARISA_MEMORY_ERROR);
130
+ for (std::size_t i = 0; i < base_blocks_size_; ++i) {
131
+ base_blocks_[i].swap(new_blocks[i]);
132
+ }
133
+ base_blocks_.swap(new_blocks);
134
+ base_blocks_capacity_ = new_capacity;
135
+ }
136
+ if (base_blocks_[base_blocks_size_].get() == NULL) {
137
+ scoped_array<char> new_block(new (std::nothrow) char[BASE_BLOCK_SIZE]);
138
+ MARISA_THROW_IF(new_block.get() == NULL, MARISA_MEMORY_ERROR);
139
+ base_blocks_[base_blocks_size_].swap(new_block);
140
+ }
141
+ ptr_ = base_blocks_[base_blocks_size_++].get();
142
+ avail_ = BASE_BLOCK_SIZE;
143
+ }
144
+
145
+ void Keyset::append_extra_block(std::size_t size) {
146
+ if (extra_blocks_size_ == extra_blocks_capacity_) {
147
+ const std::size_t new_capacity =
148
+ (extra_blocks_size_ != 0) ? (extra_blocks_size_ * 2) : 1;
149
+ scoped_array<scoped_array<char> > new_blocks(
150
+ new (std::nothrow) scoped_array<char>[new_capacity]);
151
+ MARISA_THROW_IF(new_blocks.get() == NULL, MARISA_MEMORY_ERROR);
152
+ for (std::size_t i = 0; i < extra_blocks_size_; ++i) {
153
+ extra_blocks_[i].swap(new_blocks[i]);
154
+ }
155
+ extra_blocks_.swap(new_blocks);
156
+ extra_blocks_capacity_ = new_capacity;
157
+ }
158
+ scoped_array<char> new_block(new (std::nothrow) char[size]);
159
+ MARISA_THROW_IF(new_block.get() == NULL, MARISA_MEMORY_ERROR);
160
+ extra_blocks_[extra_blocks_size_++].swap(new_block);
161
+ }
162
+
163
+ void Keyset::append_key_block() {
164
+ if (key_blocks_size_ == key_blocks_capacity_) {
165
+ const std::size_t new_capacity =
166
+ (key_blocks_size_ != 0) ? (key_blocks_size_ * 2) : 1;
167
+ scoped_array<scoped_array<Key> > new_blocks(
168
+ new (std::nothrow) scoped_array<Key>[new_capacity]);
169
+ MARISA_THROW_IF(new_blocks.get() == NULL, MARISA_MEMORY_ERROR);
170
+ for (std::size_t i = 0; i < key_blocks_size_; ++i) {
171
+ key_blocks_[i].swap(new_blocks[i]);
172
+ }
173
+ key_blocks_.swap(new_blocks);
174
+ key_blocks_capacity_ = new_capacity;
175
+ }
176
+ scoped_array<Key> new_block(new (std::nothrow) Key[KEY_BLOCK_SIZE]);
177
+ MARISA_THROW_IF(new_block.get() == NULL, MARISA_MEMORY_ERROR);
178
+ key_blocks_[key_blocks_size_++].swap(new_block);
179
+ }
180
+
181
+ } // namespace marisa