melisa 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +11 -0
- data/ext/marisa/bindings/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/marisa-swig.h +183 -0
- data/ext/marisa/bindings/perl/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/perl/marisa-swig.h +183 -0
- data/ext/marisa/bindings/perl/marisa-swig_wrap.cxx +5160 -0
- data/ext/marisa/bindings/python/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/python/marisa-swig.h +183 -0
- data/ext/marisa/bindings/python/marisa-swig_wrap.cxx +6090 -0
- data/ext/marisa/bindings/ruby/extconf.rb +5 -0
- data/ext/marisa/bindings/ruby/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/ruby/marisa-swig.h +183 -0
- data/ext/marisa/bindings/ruby/marisa-swig_wrap.cxx +4708 -0
- data/ext/marisa/lib/marisa.h +14 -0
- data/ext/marisa/lib/marisa/agent.cc +51 -0
- data/ext/marisa/lib/marisa/agent.h +73 -0
- data/ext/marisa/lib/marisa/base.h +193 -0
- data/ext/marisa/lib/marisa/exception.h +82 -0
- data/ext/marisa/lib/marisa/grimoire/algorithm.h +26 -0
- data/ext/marisa/lib/marisa/grimoire/algorithm/sort.h +196 -0
- data/ext/marisa/lib/marisa/grimoire/intrin.h +115 -0
- data/ext/marisa/lib/marisa/grimoire/io.h +18 -0
- data/ext/marisa/lib/marisa/grimoire/io/mapper.cc +163 -0
- data/ext/marisa/lib/marisa/grimoire/io/mapper.h +67 -0
- data/ext/marisa/lib/marisa/grimoire/io/reader.cc +147 -0
- data/ext/marisa/lib/marisa/grimoire/io/reader.h +66 -0
- data/ext/marisa/lib/marisa/grimoire/io/writer.cc +148 -0
- data/ext/marisa/lib/marisa/grimoire/io/writer.h +65 -0
- data/ext/marisa/lib/marisa/grimoire/trie.h +16 -0
- data/ext/marisa/lib/marisa/grimoire/trie/cache.h +81 -0
- data/ext/marisa/lib/marisa/grimoire/trie/config.h +155 -0
- data/ext/marisa/lib/marisa/grimoire/trie/entry.h +82 -0
- data/ext/marisa/lib/marisa/grimoire/trie/header.h +61 -0
- data/ext/marisa/lib/marisa/grimoire/trie/history.h +65 -0
- data/ext/marisa/lib/marisa/grimoire/trie/key.h +228 -0
- data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.cc +876 -0
- data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.h +134 -0
- data/ext/marisa/lib/marisa/grimoire/trie/range.h +115 -0
- data/ext/marisa/lib/marisa/grimoire/trie/state.h +117 -0
- data/ext/marisa/lib/marisa/grimoire/trie/tail.cc +218 -0
- data/ext/marisa/lib/marisa/grimoire/trie/tail.h +72 -0
- data/ext/marisa/lib/marisa/grimoire/vector.h +18 -0
- data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.cc +826 -0
- data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.h +179 -0
- data/ext/marisa/lib/marisa/grimoire/vector/flat-vector.h +205 -0
- data/ext/marisa/lib/marisa/grimoire/vector/pop-count.h +110 -0
- data/ext/marisa/lib/marisa/grimoire/vector/rank-index.h +82 -0
- data/ext/marisa/lib/marisa/grimoire/vector/vector.h +256 -0
- data/ext/marisa/lib/marisa/iostream.h +18 -0
- data/ext/marisa/lib/marisa/key.h +85 -0
- data/ext/marisa/lib/marisa/keyset.cc +181 -0
- data/ext/marisa/lib/marisa/keyset.h +80 -0
- data/ext/marisa/lib/marisa/query.h +71 -0
- data/ext/marisa/lib/marisa/scoped-array.h +48 -0
- data/ext/marisa/lib/marisa/scoped-ptr.h +52 -0
- data/ext/marisa/lib/marisa/stdio.h +15 -0
- data/ext/marisa/lib/marisa/trie.cc +249 -0
- data/ext/marisa/lib/marisa/trie.h +64 -0
- data/ext/marisa/tests/base-test.cc +309 -0
- data/ext/marisa/tests/io-test.cc +252 -0
- data/ext/marisa/tests/marisa-assert.h +26 -0
- data/ext/marisa/tests/marisa-test.cc +388 -0
- data/ext/marisa/tests/trie-test.cc +507 -0
- data/ext/marisa/tests/vector-test.cc +466 -0
- data/ext/marisa/tools/cmdopt.cc +298 -0
- data/ext/marisa/tools/cmdopt.h +58 -0
- data/ext/marisa/tools/marisa-benchmark.cc +418 -0
- data/ext/marisa/tools/marisa-build.cc +206 -0
- data/ext/marisa/tools/marisa-common-prefix-search.cc +143 -0
- data/ext/marisa/tools/marisa-dump.cc +151 -0
- data/ext/marisa/tools/marisa-lookup.cc +110 -0
- data/ext/marisa/tools/marisa-predictive-search.cc +143 -0
- data/ext/marisa/tools/marisa-reverse-lookup.cc +110 -0
- data/lib/melisa.rb +7 -0
- data/lib/melisa/base_config_flags.rb +76 -0
- data/lib/melisa/bytes_trie.rb +55 -0
- data/lib/melisa/int_trie.rb +14 -0
- data/lib/melisa/search.rb +55 -0
- data/lib/melisa/trie.rb +96 -0
- data/lib/melisa/version.rb +3 -0
- data/melisa.gemspec +36 -0
- data/spec/base_config_flags_spec.rb +73 -0
- data/spec/bytes_trie_spec.rb +16 -0
- data/spec/int_trie_spec.rb +16 -0
- data/spec/search_spec.rb +29 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/trie_spec.rb +30 -0
- metadata +207 -0
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
#ifndef MARISA_GRIMOIRE_VECTOR_BIT_VECTOR_H_
|
|
2
|
+
#define MARISA_GRIMOIRE_VECTOR_BIT_VECTOR_H_
|
|
3
|
+
|
|
4
|
+
#include "marisa/grimoire/vector/rank-index.h"
|
|
5
|
+
#include "marisa/grimoire/vector/vector.h"
|
|
6
|
+
|
|
7
|
+
namespace marisa {
|
|
8
|
+
namespace grimoire {
|
|
9
|
+
namespace vector {
|
|
10
|
+
|
|
11
|
+
class BitVector {
|
|
12
|
+
public:
|
|
13
|
+
#if MARISA_WORD_SIZE == 64
|
|
14
|
+
typedef UInt64 Unit;
|
|
15
|
+
#else // MARISA_WORD_SIZE == 64
|
|
16
|
+
typedef UInt32 Unit;
|
|
17
|
+
#endif // MARISA_WORD_SIZE == 64
|
|
18
|
+
|
|
19
|
+
BitVector()
|
|
20
|
+
: units_(), size_(0), num_1s_(0), ranks_(), select0s_(), select1s_() {}
|
|
21
|
+
|
|
22
|
+
void build(bool enables_select0, bool enables_select1) {
|
|
23
|
+
BitVector temp;
|
|
24
|
+
temp.build_index(*this, enables_select0, enables_select1);
|
|
25
|
+
units_.shrink();
|
|
26
|
+
temp.units_.swap(units_);
|
|
27
|
+
swap(temp);
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
void map(Mapper &mapper) {
|
|
31
|
+
BitVector temp;
|
|
32
|
+
temp.map_(mapper);
|
|
33
|
+
swap(temp);
|
|
34
|
+
}
|
|
35
|
+
void read(Reader &reader) {
|
|
36
|
+
BitVector temp;
|
|
37
|
+
temp.read_(reader);
|
|
38
|
+
swap(temp);
|
|
39
|
+
}
|
|
40
|
+
void write(Writer &writer) const {
|
|
41
|
+
write_(writer);
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
void disable_select0() {
|
|
45
|
+
select0s_.clear();
|
|
46
|
+
}
|
|
47
|
+
void disable_select1() {
|
|
48
|
+
select1s_.clear();
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
void push_back(bool bit) {
|
|
52
|
+
MARISA_THROW_IF(size_ == MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
|
|
53
|
+
if (size_ == (MARISA_WORD_SIZE * units_.size())) {
|
|
54
|
+
units_.resize(units_.size() + (64 / MARISA_WORD_SIZE), 0);
|
|
55
|
+
}
|
|
56
|
+
if (bit) {
|
|
57
|
+
units_[size_ / MARISA_WORD_SIZE] |=
|
|
58
|
+
(Unit)1 << (size_ % MARISA_WORD_SIZE);
|
|
59
|
+
++num_1s_;
|
|
60
|
+
}
|
|
61
|
+
++size_;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
bool operator[](std::size_t i) const {
|
|
65
|
+
MARISA_DEBUG_IF(i >= size_, MARISA_BOUND_ERROR);
|
|
66
|
+
return (units_[i / MARISA_WORD_SIZE]
|
|
67
|
+
& ((Unit)1 << (i % MARISA_WORD_SIZE))) != 0;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
std::size_t rank0(std::size_t i) const {
|
|
71
|
+
MARISA_DEBUG_IF(ranks_.empty(), MARISA_STATE_ERROR);
|
|
72
|
+
MARISA_DEBUG_IF(i > size_, MARISA_BOUND_ERROR);
|
|
73
|
+
return i - rank1(i);
|
|
74
|
+
}
|
|
75
|
+
std::size_t rank1(std::size_t i) const;
|
|
76
|
+
|
|
77
|
+
std::size_t select0(std::size_t i) const;
|
|
78
|
+
std::size_t select1(std::size_t i) const;
|
|
79
|
+
|
|
80
|
+
std::size_t num_0s() const {
|
|
81
|
+
return size_ - num_1s_;
|
|
82
|
+
}
|
|
83
|
+
std::size_t num_1s() const {
|
|
84
|
+
return num_1s_;
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
bool empty() const {
|
|
88
|
+
return size_ == 0;
|
|
89
|
+
}
|
|
90
|
+
std::size_t size() const {
|
|
91
|
+
return size_;
|
|
92
|
+
}
|
|
93
|
+
std::size_t total_size() const {
|
|
94
|
+
return units_.total_size() + ranks_.total_size()
|
|
95
|
+
+ select0s_.total_size() + select1s_.total_size();
|
|
96
|
+
}
|
|
97
|
+
std::size_t io_size() const {
|
|
98
|
+
return units_.io_size() + (sizeof(UInt32) * 2) + ranks_.io_size()
|
|
99
|
+
+ select0s_.io_size() + select1s_.io_size();
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
void clear() {
|
|
103
|
+
BitVector().swap(*this);
|
|
104
|
+
}
|
|
105
|
+
void swap(BitVector &rhs) {
|
|
106
|
+
units_.swap(rhs.units_);
|
|
107
|
+
marisa::swap(size_, rhs.size_);
|
|
108
|
+
marisa::swap(num_1s_, rhs.num_1s_);
|
|
109
|
+
ranks_.swap(rhs.ranks_);
|
|
110
|
+
select0s_.swap(rhs.select0s_);
|
|
111
|
+
select1s_.swap(rhs.select1s_);
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
private:
|
|
115
|
+
Vector<Unit> units_;
|
|
116
|
+
std::size_t size_;
|
|
117
|
+
std::size_t num_1s_;
|
|
118
|
+
Vector<RankIndex> ranks_;
|
|
119
|
+
Vector<UInt32> select0s_;
|
|
120
|
+
Vector<UInt32> select1s_;
|
|
121
|
+
|
|
122
|
+
void build_index(const BitVector &bv,
|
|
123
|
+
bool enables_select0, bool enables_select1);
|
|
124
|
+
|
|
125
|
+
void map_(Mapper &mapper) {
|
|
126
|
+
units_.map(mapper);
|
|
127
|
+
{
|
|
128
|
+
UInt32 temp_size;
|
|
129
|
+
mapper.map(&temp_size);
|
|
130
|
+
size_ = temp_size;
|
|
131
|
+
}
|
|
132
|
+
{
|
|
133
|
+
UInt32 temp_num_1s;
|
|
134
|
+
mapper.map(&temp_num_1s);
|
|
135
|
+
MARISA_THROW_IF(temp_num_1s > size_, MARISA_FORMAT_ERROR);
|
|
136
|
+
num_1s_ = temp_num_1s;
|
|
137
|
+
}
|
|
138
|
+
ranks_.map(mapper);
|
|
139
|
+
select0s_.map(mapper);
|
|
140
|
+
select1s_.map(mapper);
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
void read_(Reader &reader) {
|
|
144
|
+
units_.read(reader);
|
|
145
|
+
{
|
|
146
|
+
UInt32 temp_size;
|
|
147
|
+
reader.read(&temp_size);
|
|
148
|
+
size_ = temp_size;
|
|
149
|
+
}
|
|
150
|
+
{
|
|
151
|
+
UInt32 temp_num_1s;
|
|
152
|
+
reader.read(&temp_num_1s);
|
|
153
|
+
MARISA_THROW_IF(temp_num_1s > size_, MARISA_FORMAT_ERROR);
|
|
154
|
+
num_1s_ = temp_num_1s;
|
|
155
|
+
}
|
|
156
|
+
ranks_.read(reader);
|
|
157
|
+
select0s_.read(reader);
|
|
158
|
+
select1s_.read(reader);
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
void write_(Writer &writer) const {
|
|
162
|
+
units_.write(writer);
|
|
163
|
+
writer.write((UInt32)size_);
|
|
164
|
+
writer.write((UInt32)num_1s_);
|
|
165
|
+
ranks_.write(writer);
|
|
166
|
+
select0s_.write(writer);
|
|
167
|
+
select1s_.write(writer);
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
// Disallows copy and assignment.
|
|
171
|
+
BitVector(const BitVector &);
|
|
172
|
+
BitVector &operator=(const BitVector &);
|
|
173
|
+
};
|
|
174
|
+
|
|
175
|
+
} // namespace vector
|
|
176
|
+
} // namespace grimoire
|
|
177
|
+
} // namespace marisa
|
|
178
|
+
|
|
179
|
+
#endif // MARISA_GRIMOIRE_VECTOR_BIT_VECTOR_H_
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
#ifndef MARISA_GRIMOIRE_VECTOR_FLAT_VECTOR_H_
|
|
2
|
+
#define MARISA_GRIMOIRE_VECTOR_FLAT_VECTOR_H_
|
|
3
|
+
|
|
4
|
+
#include "marisa/grimoire/vector/vector.h"
|
|
5
|
+
|
|
6
|
+
namespace marisa {
|
|
7
|
+
namespace grimoire {
|
|
8
|
+
namespace vector {
|
|
9
|
+
|
|
10
|
+
class FlatVector {
|
|
11
|
+
public:
|
|
12
|
+
#if MARISA_WORD_SIZE == 64
|
|
13
|
+
typedef UInt64 Unit;
|
|
14
|
+
#else // MARISA_WORD_SIZE == 64
|
|
15
|
+
typedef UInt32 Unit;
|
|
16
|
+
#endif // MARISA_WORD_SIZE == 64
|
|
17
|
+
|
|
18
|
+
FlatVector() : units_(), value_size_(0), mask_(0), size_(0) {}
|
|
19
|
+
|
|
20
|
+
void build(const Vector<UInt32> &values) {
|
|
21
|
+
FlatVector temp;
|
|
22
|
+
temp.build_(values);
|
|
23
|
+
swap(temp);
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
void map(Mapper &mapper) {
|
|
27
|
+
FlatVector temp;
|
|
28
|
+
temp.map_(mapper);
|
|
29
|
+
swap(temp);
|
|
30
|
+
}
|
|
31
|
+
void read(Reader &reader) {
|
|
32
|
+
FlatVector temp;
|
|
33
|
+
temp.read_(reader);
|
|
34
|
+
swap(temp);
|
|
35
|
+
}
|
|
36
|
+
void write(Writer &writer) const {
|
|
37
|
+
write_(writer);
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
UInt32 operator[](std::size_t i) const {
|
|
41
|
+
MARISA_DEBUG_IF(i >= size_, MARISA_BOUND_ERROR);
|
|
42
|
+
|
|
43
|
+
const std::size_t pos = i * value_size_;
|
|
44
|
+
const std::size_t unit_id = pos / MARISA_WORD_SIZE;
|
|
45
|
+
const std::size_t unit_offset = pos % MARISA_WORD_SIZE;
|
|
46
|
+
|
|
47
|
+
if ((unit_offset + value_size_) <= MARISA_WORD_SIZE) {
|
|
48
|
+
return (UInt32)(units_[unit_id] >> unit_offset) & mask_;
|
|
49
|
+
} else {
|
|
50
|
+
return (UInt32)((units_[unit_id] >> unit_offset)
|
|
51
|
+
| (units_[unit_id + 1] << (MARISA_WORD_SIZE - unit_offset))) & mask_;
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
std::size_t value_size() const {
|
|
56
|
+
return value_size_;
|
|
57
|
+
}
|
|
58
|
+
UInt32 mask() const {
|
|
59
|
+
return mask_;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
bool empty() const {
|
|
63
|
+
return size_ == 0;
|
|
64
|
+
}
|
|
65
|
+
std::size_t size() const {
|
|
66
|
+
return size_;
|
|
67
|
+
}
|
|
68
|
+
std::size_t total_size() const {
|
|
69
|
+
return units_.total_size();
|
|
70
|
+
}
|
|
71
|
+
std::size_t io_size() const {
|
|
72
|
+
return units_.io_size() + (sizeof(UInt32) * 2) + sizeof(UInt64);
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
void clear() {
|
|
76
|
+
FlatVector().swap(*this);
|
|
77
|
+
}
|
|
78
|
+
void swap(FlatVector &rhs) {
|
|
79
|
+
units_.swap(rhs.units_);
|
|
80
|
+
marisa::swap(value_size_, rhs.value_size_);
|
|
81
|
+
marisa::swap(mask_, rhs.mask_);
|
|
82
|
+
marisa::swap(size_, rhs.size_);
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
private:
|
|
86
|
+
Vector<Unit> units_;
|
|
87
|
+
std::size_t value_size_;
|
|
88
|
+
UInt32 mask_;
|
|
89
|
+
std::size_t size_;
|
|
90
|
+
|
|
91
|
+
void build_(const Vector<UInt32> &values) {
|
|
92
|
+
UInt32 max_value = 0;
|
|
93
|
+
for (std::size_t i = 0; i < values.size(); ++i) {
|
|
94
|
+
if (values[i] > max_value) {
|
|
95
|
+
max_value = values[i];
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
std::size_t value_size = 0;
|
|
100
|
+
while (max_value != 0) {
|
|
101
|
+
++value_size;
|
|
102
|
+
max_value >>= 1;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
std::size_t num_units = values.empty() ? 0 : (64 / MARISA_WORD_SIZE);
|
|
106
|
+
if (value_size != 0) {
|
|
107
|
+
num_units = (std::size_t)(
|
|
108
|
+
(((UInt64)value_size * values.size()) + (MARISA_WORD_SIZE - 1))
|
|
109
|
+
/ MARISA_WORD_SIZE);
|
|
110
|
+
num_units += num_units % (64 / MARISA_WORD_SIZE);
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
units_.resize(num_units);
|
|
114
|
+
if (num_units > 0) {
|
|
115
|
+
units_.back() = 0;
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
value_size_ = value_size;
|
|
119
|
+
if (value_size != 0) {
|
|
120
|
+
mask_ = MARISA_UINT32_MAX >> (32 - value_size);
|
|
121
|
+
}
|
|
122
|
+
size_ = values.size();
|
|
123
|
+
|
|
124
|
+
for (std::size_t i = 0; i < values.size(); ++i) {
|
|
125
|
+
set(i, values[i]);
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
void map_(Mapper &mapper) {
|
|
130
|
+
units_.map(mapper);
|
|
131
|
+
{
|
|
132
|
+
UInt32 temp_value_size;
|
|
133
|
+
mapper.map(&temp_value_size);
|
|
134
|
+
MARISA_THROW_IF(temp_value_size > 32, MARISA_FORMAT_ERROR);
|
|
135
|
+
value_size_ = temp_value_size;
|
|
136
|
+
}
|
|
137
|
+
{
|
|
138
|
+
UInt32 temp_mask;
|
|
139
|
+
mapper.map(&temp_mask);
|
|
140
|
+
mask_ = temp_mask;
|
|
141
|
+
}
|
|
142
|
+
{
|
|
143
|
+
UInt64 temp_size;
|
|
144
|
+
mapper.map(&temp_size);
|
|
145
|
+
MARISA_THROW_IF(temp_size > MARISA_SIZE_MAX, MARISA_SIZE_ERROR);
|
|
146
|
+
size_ = (std::size_t)temp_size;
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
void read_(Reader &reader) {
|
|
151
|
+
units_.read(reader);
|
|
152
|
+
{
|
|
153
|
+
UInt32 temp_value_size;
|
|
154
|
+
reader.read(&temp_value_size);
|
|
155
|
+
MARISA_THROW_IF(temp_value_size > 32, MARISA_FORMAT_ERROR);
|
|
156
|
+
value_size_ = temp_value_size;
|
|
157
|
+
}
|
|
158
|
+
{
|
|
159
|
+
UInt32 temp_mask;
|
|
160
|
+
reader.read(&temp_mask);
|
|
161
|
+
mask_ = temp_mask;
|
|
162
|
+
}
|
|
163
|
+
{
|
|
164
|
+
UInt64 temp_size;
|
|
165
|
+
reader.read(&temp_size);
|
|
166
|
+
MARISA_THROW_IF(temp_size > MARISA_SIZE_MAX, MARISA_SIZE_ERROR);
|
|
167
|
+
size_ = (std::size_t)temp_size;
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
void write_(Writer &writer) const {
|
|
172
|
+
units_.write(writer);
|
|
173
|
+
writer.write((UInt32)value_size_);
|
|
174
|
+
writer.write((UInt32)mask_);
|
|
175
|
+
writer.write((UInt64)size_);
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
void set(std::size_t i, UInt32 value) {
|
|
179
|
+
MARISA_DEBUG_IF(i >= size_, MARISA_BOUND_ERROR);
|
|
180
|
+
MARISA_DEBUG_IF(value > mask_, MARISA_RANGE_ERROR);
|
|
181
|
+
|
|
182
|
+
const std::size_t pos = i * value_size_;
|
|
183
|
+
const std::size_t unit_id = pos / MARISA_WORD_SIZE;
|
|
184
|
+
const std::size_t unit_offset = pos % MARISA_WORD_SIZE;
|
|
185
|
+
|
|
186
|
+
units_[unit_id] &= ~((Unit)mask_ << unit_offset);
|
|
187
|
+
units_[unit_id] |= (Unit)(value & mask_) << unit_offset;
|
|
188
|
+
if ((unit_offset + value_size_) > MARISA_WORD_SIZE) {
|
|
189
|
+
units_[unit_id + 1] &=
|
|
190
|
+
~((Unit)mask_ >> (MARISA_WORD_SIZE - unit_offset));
|
|
191
|
+
units_[unit_id + 1] |=
|
|
192
|
+
(Unit)(value & mask_) >> (MARISA_WORD_SIZE - unit_offset);
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
// Disallows copy and assignment.
|
|
197
|
+
FlatVector(const FlatVector &);
|
|
198
|
+
FlatVector &operator=(const FlatVector &);
|
|
199
|
+
};
|
|
200
|
+
|
|
201
|
+
} // namespace vector
|
|
202
|
+
} // namespace grimoire
|
|
203
|
+
} // namespace marisa
|
|
204
|
+
|
|
205
|
+
#endif // MARISA_GRIMOIRE_VECTOR_FLAT_VECTOR_H_
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
#ifndef MARISA_GRIMOIRE_VECTOR_POP_COUNT_H_
|
|
2
|
+
#define MARISA_GRIMOIRE_VECTOR_POP_COUNT_H_
|
|
3
|
+
|
|
4
|
+
#include "marisa/grimoire/intrin.h"
|
|
5
|
+
|
|
6
|
+
namespace marisa {
|
|
7
|
+
namespace grimoire {
|
|
8
|
+
namespace vector {
|
|
9
|
+
|
|
10
|
+
#if MARISA_WORD_SIZE == 64
|
|
11
|
+
|
|
12
|
+
class PopCount {
|
|
13
|
+
public:
|
|
14
|
+
explicit PopCount(UInt64 x) : value_() {
|
|
15
|
+
x = (x & 0x5555555555555555ULL) + ((x & 0xAAAAAAAAAAAAAAAAULL) >> 1);
|
|
16
|
+
x = (x & 0x3333333333333333ULL) + ((x & 0xCCCCCCCCCCCCCCCCULL) >> 2);
|
|
17
|
+
x = (x & 0x0F0F0F0F0F0F0F0FULL) + ((x & 0xF0F0F0F0F0F0F0F0ULL) >> 4);
|
|
18
|
+
x *= 0x0101010101010101ULL;
|
|
19
|
+
value_ = x;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
std::size_t lo8() const {
|
|
23
|
+
return (std::size_t)(value_ & 0xFFU);
|
|
24
|
+
}
|
|
25
|
+
std::size_t lo16() const {
|
|
26
|
+
return (std::size_t)((value_ >> 8) & 0xFFU);
|
|
27
|
+
}
|
|
28
|
+
std::size_t lo24() const {
|
|
29
|
+
return (std::size_t)((value_ >> 16) & 0xFFU);
|
|
30
|
+
}
|
|
31
|
+
std::size_t lo32() const {
|
|
32
|
+
return (std::size_t)((value_ >> 24) & 0xFFU);
|
|
33
|
+
}
|
|
34
|
+
std::size_t lo40() const {
|
|
35
|
+
return (std::size_t)((value_ >> 32) & 0xFFU);
|
|
36
|
+
}
|
|
37
|
+
std::size_t lo48() const {
|
|
38
|
+
return (std::size_t)((value_ >> 40) & 0xFFU);
|
|
39
|
+
}
|
|
40
|
+
std::size_t lo56() const {
|
|
41
|
+
return (std::size_t)((value_ >> 48) & 0xFFU);
|
|
42
|
+
}
|
|
43
|
+
std::size_t lo64() const {
|
|
44
|
+
return (std::size_t)((value_ >> 56) & 0xFFU);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
static std::size_t count(UInt64 x) {
|
|
48
|
+
#if defined(MARISA_X64) && defined(MARISA_USE_POPCNT)
|
|
49
|
+
#ifdef _MSC_VER
|
|
50
|
+
return __popcnt64(x);
|
|
51
|
+
#else // _MSC_VER
|
|
52
|
+
return _mm_popcnt_u64(x);
|
|
53
|
+
#endif // _MSC_VER
|
|
54
|
+
#else // defined(MARISA_X64) && defined(MARISA_USE_POPCNT)
|
|
55
|
+
return PopCount(x).lo64();
|
|
56
|
+
#endif // defined(MARISA_X64) && defined(MARISA_USE_POPCNT)
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
private:
|
|
60
|
+
UInt64 value_;
|
|
61
|
+
};
|
|
62
|
+
|
|
63
|
+
#else // MARISA_WORD_SIZE == 64
|
|
64
|
+
|
|
65
|
+
class PopCount {
|
|
66
|
+
public:
|
|
67
|
+
explicit PopCount(UInt32 x) : value_() {
|
|
68
|
+
x = (x & 0x55555555U) + ((x & 0xAAAAAAAAU) >> 1);
|
|
69
|
+
x = (x & 0x33333333U) + ((x & 0xCCCCCCCCU) >> 2);
|
|
70
|
+
x = (x & 0x0F0F0F0FU) + ((x & 0xF0F0F0F0U) >> 4);
|
|
71
|
+
x *= 0x01010101U;
|
|
72
|
+
value_ = x;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
std::size_t lo8() const {
|
|
76
|
+
return value_ & 0xFFU;
|
|
77
|
+
}
|
|
78
|
+
std::size_t lo16() const {
|
|
79
|
+
return (value_ >> 8) & 0xFFU;
|
|
80
|
+
}
|
|
81
|
+
std::size_t lo24() const {
|
|
82
|
+
return (value_ >> 16) & 0xFFU;
|
|
83
|
+
}
|
|
84
|
+
std::size_t lo32() const {
|
|
85
|
+
return (value_ >> 24) & 0xFFU;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
static std::size_t count(UInt32 x) {
|
|
89
|
+
#ifdef MARISA_USE_POPCNT
|
|
90
|
+
#ifdef _MSC_VER
|
|
91
|
+
return __popcnt(x);
|
|
92
|
+
#else // _MSC_VER
|
|
93
|
+
return _mm_popcnt_u32(x);
|
|
94
|
+
#endif // _MSC_VER
|
|
95
|
+
#else // MARISA_USE_POPCNT
|
|
96
|
+
return PopCount(x).lo32();
|
|
97
|
+
#endif // MARISA_USE_POPCNT
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
private:
|
|
101
|
+
UInt32 value_;
|
|
102
|
+
};
|
|
103
|
+
|
|
104
|
+
#endif // MARISA_WORD_SIZE == 64
|
|
105
|
+
|
|
106
|
+
} // namespace vector
|
|
107
|
+
} // namespace grimoire
|
|
108
|
+
} // namespace marisa
|
|
109
|
+
|
|
110
|
+
#endif // MARISA_GRIMOIRE_VECTOR_POP_COUNT_H_
|