melisa 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +11 -0
- data/ext/marisa/bindings/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/marisa-swig.h +183 -0
- data/ext/marisa/bindings/perl/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/perl/marisa-swig.h +183 -0
- data/ext/marisa/bindings/perl/marisa-swig_wrap.cxx +5160 -0
- data/ext/marisa/bindings/python/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/python/marisa-swig.h +183 -0
- data/ext/marisa/bindings/python/marisa-swig_wrap.cxx +6090 -0
- data/ext/marisa/bindings/ruby/extconf.rb +5 -0
- data/ext/marisa/bindings/ruby/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/ruby/marisa-swig.h +183 -0
- data/ext/marisa/bindings/ruby/marisa-swig_wrap.cxx +4708 -0
- data/ext/marisa/lib/marisa.h +14 -0
- data/ext/marisa/lib/marisa/agent.cc +51 -0
- data/ext/marisa/lib/marisa/agent.h +73 -0
- data/ext/marisa/lib/marisa/base.h +193 -0
- data/ext/marisa/lib/marisa/exception.h +82 -0
- data/ext/marisa/lib/marisa/grimoire/algorithm.h +26 -0
- data/ext/marisa/lib/marisa/grimoire/algorithm/sort.h +196 -0
- data/ext/marisa/lib/marisa/grimoire/intrin.h +115 -0
- data/ext/marisa/lib/marisa/grimoire/io.h +18 -0
- data/ext/marisa/lib/marisa/grimoire/io/mapper.cc +163 -0
- data/ext/marisa/lib/marisa/grimoire/io/mapper.h +67 -0
- data/ext/marisa/lib/marisa/grimoire/io/reader.cc +147 -0
- data/ext/marisa/lib/marisa/grimoire/io/reader.h +66 -0
- data/ext/marisa/lib/marisa/grimoire/io/writer.cc +148 -0
- data/ext/marisa/lib/marisa/grimoire/io/writer.h +65 -0
- data/ext/marisa/lib/marisa/grimoire/trie.h +16 -0
- data/ext/marisa/lib/marisa/grimoire/trie/cache.h +81 -0
- data/ext/marisa/lib/marisa/grimoire/trie/config.h +155 -0
- data/ext/marisa/lib/marisa/grimoire/trie/entry.h +82 -0
- data/ext/marisa/lib/marisa/grimoire/trie/header.h +61 -0
- data/ext/marisa/lib/marisa/grimoire/trie/history.h +65 -0
- data/ext/marisa/lib/marisa/grimoire/trie/key.h +228 -0
- data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.cc +876 -0
- data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.h +134 -0
- data/ext/marisa/lib/marisa/grimoire/trie/range.h +115 -0
- data/ext/marisa/lib/marisa/grimoire/trie/state.h +117 -0
- data/ext/marisa/lib/marisa/grimoire/trie/tail.cc +218 -0
- data/ext/marisa/lib/marisa/grimoire/trie/tail.h +72 -0
- data/ext/marisa/lib/marisa/grimoire/vector.h +18 -0
- data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.cc +826 -0
- data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.h +179 -0
- data/ext/marisa/lib/marisa/grimoire/vector/flat-vector.h +205 -0
- data/ext/marisa/lib/marisa/grimoire/vector/pop-count.h +110 -0
- data/ext/marisa/lib/marisa/grimoire/vector/rank-index.h +82 -0
- data/ext/marisa/lib/marisa/grimoire/vector/vector.h +256 -0
- data/ext/marisa/lib/marisa/iostream.h +18 -0
- data/ext/marisa/lib/marisa/key.h +85 -0
- data/ext/marisa/lib/marisa/keyset.cc +181 -0
- data/ext/marisa/lib/marisa/keyset.h +80 -0
- data/ext/marisa/lib/marisa/query.h +71 -0
- data/ext/marisa/lib/marisa/scoped-array.h +48 -0
- data/ext/marisa/lib/marisa/scoped-ptr.h +52 -0
- data/ext/marisa/lib/marisa/stdio.h +15 -0
- data/ext/marisa/lib/marisa/trie.cc +249 -0
- data/ext/marisa/lib/marisa/trie.h +64 -0
- data/ext/marisa/tests/base-test.cc +309 -0
- data/ext/marisa/tests/io-test.cc +252 -0
- data/ext/marisa/tests/marisa-assert.h +26 -0
- data/ext/marisa/tests/marisa-test.cc +388 -0
- data/ext/marisa/tests/trie-test.cc +507 -0
- data/ext/marisa/tests/vector-test.cc +466 -0
- data/ext/marisa/tools/cmdopt.cc +298 -0
- data/ext/marisa/tools/cmdopt.h +58 -0
- data/ext/marisa/tools/marisa-benchmark.cc +418 -0
- data/ext/marisa/tools/marisa-build.cc +206 -0
- data/ext/marisa/tools/marisa-common-prefix-search.cc +143 -0
- data/ext/marisa/tools/marisa-dump.cc +151 -0
- data/ext/marisa/tools/marisa-lookup.cc +110 -0
- data/ext/marisa/tools/marisa-predictive-search.cc +143 -0
- data/ext/marisa/tools/marisa-reverse-lookup.cc +110 -0
- data/lib/melisa.rb +7 -0
- data/lib/melisa/base_config_flags.rb +76 -0
- data/lib/melisa/bytes_trie.rb +55 -0
- data/lib/melisa/int_trie.rb +14 -0
- data/lib/melisa/search.rb +55 -0
- data/lib/melisa/trie.rb +96 -0
- data/lib/melisa/version.rb +3 -0
- data/melisa.gemspec +36 -0
- data/spec/base_config_flags_spec.rb +73 -0
- data/spec/bytes_trie_spec.rb +16 -0
- data/spec/int_trie_spec.rb +16 -0
- data/spec/search_spec.rb +29 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/trie_spec.rb +30 -0
- metadata +207 -0
@@ -0,0 +1,179 @@
|
|
1
|
+
#ifndef MARISA_GRIMOIRE_VECTOR_BIT_VECTOR_H_
|
2
|
+
#define MARISA_GRIMOIRE_VECTOR_BIT_VECTOR_H_
|
3
|
+
|
4
|
+
#include "marisa/grimoire/vector/rank-index.h"
|
5
|
+
#include "marisa/grimoire/vector/vector.h"
|
6
|
+
|
7
|
+
namespace marisa {
|
8
|
+
namespace grimoire {
|
9
|
+
namespace vector {
|
10
|
+
|
11
|
+
class BitVector {
|
12
|
+
public:
|
13
|
+
#if MARISA_WORD_SIZE == 64
|
14
|
+
typedef UInt64 Unit;
|
15
|
+
#else // MARISA_WORD_SIZE == 64
|
16
|
+
typedef UInt32 Unit;
|
17
|
+
#endif // MARISA_WORD_SIZE == 64
|
18
|
+
|
19
|
+
BitVector()
|
20
|
+
: units_(), size_(0), num_1s_(0), ranks_(), select0s_(), select1s_() {}
|
21
|
+
|
22
|
+
void build(bool enables_select0, bool enables_select1) {
|
23
|
+
BitVector temp;
|
24
|
+
temp.build_index(*this, enables_select0, enables_select1);
|
25
|
+
units_.shrink();
|
26
|
+
temp.units_.swap(units_);
|
27
|
+
swap(temp);
|
28
|
+
}
|
29
|
+
|
30
|
+
void map(Mapper &mapper) {
|
31
|
+
BitVector temp;
|
32
|
+
temp.map_(mapper);
|
33
|
+
swap(temp);
|
34
|
+
}
|
35
|
+
void read(Reader &reader) {
|
36
|
+
BitVector temp;
|
37
|
+
temp.read_(reader);
|
38
|
+
swap(temp);
|
39
|
+
}
|
40
|
+
void write(Writer &writer) const {
|
41
|
+
write_(writer);
|
42
|
+
}
|
43
|
+
|
44
|
+
void disable_select0() {
|
45
|
+
select0s_.clear();
|
46
|
+
}
|
47
|
+
void disable_select1() {
|
48
|
+
select1s_.clear();
|
49
|
+
}
|
50
|
+
|
51
|
+
void push_back(bool bit) {
|
52
|
+
MARISA_THROW_IF(size_ == MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
|
53
|
+
if (size_ == (MARISA_WORD_SIZE * units_.size())) {
|
54
|
+
units_.resize(units_.size() + (64 / MARISA_WORD_SIZE), 0);
|
55
|
+
}
|
56
|
+
if (bit) {
|
57
|
+
units_[size_ / MARISA_WORD_SIZE] |=
|
58
|
+
(Unit)1 << (size_ % MARISA_WORD_SIZE);
|
59
|
+
++num_1s_;
|
60
|
+
}
|
61
|
+
++size_;
|
62
|
+
}
|
63
|
+
|
64
|
+
bool operator[](std::size_t i) const {
|
65
|
+
MARISA_DEBUG_IF(i >= size_, MARISA_BOUND_ERROR);
|
66
|
+
return (units_[i / MARISA_WORD_SIZE]
|
67
|
+
& ((Unit)1 << (i % MARISA_WORD_SIZE))) != 0;
|
68
|
+
}
|
69
|
+
|
70
|
+
std::size_t rank0(std::size_t i) const {
|
71
|
+
MARISA_DEBUG_IF(ranks_.empty(), MARISA_STATE_ERROR);
|
72
|
+
MARISA_DEBUG_IF(i > size_, MARISA_BOUND_ERROR);
|
73
|
+
return i - rank1(i);
|
74
|
+
}
|
75
|
+
std::size_t rank1(std::size_t i) const;
|
76
|
+
|
77
|
+
std::size_t select0(std::size_t i) const;
|
78
|
+
std::size_t select1(std::size_t i) const;
|
79
|
+
|
80
|
+
std::size_t num_0s() const {
|
81
|
+
return size_ - num_1s_;
|
82
|
+
}
|
83
|
+
std::size_t num_1s() const {
|
84
|
+
return num_1s_;
|
85
|
+
}
|
86
|
+
|
87
|
+
bool empty() const {
|
88
|
+
return size_ == 0;
|
89
|
+
}
|
90
|
+
std::size_t size() const {
|
91
|
+
return size_;
|
92
|
+
}
|
93
|
+
std::size_t total_size() const {
|
94
|
+
return units_.total_size() + ranks_.total_size()
|
95
|
+
+ select0s_.total_size() + select1s_.total_size();
|
96
|
+
}
|
97
|
+
std::size_t io_size() const {
|
98
|
+
return units_.io_size() + (sizeof(UInt32) * 2) + ranks_.io_size()
|
99
|
+
+ select0s_.io_size() + select1s_.io_size();
|
100
|
+
}
|
101
|
+
|
102
|
+
void clear() {
|
103
|
+
BitVector().swap(*this);
|
104
|
+
}
|
105
|
+
void swap(BitVector &rhs) {
|
106
|
+
units_.swap(rhs.units_);
|
107
|
+
marisa::swap(size_, rhs.size_);
|
108
|
+
marisa::swap(num_1s_, rhs.num_1s_);
|
109
|
+
ranks_.swap(rhs.ranks_);
|
110
|
+
select0s_.swap(rhs.select0s_);
|
111
|
+
select1s_.swap(rhs.select1s_);
|
112
|
+
}
|
113
|
+
|
114
|
+
private:
|
115
|
+
Vector<Unit> units_;
|
116
|
+
std::size_t size_;
|
117
|
+
std::size_t num_1s_;
|
118
|
+
Vector<RankIndex> ranks_;
|
119
|
+
Vector<UInt32> select0s_;
|
120
|
+
Vector<UInt32> select1s_;
|
121
|
+
|
122
|
+
void build_index(const BitVector &bv,
|
123
|
+
bool enables_select0, bool enables_select1);
|
124
|
+
|
125
|
+
void map_(Mapper &mapper) {
|
126
|
+
units_.map(mapper);
|
127
|
+
{
|
128
|
+
UInt32 temp_size;
|
129
|
+
mapper.map(&temp_size);
|
130
|
+
size_ = temp_size;
|
131
|
+
}
|
132
|
+
{
|
133
|
+
UInt32 temp_num_1s;
|
134
|
+
mapper.map(&temp_num_1s);
|
135
|
+
MARISA_THROW_IF(temp_num_1s > size_, MARISA_FORMAT_ERROR);
|
136
|
+
num_1s_ = temp_num_1s;
|
137
|
+
}
|
138
|
+
ranks_.map(mapper);
|
139
|
+
select0s_.map(mapper);
|
140
|
+
select1s_.map(mapper);
|
141
|
+
}
|
142
|
+
|
143
|
+
void read_(Reader &reader) {
|
144
|
+
units_.read(reader);
|
145
|
+
{
|
146
|
+
UInt32 temp_size;
|
147
|
+
reader.read(&temp_size);
|
148
|
+
size_ = temp_size;
|
149
|
+
}
|
150
|
+
{
|
151
|
+
UInt32 temp_num_1s;
|
152
|
+
reader.read(&temp_num_1s);
|
153
|
+
MARISA_THROW_IF(temp_num_1s > size_, MARISA_FORMAT_ERROR);
|
154
|
+
num_1s_ = temp_num_1s;
|
155
|
+
}
|
156
|
+
ranks_.read(reader);
|
157
|
+
select0s_.read(reader);
|
158
|
+
select1s_.read(reader);
|
159
|
+
}
|
160
|
+
|
161
|
+
void write_(Writer &writer) const {
|
162
|
+
units_.write(writer);
|
163
|
+
writer.write((UInt32)size_);
|
164
|
+
writer.write((UInt32)num_1s_);
|
165
|
+
ranks_.write(writer);
|
166
|
+
select0s_.write(writer);
|
167
|
+
select1s_.write(writer);
|
168
|
+
}
|
169
|
+
|
170
|
+
// Disallows copy and assignment.
|
171
|
+
BitVector(const BitVector &);
|
172
|
+
BitVector &operator=(const BitVector &);
|
173
|
+
};
|
174
|
+
|
175
|
+
} // namespace vector
|
176
|
+
} // namespace grimoire
|
177
|
+
} // namespace marisa
|
178
|
+
|
179
|
+
#endif // MARISA_GRIMOIRE_VECTOR_BIT_VECTOR_H_
|
@@ -0,0 +1,205 @@
|
|
1
|
+
#ifndef MARISA_GRIMOIRE_VECTOR_FLAT_VECTOR_H_
|
2
|
+
#define MARISA_GRIMOIRE_VECTOR_FLAT_VECTOR_H_
|
3
|
+
|
4
|
+
#include "marisa/grimoire/vector/vector.h"
|
5
|
+
|
6
|
+
namespace marisa {
|
7
|
+
namespace grimoire {
|
8
|
+
namespace vector {
|
9
|
+
|
10
|
+
class FlatVector {
|
11
|
+
public:
|
12
|
+
#if MARISA_WORD_SIZE == 64
|
13
|
+
typedef UInt64 Unit;
|
14
|
+
#else // MARISA_WORD_SIZE == 64
|
15
|
+
typedef UInt32 Unit;
|
16
|
+
#endif // MARISA_WORD_SIZE == 64
|
17
|
+
|
18
|
+
FlatVector() : units_(), value_size_(0), mask_(0), size_(0) {}
|
19
|
+
|
20
|
+
void build(const Vector<UInt32> &values) {
|
21
|
+
FlatVector temp;
|
22
|
+
temp.build_(values);
|
23
|
+
swap(temp);
|
24
|
+
}
|
25
|
+
|
26
|
+
void map(Mapper &mapper) {
|
27
|
+
FlatVector temp;
|
28
|
+
temp.map_(mapper);
|
29
|
+
swap(temp);
|
30
|
+
}
|
31
|
+
void read(Reader &reader) {
|
32
|
+
FlatVector temp;
|
33
|
+
temp.read_(reader);
|
34
|
+
swap(temp);
|
35
|
+
}
|
36
|
+
void write(Writer &writer) const {
|
37
|
+
write_(writer);
|
38
|
+
}
|
39
|
+
|
40
|
+
UInt32 operator[](std::size_t i) const {
|
41
|
+
MARISA_DEBUG_IF(i >= size_, MARISA_BOUND_ERROR);
|
42
|
+
|
43
|
+
const std::size_t pos = i * value_size_;
|
44
|
+
const std::size_t unit_id = pos / MARISA_WORD_SIZE;
|
45
|
+
const std::size_t unit_offset = pos % MARISA_WORD_SIZE;
|
46
|
+
|
47
|
+
if ((unit_offset + value_size_) <= MARISA_WORD_SIZE) {
|
48
|
+
return (UInt32)(units_[unit_id] >> unit_offset) & mask_;
|
49
|
+
} else {
|
50
|
+
return (UInt32)((units_[unit_id] >> unit_offset)
|
51
|
+
| (units_[unit_id + 1] << (MARISA_WORD_SIZE - unit_offset))) & mask_;
|
52
|
+
}
|
53
|
+
}
|
54
|
+
|
55
|
+
std::size_t value_size() const {
|
56
|
+
return value_size_;
|
57
|
+
}
|
58
|
+
UInt32 mask() const {
|
59
|
+
return mask_;
|
60
|
+
}
|
61
|
+
|
62
|
+
bool empty() const {
|
63
|
+
return size_ == 0;
|
64
|
+
}
|
65
|
+
std::size_t size() const {
|
66
|
+
return size_;
|
67
|
+
}
|
68
|
+
std::size_t total_size() const {
|
69
|
+
return units_.total_size();
|
70
|
+
}
|
71
|
+
std::size_t io_size() const {
|
72
|
+
return units_.io_size() + (sizeof(UInt32) * 2) + sizeof(UInt64);
|
73
|
+
}
|
74
|
+
|
75
|
+
void clear() {
|
76
|
+
FlatVector().swap(*this);
|
77
|
+
}
|
78
|
+
void swap(FlatVector &rhs) {
|
79
|
+
units_.swap(rhs.units_);
|
80
|
+
marisa::swap(value_size_, rhs.value_size_);
|
81
|
+
marisa::swap(mask_, rhs.mask_);
|
82
|
+
marisa::swap(size_, rhs.size_);
|
83
|
+
}
|
84
|
+
|
85
|
+
private:
|
86
|
+
Vector<Unit> units_;
|
87
|
+
std::size_t value_size_;
|
88
|
+
UInt32 mask_;
|
89
|
+
std::size_t size_;
|
90
|
+
|
91
|
+
void build_(const Vector<UInt32> &values) {
|
92
|
+
UInt32 max_value = 0;
|
93
|
+
for (std::size_t i = 0; i < values.size(); ++i) {
|
94
|
+
if (values[i] > max_value) {
|
95
|
+
max_value = values[i];
|
96
|
+
}
|
97
|
+
}
|
98
|
+
|
99
|
+
std::size_t value_size = 0;
|
100
|
+
while (max_value != 0) {
|
101
|
+
++value_size;
|
102
|
+
max_value >>= 1;
|
103
|
+
}
|
104
|
+
|
105
|
+
std::size_t num_units = values.empty() ? 0 : (64 / MARISA_WORD_SIZE);
|
106
|
+
if (value_size != 0) {
|
107
|
+
num_units = (std::size_t)(
|
108
|
+
(((UInt64)value_size * values.size()) + (MARISA_WORD_SIZE - 1))
|
109
|
+
/ MARISA_WORD_SIZE);
|
110
|
+
num_units += num_units % (64 / MARISA_WORD_SIZE);
|
111
|
+
}
|
112
|
+
|
113
|
+
units_.resize(num_units);
|
114
|
+
if (num_units > 0) {
|
115
|
+
units_.back() = 0;
|
116
|
+
}
|
117
|
+
|
118
|
+
value_size_ = value_size;
|
119
|
+
if (value_size != 0) {
|
120
|
+
mask_ = MARISA_UINT32_MAX >> (32 - value_size);
|
121
|
+
}
|
122
|
+
size_ = values.size();
|
123
|
+
|
124
|
+
for (std::size_t i = 0; i < values.size(); ++i) {
|
125
|
+
set(i, values[i]);
|
126
|
+
}
|
127
|
+
}
|
128
|
+
|
129
|
+
void map_(Mapper &mapper) {
|
130
|
+
units_.map(mapper);
|
131
|
+
{
|
132
|
+
UInt32 temp_value_size;
|
133
|
+
mapper.map(&temp_value_size);
|
134
|
+
MARISA_THROW_IF(temp_value_size > 32, MARISA_FORMAT_ERROR);
|
135
|
+
value_size_ = temp_value_size;
|
136
|
+
}
|
137
|
+
{
|
138
|
+
UInt32 temp_mask;
|
139
|
+
mapper.map(&temp_mask);
|
140
|
+
mask_ = temp_mask;
|
141
|
+
}
|
142
|
+
{
|
143
|
+
UInt64 temp_size;
|
144
|
+
mapper.map(&temp_size);
|
145
|
+
MARISA_THROW_IF(temp_size > MARISA_SIZE_MAX, MARISA_SIZE_ERROR);
|
146
|
+
size_ = (std::size_t)temp_size;
|
147
|
+
}
|
148
|
+
}
|
149
|
+
|
150
|
+
void read_(Reader &reader) {
|
151
|
+
units_.read(reader);
|
152
|
+
{
|
153
|
+
UInt32 temp_value_size;
|
154
|
+
reader.read(&temp_value_size);
|
155
|
+
MARISA_THROW_IF(temp_value_size > 32, MARISA_FORMAT_ERROR);
|
156
|
+
value_size_ = temp_value_size;
|
157
|
+
}
|
158
|
+
{
|
159
|
+
UInt32 temp_mask;
|
160
|
+
reader.read(&temp_mask);
|
161
|
+
mask_ = temp_mask;
|
162
|
+
}
|
163
|
+
{
|
164
|
+
UInt64 temp_size;
|
165
|
+
reader.read(&temp_size);
|
166
|
+
MARISA_THROW_IF(temp_size > MARISA_SIZE_MAX, MARISA_SIZE_ERROR);
|
167
|
+
size_ = (std::size_t)temp_size;
|
168
|
+
}
|
169
|
+
}
|
170
|
+
|
171
|
+
void write_(Writer &writer) const {
|
172
|
+
units_.write(writer);
|
173
|
+
writer.write((UInt32)value_size_);
|
174
|
+
writer.write((UInt32)mask_);
|
175
|
+
writer.write((UInt64)size_);
|
176
|
+
}
|
177
|
+
|
178
|
+
void set(std::size_t i, UInt32 value) {
|
179
|
+
MARISA_DEBUG_IF(i >= size_, MARISA_BOUND_ERROR);
|
180
|
+
MARISA_DEBUG_IF(value > mask_, MARISA_RANGE_ERROR);
|
181
|
+
|
182
|
+
const std::size_t pos = i * value_size_;
|
183
|
+
const std::size_t unit_id = pos / MARISA_WORD_SIZE;
|
184
|
+
const std::size_t unit_offset = pos % MARISA_WORD_SIZE;
|
185
|
+
|
186
|
+
units_[unit_id] &= ~((Unit)mask_ << unit_offset);
|
187
|
+
units_[unit_id] |= (Unit)(value & mask_) << unit_offset;
|
188
|
+
if ((unit_offset + value_size_) > MARISA_WORD_SIZE) {
|
189
|
+
units_[unit_id + 1] &=
|
190
|
+
~((Unit)mask_ >> (MARISA_WORD_SIZE - unit_offset));
|
191
|
+
units_[unit_id + 1] |=
|
192
|
+
(Unit)(value & mask_) >> (MARISA_WORD_SIZE - unit_offset);
|
193
|
+
}
|
194
|
+
}
|
195
|
+
|
196
|
+
// Disallows copy and assignment.
|
197
|
+
FlatVector(const FlatVector &);
|
198
|
+
FlatVector &operator=(const FlatVector &);
|
199
|
+
};
|
200
|
+
|
201
|
+
} // namespace vector
|
202
|
+
} // namespace grimoire
|
203
|
+
} // namespace marisa
|
204
|
+
|
205
|
+
#endif // MARISA_GRIMOIRE_VECTOR_FLAT_VECTOR_H_
|
@@ -0,0 +1,110 @@
|
|
1
|
+
#ifndef MARISA_GRIMOIRE_VECTOR_POP_COUNT_H_
|
2
|
+
#define MARISA_GRIMOIRE_VECTOR_POP_COUNT_H_
|
3
|
+
|
4
|
+
#include "marisa/grimoire/intrin.h"
|
5
|
+
|
6
|
+
namespace marisa {
|
7
|
+
namespace grimoire {
|
8
|
+
namespace vector {
|
9
|
+
|
10
|
+
#if MARISA_WORD_SIZE == 64
|
11
|
+
|
12
|
+
class PopCount {
|
13
|
+
public:
|
14
|
+
explicit PopCount(UInt64 x) : value_() {
|
15
|
+
x = (x & 0x5555555555555555ULL) + ((x & 0xAAAAAAAAAAAAAAAAULL) >> 1);
|
16
|
+
x = (x & 0x3333333333333333ULL) + ((x & 0xCCCCCCCCCCCCCCCCULL) >> 2);
|
17
|
+
x = (x & 0x0F0F0F0F0F0F0F0FULL) + ((x & 0xF0F0F0F0F0F0F0F0ULL) >> 4);
|
18
|
+
x *= 0x0101010101010101ULL;
|
19
|
+
value_ = x;
|
20
|
+
}
|
21
|
+
|
22
|
+
std::size_t lo8() const {
|
23
|
+
return (std::size_t)(value_ & 0xFFU);
|
24
|
+
}
|
25
|
+
std::size_t lo16() const {
|
26
|
+
return (std::size_t)((value_ >> 8) & 0xFFU);
|
27
|
+
}
|
28
|
+
std::size_t lo24() const {
|
29
|
+
return (std::size_t)((value_ >> 16) & 0xFFU);
|
30
|
+
}
|
31
|
+
std::size_t lo32() const {
|
32
|
+
return (std::size_t)((value_ >> 24) & 0xFFU);
|
33
|
+
}
|
34
|
+
std::size_t lo40() const {
|
35
|
+
return (std::size_t)((value_ >> 32) & 0xFFU);
|
36
|
+
}
|
37
|
+
std::size_t lo48() const {
|
38
|
+
return (std::size_t)((value_ >> 40) & 0xFFU);
|
39
|
+
}
|
40
|
+
std::size_t lo56() const {
|
41
|
+
return (std::size_t)((value_ >> 48) & 0xFFU);
|
42
|
+
}
|
43
|
+
std::size_t lo64() const {
|
44
|
+
return (std::size_t)((value_ >> 56) & 0xFFU);
|
45
|
+
}
|
46
|
+
|
47
|
+
static std::size_t count(UInt64 x) {
|
48
|
+
#if defined(MARISA_X64) && defined(MARISA_USE_POPCNT)
|
49
|
+
#ifdef _MSC_VER
|
50
|
+
return __popcnt64(x);
|
51
|
+
#else // _MSC_VER
|
52
|
+
return _mm_popcnt_u64(x);
|
53
|
+
#endif // _MSC_VER
|
54
|
+
#else // defined(MARISA_X64) && defined(MARISA_USE_POPCNT)
|
55
|
+
return PopCount(x).lo64();
|
56
|
+
#endif // defined(MARISA_X64) && defined(MARISA_USE_POPCNT)
|
57
|
+
}
|
58
|
+
|
59
|
+
private:
|
60
|
+
UInt64 value_;
|
61
|
+
};
|
62
|
+
|
63
|
+
#else // MARISA_WORD_SIZE == 64
|
64
|
+
|
65
|
+
class PopCount {
|
66
|
+
public:
|
67
|
+
explicit PopCount(UInt32 x) : value_() {
|
68
|
+
x = (x & 0x55555555U) + ((x & 0xAAAAAAAAU) >> 1);
|
69
|
+
x = (x & 0x33333333U) + ((x & 0xCCCCCCCCU) >> 2);
|
70
|
+
x = (x & 0x0F0F0F0FU) + ((x & 0xF0F0F0F0U) >> 4);
|
71
|
+
x *= 0x01010101U;
|
72
|
+
value_ = x;
|
73
|
+
}
|
74
|
+
|
75
|
+
std::size_t lo8() const {
|
76
|
+
return value_ & 0xFFU;
|
77
|
+
}
|
78
|
+
std::size_t lo16() const {
|
79
|
+
return (value_ >> 8) & 0xFFU;
|
80
|
+
}
|
81
|
+
std::size_t lo24() const {
|
82
|
+
return (value_ >> 16) & 0xFFU;
|
83
|
+
}
|
84
|
+
std::size_t lo32() const {
|
85
|
+
return (value_ >> 24) & 0xFFU;
|
86
|
+
}
|
87
|
+
|
88
|
+
static std::size_t count(UInt32 x) {
|
89
|
+
#ifdef MARISA_USE_POPCNT
|
90
|
+
#ifdef _MSC_VER
|
91
|
+
return __popcnt(x);
|
92
|
+
#else // _MSC_VER
|
93
|
+
return _mm_popcnt_u32(x);
|
94
|
+
#endif // _MSC_VER
|
95
|
+
#else // MARISA_USE_POPCNT
|
96
|
+
return PopCount(x).lo32();
|
97
|
+
#endif // MARISA_USE_POPCNT
|
98
|
+
}
|
99
|
+
|
100
|
+
private:
|
101
|
+
UInt32 value_;
|
102
|
+
};
|
103
|
+
|
104
|
+
#endif // MARISA_WORD_SIZE == 64
|
105
|
+
|
106
|
+
} // namespace vector
|
107
|
+
} // namespace grimoire
|
108
|
+
} // namespace marisa
|
109
|
+
|
110
|
+
#endif // MARISA_GRIMOIRE_VECTOR_POP_COUNT_H_
|