melisa 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +11 -0
- data/ext/marisa/bindings/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/marisa-swig.h +183 -0
- data/ext/marisa/bindings/perl/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/perl/marisa-swig.h +183 -0
- data/ext/marisa/bindings/perl/marisa-swig_wrap.cxx +5160 -0
- data/ext/marisa/bindings/python/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/python/marisa-swig.h +183 -0
- data/ext/marisa/bindings/python/marisa-swig_wrap.cxx +6090 -0
- data/ext/marisa/bindings/ruby/extconf.rb +5 -0
- data/ext/marisa/bindings/ruby/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/ruby/marisa-swig.h +183 -0
- data/ext/marisa/bindings/ruby/marisa-swig_wrap.cxx +4708 -0
- data/ext/marisa/lib/marisa.h +14 -0
- data/ext/marisa/lib/marisa/agent.cc +51 -0
- data/ext/marisa/lib/marisa/agent.h +73 -0
- data/ext/marisa/lib/marisa/base.h +193 -0
- data/ext/marisa/lib/marisa/exception.h +82 -0
- data/ext/marisa/lib/marisa/grimoire/algorithm.h +26 -0
- data/ext/marisa/lib/marisa/grimoire/algorithm/sort.h +196 -0
- data/ext/marisa/lib/marisa/grimoire/intrin.h +115 -0
- data/ext/marisa/lib/marisa/grimoire/io.h +18 -0
- data/ext/marisa/lib/marisa/grimoire/io/mapper.cc +163 -0
- data/ext/marisa/lib/marisa/grimoire/io/mapper.h +67 -0
- data/ext/marisa/lib/marisa/grimoire/io/reader.cc +147 -0
- data/ext/marisa/lib/marisa/grimoire/io/reader.h +66 -0
- data/ext/marisa/lib/marisa/grimoire/io/writer.cc +148 -0
- data/ext/marisa/lib/marisa/grimoire/io/writer.h +65 -0
- data/ext/marisa/lib/marisa/grimoire/trie.h +16 -0
- data/ext/marisa/lib/marisa/grimoire/trie/cache.h +81 -0
- data/ext/marisa/lib/marisa/grimoire/trie/config.h +155 -0
- data/ext/marisa/lib/marisa/grimoire/trie/entry.h +82 -0
- data/ext/marisa/lib/marisa/grimoire/trie/header.h +61 -0
- data/ext/marisa/lib/marisa/grimoire/trie/history.h +65 -0
- data/ext/marisa/lib/marisa/grimoire/trie/key.h +228 -0
- data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.cc +876 -0
- data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.h +134 -0
- data/ext/marisa/lib/marisa/grimoire/trie/range.h +115 -0
- data/ext/marisa/lib/marisa/grimoire/trie/state.h +117 -0
- data/ext/marisa/lib/marisa/grimoire/trie/tail.cc +218 -0
- data/ext/marisa/lib/marisa/grimoire/trie/tail.h +72 -0
- data/ext/marisa/lib/marisa/grimoire/vector.h +18 -0
- data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.cc +826 -0
- data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.h +179 -0
- data/ext/marisa/lib/marisa/grimoire/vector/flat-vector.h +205 -0
- data/ext/marisa/lib/marisa/grimoire/vector/pop-count.h +110 -0
- data/ext/marisa/lib/marisa/grimoire/vector/rank-index.h +82 -0
- data/ext/marisa/lib/marisa/grimoire/vector/vector.h +256 -0
- data/ext/marisa/lib/marisa/iostream.h +18 -0
- data/ext/marisa/lib/marisa/key.h +85 -0
- data/ext/marisa/lib/marisa/keyset.cc +181 -0
- data/ext/marisa/lib/marisa/keyset.h +80 -0
- data/ext/marisa/lib/marisa/query.h +71 -0
- data/ext/marisa/lib/marisa/scoped-array.h +48 -0
- data/ext/marisa/lib/marisa/scoped-ptr.h +52 -0
- data/ext/marisa/lib/marisa/stdio.h +15 -0
- data/ext/marisa/lib/marisa/trie.cc +249 -0
- data/ext/marisa/lib/marisa/trie.h +64 -0
- data/ext/marisa/tests/base-test.cc +309 -0
- data/ext/marisa/tests/io-test.cc +252 -0
- data/ext/marisa/tests/marisa-assert.h +26 -0
- data/ext/marisa/tests/marisa-test.cc +388 -0
- data/ext/marisa/tests/trie-test.cc +507 -0
- data/ext/marisa/tests/vector-test.cc +466 -0
- data/ext/marisa/tools/cmdopt.cc +298 -0
- data/ext/marisa/tools/cmdopt.h +58 -0
- data/ext/marisa/tools/marisa-benchmark.cc +418 -0
- data/ext/marisa/tools/marisa-build.cc +206 -0
- data/ext/marisa/tools/marisa-common-prefix-search.cc +143 -0
- data/ext/marisa/tools/marisa-dump.cc +151 -0
- data/ext/marisa/tools/marisa-lookup.cc +110 -0
- data/ext/marisa/tools/marisa-predictive-search.cc +143 -0
- data/ext/marisa/tools/marisa-reverse-lookup.cc +110 -0
- data/lib/melisa.rb +7 -0
- data/lib/melisa/base_config_flags.rb +76 -0
- data/lib/melisa/bytes_trie.rb +55 -0
- data/lib/melisa/int_trie.rb +14 -0
- data/lib/melisa/search.rb +55 -0
- data/lib/melisa/trie.rb +96 -0
- data/lib/melisa/version.rb +3 -0
- data/melisa.gemspec +36 -0
- data/spec/base_config_flags_spec.rb +73 -0
- data/spec/bytes_trie_spec.rb +16 -0
- data/spec/int_trie_spec.rb +16 -0
- data/spec/search_spec.rb +29 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/trie_spec.rb +30 -0
- metadata +207 -0
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
#ifndef MARISA_GRIMOIRE_IO_READER_H_
|
|
2
|
+
#define MARISA_GRIMOIRE_IO_READER_H_
|
|
3
|
+
|
|
4
|
+
#include <cstdio>
|
|
5
|
+
#include <iostream>
|
|
6
|
+
|
|
7
|
+
#include "marisa/base.h"
|
|
8
|
+
|
|
9
|
+
namespace marisa {
|
|
10
|
+
namespace grimoire {
|
|
11
|
+
namespace io {
|
|
12
|
+
|
|
13
|
+
class Reader {
|
|
14
|
+
public:
|
|
15
|
+
Reader();
|
|
16
|
+
~Reader();
|
|
17
|
+
|
|
18
|
+
void open(const char *filename);
|
|
19
|
+
void open(std::FILE *file);
|
|
20
|
+
void open(int fd);
|
|
21
|
+
void open(std::istream &stream);
|
|
22
|
+
|
|
23
|
+
template <typename T>
|
|
24
|
+
void read(T *obj) {
|
|
25
|
+
MARISA_THROW_IF(obj == NULL, MARISA_NULL_ERROR);
|
|
26
|
+
read_data(obj, sizeof(T));
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
template <typename T>
|
|
30
|
+
void read(T *objs, std::size_t num_objs) {
|
|
31
|
+
MARISA_THROW_IF((objs == NULL) && (num_objs != 0), MARISA_NULL_ERROR);
|
|
32
|
+
MARISA_THROW_IF(num_objs > (MARISA_SIZE_MAX / sizeof(T)),
|
|
33
|
+
MARISA_SIZE_ERROR);
|
|
34
|
+
read_data(objs, sizeof(T) * num_objs);
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
void seek(std::size_t size);
|
|
38
|
+
|
|
39
|
+
bool is_open() const;
|
|
40
|
+
|
|
41
|
+
void clear();
|
|
42
|
+
void swap(Reader &rhs);
|
|
43
|
+
|
|
44
|
+
private:
|
|
45
|
+
std::FILE *file_;
|
|
46
|
+
int fd_;
|
|
47
|
+
std::istream *stream_;
|
|
48
|
+
bool needs_fclose_;
|
|
49
|
+
|
|
50
|
+
void open_(const char *filename);
|
|
51
|
+
void open_(std::FILE *file);
|
|
52
|
+
void open_(int fd);
|
|
53
|
+
void open_(std::istream &stream);
|
|
54
|
+
|
|
55
|
+
void read_data(void *buf, std::size_t size);
|
|
56
|
+
|
|
57
|
+
// Disallows copy and assignment.
|
|
58
|
+
Reader(const Reader &);
|
|
59
|
+
Reader &operator=(const Reader &);
|
|
60
|
+
};
|
|
61
|
+
|
|
62
|
+
} // namespace io
|
|
63
|
+
} // namespace grimoire
|
|
64
|
+
} // namespace marisa
|
|
65
|
+
|
|
66
|
+
#endif // MARISA_GRIMOIRE_IO_READER_H_
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
#include <stdio.h>
|
|
2
|
+
|
|
3
|
+
#ifdef _WIN32
|
|
4
|
+
#include <io.h>
|
|
5
|
+
#else // _WIN32
|
|
6
|
+
#include <unistd.h>
|
|
7
|
+
#endif // _WIN32
|
|
8
|
+
|
|
9
|
+
#include <limits>
|
|
10
|
+
|
|
11
|
+
#include "marisa/grimoire/io/writer.h"
|
|
12
|
+
|
|
13
|
+
namespace marisa {
|
|
14
|
+
namespace grimoire {
|
|
15
|
+
namespace io {
|
|
16
|
+
|
|
17
|
+
// Creates a closed writer: no FILE, no descriptor, no stream, and nothing
// that the destructor would have to fclose().
Writer::Writer()
    : file_(NULL),
      fd_(-1),
      stream_(NULL),
      needs_fclose_(false) {
}
|
|
19
|
+
|
|
20
|
+
// Closes file_ only when needs_fclose_ is set; handles supplied by the caller
// are left open. The result of fclose() is not checked.
Writer::~Writer() {
  if (!needs_fclose_) {
    return;
  }
  ::fclose(file_);
}
|
|
25
|
+
|
|
26
|
+
void Writer::open(const char *filename) {
|
|
27
|
+
MARISA_THROW_IF(filename == NULL, MARISA_NULL_ERROR);
|
|
28
|
+
|
|
29
|
+
Writer temp;
|
|
30
|
+
temp.open_(filename);
|
|
31
|
+
swap(temp);
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
void Writer::open(std::FILE *file) {
|
|
35
|
+
MARISA_THROW_IF(file == NULL, MARISA_NULL_ERROR);
|
|
36
|
+
|
|
37
|
+
Writer temp;
|
|
38
|
+
temp.open_(file);
|
|
39
|
+
swap(temp);
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
void Writer::open(int fd) {
|
|
43
|
+
MARISA_THROW_IF(fd == -1, MARISA_CODE_ERROR);
|
|
44
|
+
|
|
45
|
+
Writer temp;
|
|
46
|
+
temp.open_(fd);
|
|
47
|
+
swap(temp);
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
void Writer::open(std::ostream &stream) {
|
|
51
|
+
Writer temp;
|
|
52
|
+
temp.open_(stream);
|
|
53
|
+
swap(temp);
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
void Writer::clear() {
|
|
57
|
+
Writer().swap(*this);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
// Exchanges the complete state (sink handles and the fclose flag) with rhs.
void Writer::swap(Writer &rhs) {
  marisa::swap(needs_fclose_, rhs.needs_fclose_);
  marisa::swap(stream_, rhs.stream_);
  marisa::swap(fd_, rhs.fd_);
  marisa::swap(file_, rhs.file_);
}
|
|
66
|
+
|
|
67
|
+
// Advances the output by `size` bytes by writing that many zero bytes.
// Raises MARISA_STATE_ERROR (via MARISA_THROW_IF) when no sink is open.
void Writer::seek(std::size_t size) {
  MARISA_THROW_IF(!is_open(), MARISA_STATE_ERROR);
  if (size == 0) {
    return;
  }

  // Short gaps are served from a small buffer in a single call.
  if (size <= 16) {
    const char small_padding[16] = {};
    write_data(small_padding, size);
    return;
  }

  // Longer gaps are emitted in pieces of at most sizeof(padding) bytes.
  const char padding[1024] = {};
  while (size != 0) {
    std::size_t chunk = sizeof(padding);
    if (size < chunk) {
      chunk = size;
    }
    write_data(padding, chunk);
    size -= chunk;
  }
}
|
|
83
|
+
|
|
84
|
+
bool Writer::is_open() const {
|
|
85
|
+
return (file_ != NULL) || (fd_ != -1) || (stream_ != NULL);
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
void Writer::open_(const char *filename) {
|
|
89
|
+
std::FILE *file = NULL;
|
|
90
|
+
#ifdef _MSC_VER
|
|
91
|
+
MARISA_THROW_IF(::fopen_s(&file, filename, "wb") != 0, MARISA_IO_ERROR);
|
|
92
|
+
#else // _MSC_VER
|
|
93
|
+
file = ::fopen(filename, "wb");
|
|
94
|
+
MARISA_THROW_IF(file == NULL, MARISA_IO_ERROR);
|
|
95
|
+
#endif // _MSC_VER
|
|
96
|
+
file_ = file;
|
|
97
|
+
needs_fclose_ = true;
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
void Writer::open_(std::FILE *file) {
|
|
101
|
+
file_ = file;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
void Writer::open_(int fd) {
|
|
105
|
+
fd_ = fd;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
void Writer::open_(std::ostream &stream) {
|
|
109
|
+
stream_ = &stream;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
void Writer::write_data(const void *data, std::size_t size) {
|
|
113
|
+
MARISA_THROW_IF(!is_open(), MARISA_STATE_ERROR);
|
|
114
|
+
if (size == 0) {
|
|
115
|
+
return;
|
|
116
|
+
} else if (fd_ != -1) {
|
|
117
|
+
while (size != 0) {
|
|
118
|
+
#ifdef _WIN32
|
|
119
|
+
static const std::size_t CHUNK_SIZE =
|
|
120
|
+
std::numeric_limits<int>::max();
|
|
121
|
+
const unsigned int count = (size < CHUNK_SIZE) ? size : CHUNK_SIZE;
|
|
122
|
+
const int size_written = ::_write(fd_, data, count);
|
|
123
|
+
#else // _WIN32
|
|
124
|
+
static const std::size_t CHUNK_SIZE =
|
|
125
|
+
std::numeric_limits< ::ssize_t>::max();
|
|
126
|
+
const ::size_t count = (size < CHUNK_SIZE) ? size : CHUNK_SIZE;
|
|
127
|
+
const ::ssize_t size_written = ::write(fd_, data, count);
|
|
128
|
+
#endif // _WIN32
|
|
129
|
+
MARISA_THROW_IF(size_written <= 0, MARISA_IO_ERROR);
|
|
130
|
+
data = static_cast<const char *>(data) + size_written;
|
|
131
|
+
size -= size_written;
|
|
132
|
+
}
|
|
133
|
+
} else if (file_ != NULL) {
|
|
134
|
+
MARISA_THROW_IF(::fwrite(data, 1, size, file_) != size, MARISA_IO_ERROR);
|
|
135
|
+
MARISA_THROW_IF(::fflush(file_) != 0, MARISA_IO_ERROR);
|
|
136
|
+
} else if (stream_ != NULL) {
|
|
137
|
+
try {
|
|
138
|
+
MARISA_THROW_IF(!stream_->write(static_cast<const char *>(data), size),
|
|
139
|
+
MARISA_IO_ERROR);
|
|
140
|
+
} catch (const std::ios_base::failure &) {
|
|
141
|
+
MARISA_THROW(MARISA_IO_ERROR, "std::ios_base::failure");
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
} // namespace io
|
|
147
|
+
} // namespace grimoire
|
|
148
|
+
} // namespace marisa
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
#ifndef MARISA_GRIMOIRE_IO_WRITER_H_
|
|
2
|
+
#define MARISA_GRIMOIRE_IO_WRITER_H_
|
|
3
|
+
|
|
4
|
+
#include <cstdio>
|
|
5
|
+
#include <iostream>
|
|
6
|
+
|
|
7
|
+
#include "marisa/base.h"
|
|
8
|
+
|
|
9
|
+
namespace marisa {
|
|
10
|
+
namespace grimoire {
|
|
11
|
+
namespace io {
|
|
12
|
+
|
|
13
|
+
class Writer {
|
|
14
|
+
public:
|
|
15
|
+
Writer();
|
|
16
|
+
~Writer();
|
|
17
|
+
|
|
18
|
+
void open(const char *filename);
|
|
19
|
+
void open(std::FILE *file);
|
|
20
|
+
void open(int fd);
|
|
21
|
+
void open(std::ostream &stream);
|
|
22
|
+
|
|
23
|
+
template <typename T>
|
|
24
|
+
void write(const T &obj) {
|
|
25
|
+
write_data(&obj, sizeof(T));
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
template <typename T>
|
|
29
|
+
void write(const T *objs, std::size_t num_objs) {
|
|
30
|
+
MARISA_THROW_IF((objs == NULL) && (num_objs != 0), MARISA_NULL_ERROR);
|
|
31
|
+
MARISA_THROW_IF(num_objs > (MARISA_SIZE_MAX / sizeof(T)),
|
|
32
|
+
MARISA_SIZE_ERROR);
|
|
33
|
+
write_data(objs, sizeof(T) * num_objs);
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
void seek(std::size_t size);
|
|
37
|
+
|
|
38
|
+
bool is_open() const;
|
|
39
|
+
|
|
40
|
+
void clear();
|
|
41
|
+
void swap(Writer &rhs);
|
|
42
|
+
|
|
43
|
+
private:
|
|
44
|
+
std::FILE *file_;
|
|
45
|
+
int fd_;
|
|
46
|
+
std::ostream *stream_;
|
|
47
|
+
bool needs_fclose_;
|
|
48
|
+
|
|
49
|
+
void open_(const char *filename);
|
|
50
|
+
void open_(std::FILE *file);
|
|
51
|
+
void open_(int fd);
|
|
52
|
+
void open_(std::ostream &stream);
|
|
53
|
+
|
|
54
|
+
void write_data(const void *data, std::size_t size);
|
|
55
|
+
|
|
56
|
+
// Disallows copy and assignment.
|
|
57
|
+
Writer(const Writer &);
|
|
58
|
+
Writer &operator=(const Writer &);
|
|
59
|
+
};
|
|
60
|
+
|
|
61
|
+
} // namespace io
|
|
62
|
+
} // namespace grimoire
|
|
63
|
+
} // namespace marisa
|
|
64
|
+
|
|
65
|
+
#endif // MARISA_GRIMOIRE_IO_WRITER_H_
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
#ifndef MARISA_GRIMOIRE_TRIE_H_
|
|
2
|
+
#define MARISA_GRIMOIRE_TRIE_H_
|
|
3
|
+
|
|
4
|
+
#include "marisa/grimoire/trie/state.h"
|
|
5
|
+
#include "marisa/grimoire/trie/louds-trie.h"
|
|
6
|
+
|
|
7
|
+
namespace marisa {
|
|
8
|
+
namespace grimoire {
|
|
9
|
+
|
|
10
|
+
using trie::State;  // Makes trie::State available as grimoire::State.
|
|
11
|
+
using trie::LoudsTrie;  // Makes trie::LoudsTrie available as grimoire::LoudsTrie.
|
|
12
|
+
|
|
13
|
+
} // namespace grimoire
|
|
14
|
+
} // namespace marisa
|
|
15
|
+
|
|
16
|
+
#endif // MARISA_GRIMOIRE_TRIE_H_
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
#ifndef MARISA_GRIMOIRE_TRIE_CACHE_H_
|
|
2
|
+
#define MARISA_GRIMOIRE_TRIE_CACHE_H_
|
|
3
|
+
|
|
4
|
+
#include <cfloat>
|
|
5
|
+
|
|
6
|
+
#include "marisa/base.h"
|
|
7
|
+
|
|
8
|
+
namespace marisa {
|
|
9
|
+
namespace grimoire {
|
|
10
|
+
namespace trie {
|
|
11
|
+
|
|
12
|
+
class Cache {
|
|
13
|
+
public:
|
|
14
|
+
Cache() : parent_(0), child_(0), union_() {
|
|
15
|
+
union_.weight = FLT_MIN;
|
|
16
|
+
}
|
|
17
|
+
Cache(const Cache &cache)
|
|
18
|
+
: parent_(cache.parent_), child_(cache.child_), union_(cache.union_) {}
|
|
19
|
+
|
|
20
|
+
Cache &operator=(const Cache &cache) {
|
|
21
|
+
parent_ = cache.parent_;
|
|
22
|
+
child_ = cache.child_;
|
|
23
|
+
union_ = cache.union_;
|
|
24
|
+
return *this;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
void set_parent(std::size_t parent) {
|
|
28
|
+
MARISA_DEBUG_IF(parent > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
|
|
29
|
+
parent_ = (UInt32)parent;
|
|
30
|
+
}
|
|
31
|
+
void set_child(std::size_t child) {
|
|
32
|
+
MARISA_DEBUG_IF(child > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
|
|
33
|
+
child_ = (UInt32)child;
|
|
34
|
+
}
|
|
35
|
+
void set_base(UInt8 base) {
|
|
36
|
+
union_.link = (union_.link & ~0xFFU) | base;
|
|
37
|
+
}
|
|
38
|
+
void set_extra(std::size_t extra) {
|
|
39
|
+
MARISA_DEBUG_IF(extra > (MARISA_UINT32_MAX >> 8), MARISA_SIZE_ERROR);
|
|
40
|
+
union_.link = (UInt32)((union_.link & 0xFFU) | (extra << 8));
|
|
41
|
+
}
|
|
42
|
+
void set_weight(float weight) {
|
|
43
|
+
union_.weight = weight;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
std::size_t parent() const {
|
|
47
|
+
return parent_;
|
|
48
|
+
}
|
|
49
|
+
std::size_t child() const {
|
|
50
|
+
return child_;
|
|
51
|
+
}
|
|
52
|
+
UInt8 base() const {
|
|
53
|
+
return (UInt8)(union_.link & 0xFFU);
|
|
54
|
+
}
|
|
55
|
+
std::size_t extra() const {
|
|
56
|
+
return union_.link >> 8;
|
|
57
|
+
}
|
|
58
|
+
char label() const {
|
|
59
|
+
return (char)base();
|
|
60
|
+
}
|
|
61
|
+
std::size_t link() const {
|
|
62
|
+
return union_.link;
|
|
63
|
+
}
|
|
64
|
+
float weight() const {
|
|
65
|
+
return union_.weight;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
private:
|
|
69
|
+
UInt32 parent_;
|
|
70
|
+
UInt32 child_;
|
|
71
|
+
union Union {
|
|
72
|
+
UInt32 link;
|
|
73
|
+
float weight;
|
|
74
|
+
} union_;
|
|
75
|
+
};
|
|
76
|
+
|
|
77
|
+
} // namespace trie
|
|
78
|
+
} // namespace grimoire
|
|
79
|
+
} // namespace marisa
|
|
80
|
+
|
|
81
|
+
#endif // MARISA_GRIMOIRE_TRIE_CACHE_H_
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
#ifndef MARISA_GRIMOIRE_TRIE_CONFIG_H_
|
|
2
|
+
#define MARISA_GRIMOIRE_TRIE_CONFIG_H_
|
|
3
|
+
|
|
4
|
+
#include "marisa/base.h"
|
|
5
|
+
|
|
6
|
+
namespace marisa {
|
|
7
|
+
namespace grimoire {
|
|
8
|
+
namespace trie {
|
|
9
|
+
|
|
10
|
+
class Config {
|
|
11
|
+
public:
|
|
12
|
+
Config()
|
|
13
|
+
: num_tries_(MARISA_DEFAULT_NUM_TRIES),
|
|
14
|
+
cache_level_(MARISA_DEFAULT_CACHE),
|
|
15
|
+
tail_mode_(MARISA_DEFAULT_TAIL),
|
|
16
|
+
node_order_(MARISA_DEFAULT_ORDER) {}
|
|
17
|
+
|
|
18
|
+
void parse(int config_flags) {
|
|
19
|
+
Config temp;
|
|
20
|
+
temp.parse_(config_flags);
|
|
21
|
+
swap(temp);
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
int flags() const {
|
|
25
|
+
return (int)num_tries_ | tail_mode_ | node_order_;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
std::size_t num_tries() const {
|
|
29
|
+
return num_tries_;
|
|
30
|
+
}
|
|
31
|
+
CacheLevel cache_level() const {
|
|
32
|
+
return cache_level_;
|
|
33
|
+
}
|
|
34
|
+
TailMode tail_mode() const {
|
|
35
|
+
return tail_mode_;
|
|
36
|
+
}
|
|
37
|
+
NodeOrder node_order() const {
|
|
38
|
+
return node_order_;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
void clear() {
|
|
42
|
+
Config().swap(*this);
|
|
43
|
+
}
|
|
44
|
+
void swap(Config &rhs) {
|
|
45
|
+
marisa::swap(num_tries_, rhs.num_tries_);
|
|
46
|
+
marisa::swap(cache_level_, rhs.cache_level_);
|
|
47
|
+
marisa::swap(tail_mode_, rhs.tail_mode_);
|
|
48
|
+
marisa::swap(node_order_, rhs.node_order_);
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
private:
|
|
52
|
+
std::size_t num_tries_;
|
|
53
|
+
CacheLevel cache_level_;
|
|
54
|
+
TailMode tail_mode_;
|
|
55
|
+
NodeOrder node_order_;
|
|
56
|
+
|
|
57
|
+
void parse_(int config_flags) {
|
|
58
|
+
MARISA_THROW_IF((config_flags & ~MARISA_CONFIG_MASK) != 0,
|
|
59
|
+
MARISA_CODE_ERROR);
|
|
60
|
+
|
|
61
|
+
parse_num_tries(config_flags);
|
|
62
|
+
parse_cache_level(config_flags);
|
|
63
|
+
parse_tail_mode(config_flags);
|
|
64
|
+
parse_node_order(config_flags);
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
void parse_num_tries(int config_flags) {
|
|
68
|
+
const int num_tries = config_flags & MARISA_NUM_TRIES_MASK;
|
|
69
|
+
if (num_tries != 0) {
|
|
70
|
+
num_tries_ = num_tries;
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
void parse_cache_level(int config_flags) {
|
|
75
|
+
switch (config_flags & MARISA_CACHE_LEVEL_MASK) {
|
|
76
|
+
case 0: {
|
|
77
|
+
cache_level_ = MARISA_DEFAULT_CACHE;
|
|
78
|
+
break;
|
|
79
|
+
}
|
|
80
|
+
case MARISA_HUGE_CACHE: {
|
|
81
|
+
cache_level_ = MARISA_HUGE_CACHE;
|
|
82
|
+
break;
|
|
83
|
+
}
|
|
84
|
+
case MARISA_LARGE_CACHE: {
|
|
85
|
+
cache_level_ = MARISA_LARGE_CACHE;
|
|
86
|
+
break;
|
|
87
|
+
}
|
|
88
|
+
case MARISA_NORMAL_CACHE: {
|
|
89
|
+
cache_level_ = MARISA_NORMAL_CACHE;
|
|
90
|
+
break;
|
|
91
|
+
}
|
|
92
|
+
case MARISA_SMALL_CACHE: {
|
|
93
|
+
cache_level_ = MARISA_SMALL_CACHE;
|
|
94
|
+
break;
|
|
95
|
+
}
|
|
96
|
+
case MARISA_TINY_CACHE: {
|
|
97
|
+
cache_level_ = MARISA_TINY_CACHE;
|
|
98
|
+
break;
|
|
99
|
+
}
|
|
100
|
+
default: {
|
|
101
|
+
MARISA_THROW(MARISA_CODE_ERROR, "undefined cache level");
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
void parse_tail_mode(int config_flags) {
|
|
107
|
+
switch (config_flags & MARISA_TAIL_MODE_MASK) {
|
|
108
|
+
case 0: {
|
|
109
|
+
tail_mode_ = MARISA_DEFAULT_TAIL;
|
|
110
|
+
break;
|
|
111
|
+
}
|
|
112
|
+
case MARISA_TEXT_TAIL: {
|
|
113
|
+
tail_mode_ = MARISA_TEXT_TAIL;
|
|
114
|
+
break;
|
|
115
|
+
}
|
|
116
|
+
case MARISA_BINARY_TAIL: {
|
|
117
|
+
tail_mode_ = MARISA_BINARY_TAIL;
|
|
118
|
+
break;
|
|
119
|
+
}
|
|
120
|
+
default: {
|
|
121
|
+
MARISA_THROW(MARISA_CODE_ERROR, "undefined tail mode");
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
void parse_node_order(int config_flags) {
|
|
127
|
+
switch (config_flags & MARISA_NODE_ORDER_MASK) {
|
|
128
|
+
case 0: {
|
|
129
|
+
node_order_ = MARISA_DEFAULT_ORDER;
|
|
130
|
+
break;
|
|
131
|
+
}
|
|
132
|
+
case MARISA_LABEL_ORDER: {
|
|
133
|
+
node_order_ = MARISA_LABEL_ORDER;
|
|
134
|
+
break;
|
|
135
|
+
}
|
|
136
|
+
case MARISA_WEIGHT_ORDER: {
|
|
137
|
+
node_order_ = MARISA_WEIGHT_ORDER;
|
|
138
|
+
break;
|
|
139
|
+
}
|
|
140
|
+
default: {
|
|
141
|
+
MARISA_THROW(MARISA_CODE_ERROR, "undefined node order");
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
// Disallows copy and assignment.
|
|
147
|
+
Config(const Config &);
|
|
148
|
+
Config &operator=(const Config &);
|
|
149
|
+
};
|
|
150
|
+
|
|
151
|
+
} // namespace trie
|
|
152
|
+
} // namespace grimoire
|
|
153
|
+
} // namespace marisa
|
|
154
|
+
|
|
155
|
+
#endif // MARISA_GRIMOIRE_TRIE_CONFIG_H_
|