melisa 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +11 -0
- data/ext/marisa/bindings/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/marisa-swig.h +183 -0
- data/ext/marisa/bindings/perl/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/perl/marisa-swig.h +183 -0
- data/ext/marisa/bindings/perl/marisa-swig_wrap.cxx +5160 -0
- data/ext/marisa/bindings/python/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/python/marisa-swig.h +183 -0
- data/ext/marisa/bindings/python/marisa-swig_wrap.cxx +6090 -0
- data/ext/marisa/bindings/ruby/extconf.rb +5 -0
- data/ext/marisa/bindings/ruby/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/ruby/marisa-swig.h +183 -0
- data/ext/marisa/bindings/ruby/marisa-swig_wrap.cxx +4708 -0
- data/ext/marisa/lib/marisa.h +14 -0
- data/ext/marisa/lib/marisa/agent.cc +51 -0
- data/ext/marisa/lib/marisa/agent.h +73 -0
- data/ext/marisa/lib/marisa/base.h +193 -0
- data/ext/marisa/lib/marisa/exception.h +82 -0
- data/ext/marisa/lib/marisa/grimoire/algorithm.h +26 -0
- data/ext/marisa/lib/marisa/grimoire/algorithm/sort.h +196 -0
- data/ext/marisa/lib/marisa/grimoire/intrin.h +115 -0
- data/ext/marisa/lib/marisa/grimoire/io.h +18 -0
- data/ext/marisa/lib/marisa/grimoire/io/mapper.cc +163 -0
- data/ext/marisa/lib/marisa/grimoire/io/mapper.h +67 -0
- data/ext/marisa/lib/marisa/grimoire/io/reader.cc +147 -0
- data/ext/marisa/lib/marisa/grimoire/io/reader.h +66 -0
- data/ext/marisa/lib/marisa/grimoire/io/writer.cc +148 -0
- data/ext/marisa/lib/marisa/grimoire/io/writer.h +65 -0
- data/ext/marisa/lib/marisa/grimoire/trie.h +16 -0
- data/ext/marisa/lib/marisa/grimoire/trie/cache.h +81 -0
- data/ext/marisa/lib/marisa/grimoire/trie/config.h +155 -0
- data/ext/marisa/lib/marisa/grimoire/trie/entry.h +82 -0
- data/ext/marisa/lib/marisa/grimoire/trie/header.h +61 -0
- data/ext/marisa/lib/marisa/grimoire/trie/history.h +65 -0
- data/ext/marisa/lib/marisa/grimoire/trie/key.h +228 -0
- data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.cc +876 -0
- data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.h +134 -0
- data/ext/marisa/lib/marisa/grimoire/trie/range.h +115 -0
- data/ext/marisa/lib/marisa/grimoire/trie/state.h +117 -0
- data/ext/marisa/lib/marisa/grimoire/trie/tail.cc +218 -0
- data/ext/marisa/lib/marisa/grimoire/trie/tail.h +72 -0
- data/ext/marisa/lib/marisa/grimoire/vector.h +18 -0
- data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.cc +826 -0
- data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.h +179 -0
- data/ext/marisa/lib/marisa/grimoire/vector/flat-vector.h +205 -0
- data/ext/marisa/lib/marisa/grimoire/vector/pop-count.h +110 -0
- data/ext/marisa/lib/marisa/grimoire/vector/rank-index.h +82 -0
- data/ext/marisa/lib/marisa/grimoire/vector/vector.h +256 -0
- data/ext/marisa/lib/marisa/iostream.h +18 -0
- data/ext/marisa/lib/marisa/key.h +85 -0
- data/ext/marisa/lib/marisa/keyset.cc +181 -0
- data/ext/marisa/lib/marisa/keyset.h +80 -0
- data/ext/marisa/lib/marisa/query.h +71 -0
- data/ext/marisa/lib/marisa/scoped-array.h +48 -0
- data/ext/marisa/lib/marisa/scoped-ptr.h +52 -0
- data/ext/marisa/lib/marisa/stdio.h +15 -0
- data/ext/marisa/lib/marisa/trie.cc +249 -0
- data/ext/marisa/lib/marisa/trie.h +64 -0
- data/ext/marisa/tests/base-test.cc +309 -0
- data/ext/marisa/tests/io-test.cc +252 -0
- data/ext/marisa/tests/marisa-assert.h +26 -0
- data/ext/marisa/tests/marisa-test.cc +388 -0
- data/ext/marisa/tests/trie-test.cc +507 -0
- data/ext/marisa/tests/vector-test.cc +466 -0
- data/ext/marisa/tools/cmdopt.cc +298 -0
- data/ext/marisa/tools/cmdopt.h +58 -0
- data/ext/marisa/tools/marisa-benchmark.cc +418 -0
- data/ext/marisa/tools/marisa-build.cc +206 -0
- data/ext/marisa/tools/marisa-common-prefix-search.cc +143 -0
- data/ext/marisa/tools/marisa-dump.cc +151 -0
- data/ext/marisa/tools/marisa-lookup.cc +110 -0
- data/ext/marisa/tools/marisa-predictive-search.cc +143 -0
- data/ext/marisa/tools/marisa-reverse-lookup.cc +110 -0
- data/lib/melisa.rb +7 -0
- data/lib/melisa/base_config_flags.rb +76 -0
- data/lib/melisa/bytes_trie.rb +55 -0
- data/lib/melisa/int_trie.rb +14 -0
- data/lib/melisa/search.rb +55 -0
- data/lib/melisa/trie.rb +96 -0
- data/lib/melisa/version.rb +3 -0
- data/melisa.gemspec +36 -0
- data/spec/base_config_flags_spec.rb +73 -0
- data/spec/bytes_trie_spec.rb +16 -0
- data/spec/int_trie_spec.rb +16 -0
- data/spec/search_spec.rb +29 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/trie_spec.rb +30 -0
- metadata +207 -0
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
#include <iostream>
|
|
2
|
+
#include <string>
|
|
3
|
+
|
|
4
|
+
#include <marisa.h>
|
|
5
|
+
|
|
6
|
+
#include "cmdopt.h"
|
|
7
|
+
|
|
8
|
+
namespace {
|
|
9
|
+
|
|
10
|
+
bool mmap_flag = true;
|
|
11
|
+
|
|
12
|
+
void print_help(const char *cmd) {
|
|
13
|
+
std::cerr << "Usage: " << cmd << " [OPTION]... DIC\n\n"
|
|
14
|
+
"Options:\n"
|
|
15
|
+
" -m, --mmap-dictionary use memory-mapped I/O to load a dictionary"
|
|
16
|
+
" (default)\n"
|
|
17
|
+
" -r, --read-dictionary read an entire dictionary into memory\n"
|
|
18
|
+
" -h, --help print this help\n"
|
|
19
|
+
<< std::endl;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
int lookup(const char * const *args, std::size_t num_args) {
|
|
23
|
+
if (num_args == 0) {
|
|
24
|
+
std::cerr << "error: dictionary is not specified" << std::endl;
|
|
25
|
+
return 10;
|
|
26
|
+
} else if (num_args > 1) {
|
|
27
|
+
std::cerr << "error: more than one dictionaries are specified"
|
|
28
|
+
<< std::endl;
|
|
29
|
+
return 11;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
marisa::Trie trie;
|
|
33
|
+
if (mmap_flag) {
|
|
34
|
+
try {
|
|
35
|
+
trie.mmap(args[0]);
|
|
36
|
+
} catch (const marisa::Exception &ex) {
|
|
37
|
+
std::cerr << ex.what() << ": failed to mmap a dictionary file: "
|
|
38
|
+
<< args[0] << std::endl;
|
|
39
|
+
return 20;
|
|
40
|
+
}
|
|
41
|
+
} else {
|
|
42
|
+
try {
|
|
43
|
+
trie.load(args[0]);
|
|
44
|
+
} catch (const marisa::Exception &ex) {
|
|
45
|
+
std::cerr << ex.what() << ": failed to load a dictionary file: "
|
|
46
|
+
<< args[0] << std::endl;
|
|
47
|
+
return 21;
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
marisa::Agent agent;
|
|
52
|
+
std::string str;
|
|
53
|
+
while (std::getline(std::cin, str)) {
|
|
54
|
+
try {
|
|
55
|
+
agent.set_query(str.c_str(), str.length());
|
|
56
|
+
if (trie.lookup(agent)) {
|
|
57
|
+
std::cout << agent.key().id() << '\t' << str << '\n';
|
|
58
|
+
} else {
|
|
59
|
+
std::cout << "-1\t" << str << '\n';
|
|
60
|
+
}
|
|
61
|
+
} catch (const marisa::Exception &ex) {
|
|
62
|
+
std::cerr << ex.what() << ": lookup() failed: " << str << std::endl;
|
|
63
|
+
return 30;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
if (!std::cout) {
|
|
67
|
+
std::cerr << "error: failed to write results to standard output"
|
|
68
|
+
<< std::endl;
|
|
69
|
+
return 30;
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
return 0;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
} // namespace
|
|
77
|
+
|
|
78
|
+
int main(int argc, char *argv[]) {
|
|
79
|
+
std::ios::sync_with_stdio(false);
|
|
80
|
+
|
|
81
|
+
::cmdopt_option long_options[] = {
|
|
82
|
+
{ "mmap-dictionary", 0, NULL, 'm' },
|
|
83
|
+
{ "read-dictionary", 0, NULL, 'r' },
|
|
84
|
+
{ "help", 0, NULL, 'h' },
|
|
85
|
+
{ NULL, 0, NULL, 0 }
|
|
86
|
+
};
|
|
87
|
+
::cmdopt_t cmdopt;
|
|
88
|
+
::cmdopt_init(&cmdopt, argc, argv, "mrh", long_options);
|
|
89
|
+
int label;
|
|
90
|
+
while ((label = ::cmdopt_get(&cmdopt)) != -1) {
|
|
91
|
+
switch (label) {
|
|
92
|
+
case 'm': {
|
|
93
|
+
mmap_flag = true;
|
|
94
|
+
break;
|
|
95
|
+
}
|
|
96
|
+
case 'r': {
|
|
97
|
+
mmap_flag = false;
|
|
98
|
+
break;
|
|
99
|
+
}
|
|
100
|
+
case 'h': {
|
|
101
|
+
print_help(argv[0]);
|
|
102
|
+
return 0;
|
|
103
|
+
}
|
|
104
|
+
default: {
|
|
105
|
+
return 1;
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
return lookup(cmdopt.argv + cmdopt.optind, cmdopt.argc - cmdopt.optind);
|
|
110
|
+
}
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <string>

#include <marisa.h>

#include "cmdopt.h"
|
|
8
|
+
|
|
9
|
+
namespace {
|
|
10
|
+
|
|
11
|
+
std::size_t max_num_results = 10;
|
|
12
|
+
bool mmap_flag = true;
|
|
13
|
+
|
|
14
|
+
void print_help(const char *cmd) {
|
|
15
|
+
std::cerr << "Usage: " << cmd << " [OPTION]... DIC\n\n"
|
|
16
|
+
"Options:\n"
|
|
17
|
+
" -n, --max-num-results=[N] limit the number of outputs to N"
|
|
18
|
+
" (default: 10)\n"
|
|
19
|
+
" 0: no limit\n"
|
|
20
|
+
" -m, --mmap-dictionary use memory-mapped I/O to load a dictionary"
|
|
21
|
+
" (default)\n"
|
|
22
|
+
" -r, --read-dictionary read an entire dictionary into memory\n"
|
|
23
|
+
" -h, --help print this help\n"
|
|
24
|
+
<< std::endl;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
int predictive_search(const char * const *args, std::size_t num_args) {
|
|
28
|
+
if (num_args == 0) {
|
|
29
|
+
std::cerr << "error: dictionary is not specified" << std::endl;
|
|
30
|
+
return 10;
|
|
31
|
+
} else if (num_args > 1) {
|
|
32
|
+
std::cerr << "error: more than one dictionaries are specified"
|
|
33
|
+
<< std::endl;
|
|
34
|
+
return 11;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
marisa::Trie trie;
|
|
38
|
+
if (mmap_flag) {
|
|
39
|
+
try {
|
|
40
|
+
trie.mmap(args[0]);
|
|
41
|
+
} catch (const marisa::Exception &ex) {
|
|
42
|
+
std::cerr << ex.what() << ": failed to mmap a dictionary file: "
|
|
43
|
+
<< args[0] << std::endl;
|
|
44
|
+
return 20;
|
|
45
|
+
}
|
|
46
|
+
} else {
|
|
47
|
+
try {
|
|
48
|
+
trie.load(args[0]);
|
|
49
|
+
} catch (const marisa::Exception &ex) {
|
|
50
|
+
std::cerr << ex.what() << ": failed to load a dictionary file: "
|
|
51
|
+
<< args[0] << std::endl;
|
|
52
|
+
return 21;
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
marisa::Agent agent;
|
|
57
|
+
marisa::Keyset keyset;
|
|
58
|
+
std::string str;
|
|
59
|
+
while (std::getline(std::cin, str)) {
|
|
60
|
+
try {
|
|
61
|
+
agent.set_query(str.c_str(), str.length());
|
|
62
|
+
while (trie.predictive_search(agent)) {
|
|
63
|
+
keyset.push_back(agent.key());
|
|
64
|
+
}
|
|
65
|
+
if (keyset.empty()) {
|
|
66
|
+
std::cout << "not found" << std::endl;
|
|
67
|
+
} else {
|
|
68
|
+
std::cout << keyset.size() << " found" << std::endl;
|
|
69
|
+
const std::size_t end = std::min(max_num_results, keyset.size());
|
|
70
|
+
for (std::size_t i = 0; i < end; ++i) {
|
|
71
|
+
std::cout << keyset[i].id() << '\t';
|
|
72
|
+
std::cout.write(keyset[i].ptr(), keyset[i].length()) << '\t';
|
|
73
|
+
std::cout << str << '\n';
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
keyset.reset();
|
|
77
|
+
} catch (const marisa::Exception &ex) {
|
|
78
|
+
std::cerr << ex.what() << ": predictive_search() failed: "
|
|
79
|
+
<< str << std::endl;
|
|
80
|
+
return 30;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
if (!std::cout) {
|
|
84
|
+
std::cerr << "error: failed to write results to standard output"
|
|
85
|
+
<< std::endl;
|
|
86
|
+
return 31;
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
return 0;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
} // namespace
|
|
94
|
+
|
|
95
|
+
int main(int argc, char *argv[]) {
|
|
96
|
+
std::ios::sync_with_stdio(false);
|
|
97
|
+
|
|
98
|
+
::cmdopt_option long_options[] = {
|
|
99
|
+
{ "max-num-results", 1, NULL, 'n' },
|
|
100
|
+
{ "mmap-dictionary", 0, NULL, 'm' },
|
|
101
|
+
{ "read-dictionary", 0, NULL, 'r' },
|
|
102
|
+
{ "help", 0, NULL, 'h' },
|
|
103
|
+
{ NULL, 0, NULL, 0 }
|
|
104
|
+
};
|
|
105
|
+
::cmdopt_t cmdopt;
|
|
106
|
+
::cmdopt_init(&cmdopt, argc, argv, "n:mrh", long_options);
|
|
107
|
+
int label;
|
|
108
|
+
while ((label = ::cmdopt_get(&cmdopt)) != -1) {
|
|
109
|
+
switch (label) {
|
|
110
|
+
case 'n': {
|
|
111
|
+
char *end_of_value;
|
|
112
|
+
const long value = std::strtol(cmdopt.optarg, &end_of_value, 10);
|
|
113
|
+
if ((*end_of_value != '\0') || (value < 0)) {
|
|
114
|
+
std::cerr << "error: option `-n' with an invalid argument: "
|
|
115
|
+
<< cmdopt.optarg << std::endl;
|
|
116
|
+
}
|
|
117
|
+
if ((value == 0) || ((unsigned long long)value > MARISA_SIZE_MAX)) {
|
|
118
|
+
max_num_results = MARISA_SIZE_MAX;
|
|
119
|
+
} else {
|
|
120
|
+
max_num_results = (std::size_t)value;
|
|
121
|
+
}
|
|
122
|
+
break;
|
|
123
|
+
}
|
|
124
|
+
case 'm': {
|
|
125
|
+
mmap_flag = true;
|
|
126
|
+
break;
|
|
127
|
+
}
|
|
128
|
+
case 'r': {
|
|
129
|
+
mmap_flag = false;
|
|
130
|
+
break;
|
|
131
|
+
}
|
|
132
|
+
case 'h': {
|
|
133
|
+
print_help(argv[0]);
|
|
134
|
+
return 0;
|
|
135
|
+
}
|
|
136
|
+
default: {
|
|
137
|
+
return 1;
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
return predictive_search(cmdopt.argv + cmdopt.optind,
|
|
142
|
+
cmdopt.argc - cmdopt.optind);
|
|
143
|
+
}
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
#include <iostream>
|
|
2
|
+
#include <string>
|
|
3
|
+
|
|
4
|
+
#include <marisa.h>
|
|
5
|
+
|
|
6
|
+
#include "cmdopt.h"
|
|
7
|
+
|
|
8
|
+
namespace {
|
|
9
|
+
|
|
10
|
+
bool mmap_flag = true;
|
|
11
|
+
|
|
12
|
+
void print_help(const char *cmd) {
|
|
13
|
+
std::cerr << "Usage: " << cmd << " [OPTION]... DIC\n\n"
|
|
14
|
+
"Options:\n"
|
|
15
|
+
" -m, --mmap-dictionary use memory-mapped I/O to load a dictionary"
|
|
16
|
+
" (default)\n"
|
|
17
|
+
" -r, --read-dictionary read an entire dictionary into memory\n"
|
|
18
|
+
" -h, --help print this help\n"
|
|
19
|
+
<< std::endl;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
int reverse_lookup(const char * const *args, std::size_t num_args) {
|
|
23
|
+
if (num_args == 0) {
|
|
24
|
+
std::cerr << "error: dictionary is not specified" << std::endl;
|
|
25
|
+
return 10;
|
|
26
|
+
} else if (num_args > 1) {
|
|
27
|
+
std::cerr << "error: more than one dictionaries are specified"
|
|
28
|
+
<< std::endl;
|
|
29
|
+
return 11;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
marisa::Trie trie;
|
|
33
|
+
if (mmap_flag) {
|
|
34
|
+
try {
|
|
35
|
+
trie.mmap(args[0]);
|
|
36
|
+
} catch (const marisa::Exception &ex) {
|
|
37
|
+
std::cerr << ex.what() << ": failed to mmap a dictionary file: "
|
|
38
|
+
<< args[0] << std::endl;
|
|
39
|
+
return 20;
|
|
40
|
+
}
|
|
41
|
+
} else {
|
|
42
|
+
try {
|
|
43
|
+
trie.load(args[0]);
|
|
44
|
+
} catch (const marisa::Exception &ex) {
|
|
45
|
+
std::cerr << ex.what() << ": failed to load a dictionary file: "
|
|
46
|
+
<< args[0] << std::endl;
|
|
47
|
+
return 21;
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
marisa::Agent agent;
|
|
52
|
+
std::size_t key_id;
|
|
53
|
+
while (std::cin >> key_id) {
|
|
54
|
+
try {
|
|
55
|
+
agent.set_query(key_id);
|
|
56
|
+
trie.reverse_lookup(agent);
|
|
57
|
+
std::cout << agent.key().id() << '\t';
|
|
58
|
+
std::cout.write(agent.key().ptr(), agent.key().length()) << '\n';
|
|
59
|
+
} catch (const marisa::Exception &ex) {
|
|
60
|
+
std::cerr << ex.what() << ": reverse_lookup() failed: "
|
|
61
|
+
<< key_id << std::endl;
|
|
62
|
+
return 30;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
if (!std::cout) {
|
|
66
|
+
std::cerr << "error: failed to write results to standard output"
|
|
67
|
+
<< std::endl;
|
|
68
|
+
return 30;
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
return 0;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
} // namespace
|
|
76
|
+
|
|
77
|
+
int main(int argc, char *argv[]) {
|
|
78
|
+
std::ios::sync_with_stdio(false);
|
|
79
|
+
|
|
80
|
+
::cmdopt_option long_options[] = {
|
|
81
|
+
{ "mmap-dictionary", 0, NULL, 'm' },
|
|
82
|
+
{ "read-dictionary", 0, NULL, 'r' },
|
|
83
|
+
{ "help", 0, NULL, 'h' },
|
|
84
|
+
{ NULL, 0, NULL, 0 }
|
|
85
|
+
};
|
|
86
|
+
::cmdopt_t cmdopt;
|
|
87
|
+
::cmdopt_init(&cmdopt, argc, argv, "mrh", long_options);
|
|
88
|
+
int label;
|
|
89
|
+
while ((label = ::cmdopt_get(&cmdopt)) != -1) {
|
|
90
|
+
switch (label) {
|
|
91
|
+
case 'm': {
|
|
92
|
+
mmap_flag = true;
|
|
93
|
+
break;
|
|
94
|
+
}
|
|
95
|
+
case 'r': {
|
|
96
|
+
mmap_flag = false;
|
|
97
|
+
break;
|
|
98
|
+
}
|
|
99
|
+
case 'h': {
|
|
100
|
+
print_help(argv[0]);
|
|
101
|
+
return 0;
|
|
102
|
+
}
|
|
103
|
+
default: {
|
|
104
|
+
return 1;
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
return reverse_lookup(cmdopt.argv + cmdopt.optind,
|
|
109
|
+
cmdopt.argc - cmdopt.optind);
|
|
110
|
+
}
|
data/lib/melisa.rb
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
module Melisa
|
|
2
|
+
# Raised by the BaseConfigFlags validators when an option value is not
# supported.
ConfigError = Class.new(StandardError)

# Maps the symbolic :cache_size option to the matching Marisa constant.
# Frozen so the shared lookup table cannot be modified at runtime.
CacheSizes = {
  :huge    => Marisa::HUGE_CACHE,
  :large   => Marisa::LARGE_CACHE,
  :normal  => Marisa::NORMAL_CACHE,
  :small   => Marisa::SMALL_CACHE,
  :tiny    => Marisa::TINY_CACHE,
  :default => Marisa::NORMAL_CACHE
}.freeze

# Maps the symbolic :order option to the matching Marisa constant.
# Frozen so the shared lookup table cannot be modified at runtime.
NodeOrders = {
  :label   => Marisa::LABEL_ORDER,
  :weight  => Marisa::WEIGHT_ORDER,
  :default => Marisa::DEFAULT_ORDER
}.freeze
|
|
18
|
+
|
|
19
|
+
# Translates a hash of trie options into the single value obtained by
# OR-ing together the corresponding Marisa constants.
module BaseConfigFlags
  # Builds the combined flag value for the given options.
  #
  # @param opts [Hash] recognised keys (all optional):
  #   :binary     - true or false (default false)
  #   :num_tries  - Integer or :default (default :default)
  #   :cache_size - a key of CacheSizes (default :default)
  #   :order      - a key of NodeOrders (default :default)
  # @return the four per-option values combined with `|`
  # @raise [ArgumentError] when :binary is neither true nor false
  # @raise [ConfigError] when any other option has an unsupported value
  def config_flags(opts={})
    opts = {
      :binary     => false,
      :num_tries  => :default,
      :cache_size => :default,
      :order      => :default
    }.merge(opts)

    binary_flag(opts[:binary]) |
      valid_num_tries(opts[:num_tries]) |
      lookup_cache_size(opts[:cache_size]) |
      valid_node_order(opts[:order])
  end

  # Selects the tail-mode constant for a boolean.
  #
  # @param bool [Boolean]
  # @return Marisa::BINARY_TAIL for true, Marisa::TEXT_TAIL for false
  # @raise [ArgumentError] for anything other than true or false
  def binary_flag(bool)
    case bool
    when true  then Marisa::BINARY_TAIL
    when false then Marisa::TEXT_TAIL
    else
      raise ArgumentError, "binary_flag must be true or false (got #{bool.inspect})"
    end
  end

  # Validates the requested number of tries.
  #
  # @param num_tries [Integer, Symbol] a count, or :default to use
  #   Marisa::DEFAULT_NUM_TRIES
  # @return [Integer] the validated count
  # @raise [ConfigError] when the value is not an Integer between
  #   Marisa::MIN_NUM_TRIES and Marisa::MAX_NUM_TRIES (inclusive)
  def valid_num_tries(num_tries)
    num_tries = Marisa::DEFAULT_NUM_TRIES if num_tries == :default
    min = Marisa::MIN_NUM_TRIES
    max = Marisa::MAX_NUM_TRIES

    # A range check alone lets values such as 2.5 through, and those cannot
    # be OR-ed into the flag value by config_flags. Require a whole number
    # so that bad input is reported as a ConfigError here instead.
    return num_tries if num_tries.is_a?(Integer) && (min..max).include?(num_tries)

    raise ConfigError, "num_tries (#{num_tries}) must be between #{min} and #{max}"
  end

  # Looks up the constant for a symbolic cache size.
  #
  # @param cache_size [Symbol] a key of CacheSizes
  # @return the value stored in CacheSizes for that key
  # @raise [ConfigError] when the key is unknown
  def lookup_cache_size(cache_size)
    CacheSizes.fetch(cache_size) do
      sizes = CacheSizes.keys
      raise ConfigError, "cache_size (#{cache_size}) must be one of: #{sizes.inspect}"
    end
  end

  # Looks up the constant for a symbolic node order.
  #
  # @param order [Symbol] a key of NodeOrders
  # @return the value stored in NodeOrders for that key
  # @raise [ConfigError] when the key is unknown
  def valid_node_order(order)
    NodeOrders.fetch(order) do
      valid_options = NodeOrders.keys
      raise ConfigError, "node_order (#{order}) must be one of: #{valid_options.inspect}"
    end
  end
end
|
|
76
|
+
end
|