melisa 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +53 -4
- data/ext/extconf.rb +33 -0
- data/ext/marisa-0.2.4/AUTHORS +1 -0
- data/ext/marisa-0.2.4/COPYING +32 -0
- data/ext/marisa-0.2.4/ChangeLog +0 -0
- data/ext/marisa-0.2.4/INSTALL +237 -0
- data/ext/marisa-0.2.4/Makefile.am +24 -0
- data/ext/marisa-0.2.4/Makefile.in +803 -0
- data/ext/marisa-0.2.4/NEWS +0 -0
- data/ext/marisa-0.2.4/README +63 -0
- data/ext/marisa-0.2.4/aclocal.m4 +976 -0
- data/ext/marisa-0.2.4/bindings/Makefile +16 -0
- data/ext/{marisa → marisa-0.2.4}/bindings/marisa-swig.cxx +0 -0
- data/ext/{marisa → marisa-0.2.4}/bindings/marisa-swig.h +0 -0
- data/ext/marisa-0.2.4/bindings/marisa-swig.i +28 -0
- data/ext/marisa-0.2.4/bindings/perl/Makefile.PL +7 -0
- data/ext/{marisa → marisa-0.2.4}/bindings/perl/marisa-swig.cxx +0 -0
- data/ext/{marisa → marisa-0.2.4}/bindings/perl/marisa-swig.h +0 -0
- data/ext/{marisa → marisa-0.2.4}/bindings/perl/marisa-swig_wrap.cxx +0 -0
- data/ext/marisa-0.2.4/bindings/perl/marisa.pm +297 -0
- data/ext/marisa-0.2.4/bindings/perl/sample.dic +0 -0
- data/ext/marisa-0.2.4/bindings/perl/sample.pl +62 -0
- data/ext/{marisa → marisa-0.2.4}/bindings/python/marisa-swig.cxx +0 -0
- data/ext/{marisa → marisa-0.2.4}/bindings/python/marisa-swig.h +0 -0
- data/ext/{marisa → marisa-0.2.4}/bindings/python/marisa-swig_wrap.cxx +0 -0
- data/ext/marisa-0.2.4/bindings/python/marisa.py +206 -0
- data/ext/marisa-0.2.4/bindings/python/sample.dic +0 -0
- data/ext/marisa-0.2.4/bindings/python/sample.py +57 -0
- data/ext/marisa-0.2.4/bindings/python/setup.py +9 -0
- data/ext/marisa-0.2.4/config.guess +1530 -0
- data/ext/marisa-0.2.4/config.sub +1773 -0
- data/ext/marisa-0.2.4/configure +17636 -0
- data/ext/marisa-0.2.4/configure.ac +172 -0
- data/ext/marisa-0.2.4/depcomp +688 -0
- data/ext/marisa-0.2.4/docs/readme.en.html +740 -0
- data/ext/marisa-0.2.4/docs/readme.ja.html +750 -0
- data/ext/marisa-0.2.4/docs/style.css +245 -0
- data/ext/marisa-0.2.4/install-sh +520 -0
- data/ext/marisa-0.2.4/lib/Makefile.am +57 -0
- data/ext/marisa-0.2.4/lib/Makefile.in +701 -0
- data/ext/{marisa → marisa-0.2.4}/lib/marisa.h +0 -0
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/agent.cc +2 -2
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/agent.h +2 -2
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/base.h +3 -3
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/exception.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/algorithm.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/algorithm/sort.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/intrin.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/io.h +3 -3
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/io/mapper.cc +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/io/mapper.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/io/reader.cc +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/io/reader.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/io/writer.cc +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/io/writer.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/trie.h +2 -2
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/trie/cache.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/trie/config.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/trie/entry.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/trie/header.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/trie/history.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/trie/key.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/trie/louds-trie.cc +5 -5
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/trie/louds-trie.h +7 -7
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/trie/range.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/trie/state.h +2 -2
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/trie/tail.cc +3 -3
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/trie/tail.h +3 -3
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/vector.h +3 -3
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/vector/bit-vector.cc +2 -2
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/vector/bit-vector.h +2 -2
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/vector/flat-vector.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/vector/pop-count.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/vector/rank-index.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/vector/vector.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/iostream.h +0 -0
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/key.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/keyset.cc +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/keyset.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/query.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/scoped-array.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/scoped-ptr.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/stdio.h +0 -0
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/trie.cc +4 -4
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/trie.h +2 -2
- data/ext/marisa-0.2.4/ltmain.sh +9661 -0
- data/ext/marisa-0.2.4/m4/libtool.m4 +8001 -0
- data/ext/marisa-0.2.4/m4/ltoptions.m4 +384 -0
- data/ext/marisa-0.2.4/m4/ltsugar.m4 +123 -0
- data/ext/marisa-0.2.4/m4/ltversion.m4 +23 -0
- data/ext/marisa-0.2.4/m4/lt~obsolete.m4 +98 -0
- data/ext/marisa-0.2.4/marisa.pc.in +11 -0
- data/ext/marisa-0.2.4/missing +331 -0
- data/ext/marisa-0.2.4/mkmf.log +288 -0
- data/ext/marisa-0.2.4/pkg/bin/marisa-benchmark +0 -0
- data/ext/marisa-0.2.4/pkg/bin/marisa-build +0 -0
- data/ext/marisa-0.2.4/pkg/bin/marisa-common-prefix-search +0 -0
- data/ext/marisa-0.2.4/pkg/bin/marisa-dump +0 -0
- data/ext/marisa-0.2.4/pkg/bin/marisa-lookup +0 -0
- data/ext/marisa-0.2.4/pkg/bin/marisa-predictive-search +0 -0
- data/ext/marisa-0.2.4/pkg/bin/marisa-reverse-lookup +0 -0
- data/ext/marisa-0.2.4/pkg/include/marisa.h +14 -0
- data/ext/marisa-0.2.4/pkg/include/marisa/agent.h +73 -0
- data/ext/marisa-0.2.4/pkg/include/marisa/base.h +193 -0
- data/ext/marisa-0.2.4/pkg/include/marisa/exception.h +82 -0
- data/ext/marisa-0.2.4/pkg/include/marisa/iostream.h +18 -0
- data/ext/marisa-0.2.4/pkg/include/marisa/key.h +85 -0
- data/ext/marisa-0.2.4/pkg/include/marisa/keyset.h +80 -0
- data/ext/marisa-0.2.4/pkg/include/marisa/query.h +71 -0
- data/ext/marisa-0.2.4/pkg/include/marisa/scoped-array.h +48 -0
- data/ext/marisa-0.2.4/pkg/include/marisa/scoped-ptr.h +52 -0
- data/ext/marisa-0.2.4/pkg/include/marisa/stdio.h +15 -0
- data/ext/marisa-0.2.4/pkg/include/marisa/trie.h +64 -0
- data/ext/marisa-0.2.4/pkg/lib/libmarisa.0.dylib +0 -0
- data/ext/marisa-0.2.4/pkg/lib/libmarisa.a +0 -0
- data/ext/marisa-0.2.4/pkg/lib/libmarisa.dylib +0 -0
- data/ext/marisa-0.2.4/pkg/lib/libmarisa.la +41 -0
- data/ext/marisa-0.2.4/pkg/lib/pkgconfig/marisa.pc +11 -0
- data/ext/marisa-0.2.4/tests/Makefile.am +27 -0
- data/ext/marisa-0.2.4/tests/Makefile.in +624 -0
- data/ext/{marisa → marisa-0.2.4}/tests/base-test.cc +0 -0
- data/ext/{marisa → marisa-0.2.4}/tests/io-test.cc +0 -0
- data/ext/{marisa → marisa-0.2.4}/tests/marisa-assert.h +0 -0
- data/ext/{marisa → marisa-0.2.4}/tests/marisa-test.cc +0 -0
- data/ext/{marisa → marisa-0.2.4}/tests/trie-test.cc +0 -0
- data/ext/{marisa → marisa-0.2.4}/tests/vector-test.cc +0 -0
- data/ext/marisa-0.2.4/tools/Makefile.am +39 -0
- data/ext/marisa-0.2.4/tools/Makefile.in +616 -0
- data/ext/{marisa → marisa-0.2.4}/tools/cmdopt.cc +0 -0
- data/ext/{marisa → marisa-0.2.4}/tools/cmdopt.h +0 -0
- data/ext/{marisa → marisa-0.2.4}/tools/marisa-benchmark.cc +0 -0
- data/ext/{marisa → marisa-0.2.4}/tools/marisa-build.cc +0 -0
- data/ext/{marisa → marisa-0.2.4}/tools/marisa-common-prefix-search.cc +0 -0
- data/ext/{marisa → marisa-0.2.4}/tools/marisa-dump.cc +0 -0
- data/ext/{marisa → marisa-0.2.4}/tools/marisa-lookup.cc +0 -0
- data/ext/{marisa → marisa-0.2.4}/tools/marisa-predictive-search.cc +0 -0
- data/ext/{marisa → marisa-0.2.4}/tools/marisa-reverse-lookup.cc +0 -0
- data/ext/marisa-0.2.4/vs2008/base-test/base-test.vcproj +200 -0
- data/ext/marisa-0.2.4/vs2008/io-test/io-test.vcproj +199 -0
- data/ext/marisa-0.2.4/vs2008/libmarisa/libmarisa.vcproj +347 -0
- data/ext/marisa-0.2.4/vs2008/marisa-benchmark/marisa-benchmark.vcproj +203 -0
- data/ext/marisa-0.2.4/vs2008/marisa-build/marisa-build.vcproj +203 -0
- data/ext/marisa-0.2.4/vs2008/marisa-common-prefix-search/marisa-common-prefix-search.vcproj +203 -0
- data/ext/marisa-0.2.4/vs2008/marisa-dump/marisa-dump.vcproj +203 -0
- data/ext/marisa-0.2.4/vs2008/marisa-lookup/marisa-lookup.vcproj +203 -0
- data/ext/marisa-0.2.4/vs2008/marisa-predictive-search/marisa-predictive-search.vcproj +203 -0
- data/ext/marisa-0.2.4/vs2008/marisa-reverse-lookup/marisa-reverse-lookup.vcproj +203 -0
- data/ext/marisa-0.2.4/vs2008/marisa-test/marisa-test.vcproj +199 -0
- data/ext/marisa-0.2.4/vs2008/trie-test/trie-test.vcproj +199 -0
- data/ext/marisa-0.2.4/vs2008/vector-test/vector-test.vcproj +199 -0
- data/ext/marisa-0.2.4/vs2008/vs2008.sln +123 -0
- data/ext/marisa-0.2.4/vs2008/vs2008.suo +0 -0
- data/ext/{marisa/bindings/ruby/marisa-swig.cxx → marisa-swig.cxx} +0 -0
- data/ext/{marisa/bindings/ruby/marisa-swig.h → marisa-swig.h} +0 -0
- data/ext/{marisa/bindings/ruby/marisa-swig_wrap.cxx → marisa-swig_wrap.cxx} +0 -0
- data/lib/melisa/bytes_trie.rb +5 -0
- data/lib/melisa/trie.rb +2 -0
- data/lib/melisa/version.rb +1 -1
- data/melisa.gemspec +7 -9
- data/spec/bytes_trie_spec.rb +6 -1
- data/spec/int_trie_spec.rb +5 -0
- data/spec/search_spec.rb +6 -7
- data/spec/trie_spec.rb +4 -4
- metadata +178 -121
- data/ext/marisa/bindings/ruby/extconf.rb +0 -5
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
#ifndef MARISA_EXCEPTION_H_
|
|
2
|
+
#define MARISA_EXCEPTION_H_
|
|
3
|
+
|
|
4
|
+
#include <exception>
|
|
5
|
+
|
|
6
|
+
#include "base.h"
|
|
7
|
+
|
|
8
|
+
namespace marisa {
|
|
9
|
+
|
|
10
|
+
// An exception object keeps a filename, a line number, an error code and an
|
|
11
|
+
// error message. The message format is as follows:
|
|
12
|
+
// "__FILE__:__LINE__: error_code: error_message"
|
|
13
|
+
class Exception : public std::exception {
|
|
14
|
+
public:
|
|
15
|
+
Exception(const char *filename, int line,
|
|
16
|
+
ErrorCode error_code, const char *error_message)
|
|
17
|
+
: std::exception(), filename_(filename), line_(line),
|
|
18
|
+
error_code_(error_code), error_message_(error_message) {}
|
|
19
|
+
Exception(const Exception &ex)
|
|
20
|
+
: std::exception(), filename_(ex.filename_), line_(ex.line_),
|
|
21
|
+
error_code_(ex.error_code_), error_message_(ex.error_message_) {}
|
|
22
|
+
virtual ~Exception() throw() {}
|
|
23
|
+
|
|
24
|
+
Exception &operator=(const Exception &rhs) {
|
|
25
|
+
filename_ = rhs.filename_;
|
|
26
|
+
line_ = rhs.line_;
|
|
27
|
+
error_code_ = rhs.error_code_;
|
|
28
|
+
error_message_ = rhs.error_message_;
|
|
29
|
+
return *this;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
const char *filename() const {
|
|
33
|
+
return filename_;
|
|
34
|
+
}
|
|
35
|
+
int line() const {
|
|
36
|
+
return line_;
|
|
37
|
+
}
|
|
38
|
+
ErrorCode error_code() const {
|
|
39
|
+
return error_code_;
|
|
40
|
+
}
|
|
41
|
+
const char *error_message() const {
|
|
42
|
+
return error_message_;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
virtual const char *what() const throw() {
|
|
46
|
+
return error_message_;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
private:
|
|
50
|
+
const char *filename_;
|
|
51
|
+
int line_;
|
|
52
|
+
ErrorCode error_code_;
|
|
53
|
+
const char *error_message_;
|
|
54
|
+
};
|
|
55
|
+
|
|
56
|
+
// These macros are used to convert a line number to a string constant.
|
|
57
|
+
#define MARISA_INT_TO_STR(value) #value
|
|
58
|
+
#define MARISA_LINE_TO_STR(line) MARISA_INT_TO_STR(line)
|
|
59
|
+
#define MARISA_LINE_STR MARISA_LINE_TO_STR(__LINE__)
|
|
60
|
+
|
|
61
|
+
// MARISA_THROW throws an exception with a filename, a line number, an error
|
|
62
|
+
// code and an error message. The message format is as follows:
|
|
63
|
+
// "__FILE__:__LINE__: error_code: error_message"
|
|
64
|
+
#define MARISA_THROW(error_code, error_message) \
|
|
65
|
+
(throw marisa::Exception(__FILE__, __LINE__, error_code, \
|
|
66
|
+
__FILE__ ":" MARISA_LINE_STR ": " #error_code ": " error_message))
|
|
67
|
+
|
|
68
|
+
// MARISA_THROW_IF throws an exception if `condition' is true.
|
|
69
|
+
#define MARISA_THROW_IF(condition, error_code) \
|
|
70
|
+
(void)((!(condition)) || (MARISA_THROW(error_code, #condition), 0))
|
|
71
|
+
|
|
72
|
+
// MARISA_DEBUG_IF is ignored if _DEBUG is undefined. So, it is useful for
|
|
73
|
+
// debugging time-critical code.
|
|
74
|
+
#ifdef _DEBUG
|
|
75
|
+
#define MARISA_DEBUG_IF(cond, error_code) MARISA_THROW_IF(cond, error_code)
|
|
76
|
+
#else
|
|
77
|
+
#define MARISA_DEBUG_IF(cond, error_code)
|
|
78
|
+
#endif
|
|
79
|
+
|
|
80
|
+
} // namespace marisa
|
|
81
|
+
|
|
82
|
+
#endif // MARISA_EXCEPTION_H_
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
#ifndef MARISA_IOSTREAM_H_
|
|
2
|
+
#define MARISA_IOSTREAM_H_
|
|
3
|
+
|
|
4
|
+
#include <iosfwd>
|
|
5
|
+
|
|
6
|
+
namespace marisa {
|
|
7
|
+
|
|
8
|
+
class Trie;
|
|
9
|
+
|
|
10
|
+
std::istream &read(std::istream &stream, Trie *trie);
|
|
11
|
+
std::ostream &write(std::ostream &stream, const Trie &trie);
|
|
12
|
+
|
|
13
|
+
std::istream &operator>>(std::istream &stream, Trie &trie);
|
|
14
|
+
std::ostream &operator<<(std::ostream &stream, const Trie &trie);
|
|
15
|
+
|
|
16
|
+
} // namespace marisa
|
|
17
|
+
|
|
18
|
+
#endif // MARISA_IOSTREAM_H_
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
#ifndef MARISA_KEY_H_
|
|
2
|
+
#define MARISA_KEY_H_
|
|
3
|
+
|
|
4
|
+
#include "base.h"
|
|
5
|
+
|
|
6
|
+
namespace marisa {
|
|
7
|
+
|
|
8
|
+
class Key {
|
|
9
|
+
public:
|
|
10
|
+
Key() : ptr_(NULL), length_(0), union_() {
|
|
11
|
+
union_.id = 0;
|
|
12
|
+
}
|
|
13
|
+
Key(const Key &key)
|
|
14
|
+
: ptr_(key.ptr_), length_(key.length_), union_(key.union_) {}
|
|
15
|
+
|
|
16
|
+
Key &operator=(const Key &key) {
|
|
17
|
+
ptr_ = key.ptr_;
|
|
18
|
+
length_ = key.length_;
|
|
19
|
+
union_ = key.union_;
|
|
20
|
+
return *this;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
char operator[](std::size_t i) const {
|
|
24
|
+
MARISA_DEBUG_IF(i >= length_, MARISA_BOUND_ERROR);
|
|
25
|
+
return ptr_[i];
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
void set_str(const char *str) {
|
|
29
|
+
MARISA_DEBUG_IF(str == NULL, MARISA_NULL_ERROR);
|
|
30
|
+
std::size_t length = 0;
|
|
31
|
+
while (str[length] != '\0') {
|
|
32
|
+
++length;
|
|
33
|
+
}
|
|
34
|
+
MARISA_DEBUG_IF(length > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
|
|
35
|
+
ptr_ = str;
|
|
36
|
+
length_ = (UInt32)length;
|
|
37
|
+
}
|
|
38
|
+
void set_str(const char *ptr, std::size_t length) {
|
|
39
|
+
MARISA_DEBUG_IF((ptr == NULL) && (length != 0), MARISA_NULL_ERROR);
|
|
40
|
+
MARISA_DEBUG_IF(length > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
|
|
41
|
+
ptr_ = ptr;
|
|
42
|
+
length_ = (UInt32)length;
|
|
43
|
+
}
|
|
44
|
+
void set_id(std::size_t id) {
|
|
45
|
+
MARISA_DEBUG_IF(id > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
|
|
46
|
+
union_.id = (UInt32)id;
|
|
47
|
+
}
|
|
48
|
+
void set_weight(float weight) {
|
|
49
|
+
union_.weight = weight;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
const char *ptr() const {
|
|
53
|
+
return ptr_;
|
|
54
|
+
}
|
|
55
|
+
std::size_t length() const {
|
|
56
|
+
return length_;
|
|
57
|
+
}
|
|
58
|
+
std::size_t id() const {
|
|
59
|
+
return union_.id;
|
|
60
|
+
}
|
|
61
|
+
float weight() const {
|
|
62
|
+
return union_.weight;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
void clear() {
|
|
66
|
+
Key().swap(*this);
|
|
67
|
+
}
|
|
68
|
+
void swap(Key &rhs) {
|
|
69
|
+
marisa::swap(ptr_, rhs.ptr_);
|
|
70
|
+
marisa::swap(length_, rhs.length_);
|
|
71
|
+
marisa::swap(union_.id, rhs.union_.id);
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
private:
|
|
75
|
+
const char *ptr_;
|
|
76
|
+
UInt32 length_;
|
|
77
|
+
union Union {
|
|
78
|
+
UInt32 id;
|
|
79
|
+
float weight;
|
|
80
|
+
} union_;
|
|
81
|
+
};
|
|
82
|
+
|
|
83
|
+
} // namespace marisa
|
|
84
|
+
|
|
85
|
+
#endif // MARISA_KEY_H_
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
#ifndef MARISA_KEYSET_H_
|
|
2
|
+
#define MARISA_KEYSET_H_
|
|
3
|
+
|
|
4
|
+
#include "key.h"
|
|
5
|
+
|
|
6
|
+
namespace marisa {
|
|
7
|
+
|
|
8
|
+
class Keyset {
|
|
9
|
+
public:
|
|
10
|
+
enum {
|
|
11
|
+
BASE_BLOCK_SIZE = 4096,
|
|
12
|
+
EXTRA_BLOCK_SIZE = 1024,
|
|
13
|
+
KEY_BLOCK_SIZE = 256
|
|
14
|
+
};
|
|
15
|
+
|
|
16
|
+
Keyset();
|
|
17
|
+
|
|
18
|
+
void push_back(const Key &key);
|
|
19
|
+
void push_back(const Key &key, char end_marker);
|
|
20
|
+
|
|
21
|
+
void push_back(const char *str);
|
|
22
|
+
void push_back(const char *ptr, std::size_t length, float weight = 1.0);
|
|
23
|
+
|
|
24
|
+
const Key &operator[](std::size_t i) const {
|
|
25
|
+
MARISA_DEBUG_IF(i >= size_, MARISA_BOUND_ERROR);
|
|
26
|
+
return key_blocks_[i / KEY_BLOCK_SIZE][i % KEY_BLOCK_SIZE];
|
|
27
|
+
}
|
|
28
|
+
Key &operator[](std::size_t i) {
|
|
29
|
+
MARISA_DEBUG_IF(i >= size_, MARISA_BOUND_ERROR);
|
|
30
|
+
return key_blocks_[i / KEY_BLOCK_SIZE][i % KEY_BLOCK_SIZE];
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
std::size_t num_keys() const {
|
|
34
|
+
return size_;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
bool empty() const {
|
|
38
|
+
return size_ == 0;
|
|
39
|
+
}
|
|
40
|
+
std::size_t size() const {
|
|
41
|
+
return size_;
|
|
42
|
+
}
|
|
43
|
+
std::size_t total_length() const {
|
|
44
|
+
return total_length_;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
void reset();
|
|
48
|
+
|
|
49
|
+
void clear();
|
|
50
|
+
void swap(Keyset &rhs);
|
|
51
|
+
|
|
52
|
+
private:
|
|
53
|
+
scoped_array<scoped_array<char> > base_blocks_;
|
|
54
|
+
std::size_t base_blocks_size_;
|
|
55
|
+
std::size_t base_blocks_capacity_;
|
|
56
|
+
scoped_array<scoped_array<char> > extra_blocks_;
|
|
57
|
+
std::size_t extra_blocks_size_;
|
|
58
|
+
std::size_t extra_blocks_capacity_;
|
|
59
|
+
scoped_array<scoped_array<Key> > key_blocks_;
|
|
60
|
+
std::size_t key_blocks_size_;
|
|
61
|
+
std::size_t key_blocks_capacity_;
|
|
62
|
+
char *ptr_;
|
|
63
|
+
std::size_t avail_;
|
|
64
|
+
std::size_t size_;
|
|
65
|
+
std::size_t total_length_;
|
|
66
|
+
|
|
67
|
+
char *reserve(std::size_t size);
|
|
68
|
+
|
|
69
|
+
void append_base_block();
|
|
70
|
+
void append_extra_block(std::size_t size);
|
|
71
|
+
void append_key_block();
|
|
72
|
+
|
|
73
|
+
// Disallows copy and assignment.
|
|
74
|
+
Keyset(const Keyset &);
|
|
75
|
+
Keyset &operator=(const Keyset &);
|
|
76
|
+
};
|
|
77
|
+
|
|
78
|
+
} // namespace marisa
|
|
79
|
+
|
|
80
|
+
#endif // MARISA_KEYSET_H_
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
#ifndef MARISA_QUERY_H_
|
|
2
|
+
#define MARISA_QUERY_H_
|
|
3
|
+
|
|
4
|
+
#include "base.h"
|
|
5
|
+
|
|
6
|
+
namespace marisa {
|
|
7
|
+
|
|
8
|
+
class Query {
|
|
9
|
+
public:
|
|
10
|
+
Query() : ptr_(NULL), length_(0), id_(0) {}
|
|
11
|
+
Query(const Query &query)
|
|
12
|
+
: ptr_(query.ptr_), length_(query.length_), id_(query.id_) {}
|
|
13
|
+
|
|
14
|
+
Query &operator=(const Query &query) {
|
|
15
|
+
ptr_ = query.ptr_;
|
|
16
|
+
length_ = query.length_;
|
|
17
|
+
id_ = query.id_;
|
|
18
|
+
return *this;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
char operator[](std::size_t i) const {
|
|
22
|
+
MARISA_DEBUG_IF(i >= length_, MARISA_BOUND_ERROR);
|
|
23
|
+
return ptr_[i];
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
void set_str(const char *str) {
|
|
27
|
+
MARISA_DEBUG_IF(str == NULL, MARISA_NULL_ERROR);
|
|
28
|
+
std::size_t length = 0;
|
|
29
|
+
while (str[length] != '\0') {
|
|
30
|
+
++length;
|
|
31
|
+
}
|
|
32
|
+
ptr_ = str;
|
|
33
|
+
length_ = length;
|
|
34
|
+
}
|
|
35
|
+
void set_str(const char *ptr, std::size_t length) {
|
|
36
|
+
MARISA_DEBUG_IF((ptr == NULL) && (length != 0), MARISA_NULL_ERROR);
|
|
37
|
+
ptr_ = ptr;
|
|
38
|
+
length_ = length;
|
|
39
|
+
}
|
|
40
|
+
void set_id(std::size_t id) {
|
|
41
|
+
id_ = id;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
const char *ptr() const {
|
|
45
|
+
return ptr_;
|
|
46
|
+
}
|
|
47
|
+
std::size_t length() const {
|
|
48
|
+
return length_;
|
|
49
|
+
}
|
|
50
|
+
std::size_t id() const {
|
|
51
|
+
return id_;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
void clear() {
|
|
55
|
+
Query().swap(*this);
|
|
56
|
+
}
|
|
57
|
+
void swap(Query &rhs) {
|
|
58
|
+
marisa::swap(ptr_, rhs.ptr_);
|
|
59
|
+
marisa::swap(length_, rhs.length_);
|
|
60
|
+
marisa::swap(id_, rhs.id_);
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
private:
|
|
64
|
+
const char *ptr_;
|
|
65
|
+
std::size_t length_;
|
|
66
|
+
std::size_t id_;
|
|
67
|
+
};
|
|
68
|
+
|
|
69
|
+
} // namespace marisa
|
|
70
|
+
|
|
71
|
+
#endif // MARISA_QUERY_H_
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
#ifndef MARISA_SCOPED_ARRAY_H_
|
|
2
|
+
#define MARISA_SCOPED_ARRAY_H_
|
|
3
|
+
|
|
4
|
+
#include "base.h"
|
|
5
|
+
|
|
6
|
+
namespace marisa {
|
|
7
|
+
|
|
8
|
+
template <typename T>
|
|
9
|
+
class scoped_array {
|
|
10
|
+
public:
|
|
11
|
+
scoped_array() : array_(NULL) {}
|
|
12
|
+
explicit scoped_array(T *array) : array_(array) {}
|
|
13
|
+
|
|
14
|
+
~scoped_array() {
|
|
15
|
+
delete [] array_;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
void reset(T *array = NULL) {
|
|
19
|
+
MARISA_THROW_IF((array != NULL) && (array == array_), MARISA_RESET_ERROR);
|
|
20
|
+
scoped_array(array).swap(*this);
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
T &operator[](std::size_t i) const {
|
|
24
|
+
MARISA_DEBUG_IF(array_ == NULL, MARISA_STATE_ERROR);
|
|
25
|
+
return array_[i];
|
|
26
|
+
}
|
|
27
|
+
T *get() const {
|
|
28
|
+
return array_;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
void clear() {
|
|
32
|
+
scoped_array().swap(*this);
|
|
33
|
+
}
|
|
34
|
+
void swap(scoped_array &rhs) {
|
|
35
|
+
marisa::swap(array_, rhs.array_);
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
private:
|
|
39
|
+
T *array_;
|
|
40
|
+
|
|
41
|
+
// Disallows copy and assignment.
|
|
42
|
+
scoped_array(const scoped_array &);
|
|
43
|
+
scoped_array &operator=(const scoped_array &);
|
|
44
|
+
};
|
|
45
|
+
|
|
46
|
+
} // namespace marisa
|
|
47
|
+
|
|
48
|
+
#endif // MARISA_SCOPED_ARRAY_H_
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
#ifndef MARISA_SCOPED_PTR_H_
|
|
2
|
+
#define MARISA_SCOPED_PTR_H_
|
|
3
|
+
|
|
4
|
+
#include "base.h"
|
|
5
|
+
|
|
6
|
+
namespace marisa {
|
|
7
|
+
|
|
8
|
+
template <typename T>
|
|
9
|
+
class scoped_ptr {
|
|
10
|
+
public:
|
|
11
|
+
scoped_ptr() : ptr_(NULL) {}
|
|
12
|
+
explicit scoped_ptr(T *ptr) : ptr_(ptr) {}
|
|
13
|
+
|
|
14
|
+
~scoped_ptr() {
|
|
15
|
+
delete ptr_;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
void reset(T *ptr = NULL) {
|
|
19
|
+
MARISA_THROW_IF((ptr != NULL) && (ptr == ptr_), MARISA_RESET_ERROR);
|
|
20
|
+
scoped_ptr(ptr).swap(*this);
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
T &operator*() const {
|
|
24
|
+
MARISA_DEBUG_IF(ptr_ == NULL, MARISA_STATE_ERROR);
|
|
25
|
+
return *ptr_;
|
|
26
|
+
}
|
|
27
|
+
T *operator->() const {
|
|
28
|
+
MARISA_DEBUG_IF(ptr_ == NULL, MARISA_STATE_ERROR);
|
|
29
|
+
return ptr_;
|
|
30
|
+
}
|
|
31
|
+
T *get() const {
|
|
32
|
+
return ptr_;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
void clear() {
|
|
36
|
+
scoped_ptr().swap(*this);
|
|
37
|
+
}
|
|
38
|
+
void swap(scoped_ptr &rhs) {
|
|
39
|
+
marisa::swap(ptr_, rhs.ptr_);
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
private:
|
|
43
|
+
T *ptr_;
|
|
44
|
+
|
|
45
|
+
// Disallows copy and assignment.
|
|
46
|
+
scoped_ptr(const scoped_ptr &);
|
|
47
|
+
scoped_ptr &operator=(const scoped_ptr &);
|
|
48
|
+
};
|
|
49
|
+
|
|
50
|
+
} // namespace marisa
|
|
51
|
+
|
|
52
|
+
#endif // MARISA_SCOPED_PTR_H_
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
#ifndef MARISA_STDIO_H_
|
|
2
|
+
#define MARISA_STDIO_H_
|
|
3
|
+
|
|
4
|
+
#include <cstdio>
|
|
5
|
+
|
|
6
|
+
namespace marisa {
|
|
7
|
+
|
|
8
|
+
class Trie;
|
|
9
|
+
|
|
10
|
+
void fread(std::FILE *file, Trie *trie);
|
|
11
|
+
void fwrite(std::FILE *file, const Trie &trie);
|
|
12
|
+
|
|
13
|
+
} // namespace marisa
|
|
14
|
+
|
|
15
|
+
#endif // MARISA_STDIO_H_
|