melisa 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +53 -4
- data/ext/extconf.rb +33 -0
- data/ext/marisa-0.2.4/AUTHORS +1 -0
- data/ext/marisa-0.2.4/COPYING +32 -0
- data/ext/marisa-0.2.4/ChangeLog +0 -0
- data/ext/marisa-0.2.4/INSTALL +237 -0
- data/ext/marisa-0.2.4/Makefile.am +24 -0
- data/ext/marisa-0.2.4/Makefile.in +803 -0
- data/ext/marisa-0.2.4/NEWS +0 -0
- data/ext/marisa-0.2.4/README +63 -0
- data/ext/marisa-0.2.4/aclocal.m4 +976 -0
- data/ext/marisa-0.2.4/bindings/Makefile +16 -0
- data/ext/{marisa → marisa-0.2.4}/bindings/marisa-swig.cxx +0 -0
- data/ext/{marisa → marisa-0.2.4}/bindings/marisa-swig.h +0 -0
- data/ext/marisa-0.2.4/bindings/marisa-swig.i +28 -0
- data/ext/marisa-0.2.4/bindings/perl/Makefile.PL +7 -0
- data/ext/{marisa → marisa-0.2.4}/bindings/perl/marisa-swig.cxx +0 -0
- data/ext/{marisa → marisa-0.2.4}/bindings/perl/marisa-swig.h +0 -0
- data/ext/{marisa → marisa-0.2.4}/bindings/perl/marisa-swig_wrap.cxx +0 -0
- data/ext/marisa-0.2.4/bindings/perl/marisa.pm +297 -0
- data/ext/marisa-0.2.4/bindings/perl/sample.dic +0 -0
- data/ext/marisa-0.2.4/bindings/perl/sample.pl +62 -0
- data/ext/{marisa → marisa-0.2.4}/bindings/python/marisa-swig.cxx +0 -0
- data/ext/{marisa → marisa-0.2.4}/bindings/python/marisa-swig.h +0 -0
- data/ext/{marisa → marisa-0.2.4}/bindings/python/marisa-swig_wrap.cxx +0 -0
- data/ext/marisa-0.2.4/bindings/python/marisa.py +206 -0
- data/ext/marisa-0.2.4/bindings/python/sample.dic +0 -0
- data/ext/marisa-0.2.4/bindings/python/sample.py +57 -0
- data/ext/marisa-0.2.4/bindings/python/setup.py +9 -0
- data/ext/marisa-0.2.4/config.guess +1530 -0
- data/ext/marisa-0.2.4/config.sub +1773 -0
- data/ext/marisa-0.2.4/configure +17636 -0
- data/ext/marisa-0.2.4/configure.ac +172 -0
- data/ext/marisa-0.2.4/depcomp +688 -0
- data/ext/marisa-0.2.4/docs/readme.en.html +740 -0
- data/ext/marisa-0.2.4/docs/readme.ja.html +750 -0
- data/ext/marisa-0.2.4/docs/style.css +245 -0
- data/ext/marisa-0.2.4/install-sh +520 -0
- data/ext/marisa-0.2.4/lib/Makefile.am +57 -0
- data/ext/marisa-0.2.4/lib/Makefile.in +701 -0
- data/ext/{marisa → marisa-0.2.4}/lib/marisa.h +0 -0
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/agent.cc +2 -2
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/agent.h +2 -2
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/base.h +3 -3
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/exception.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/algorithm.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/algorithm/sort.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/intrin.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/io.h +3 -3
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/io/mapper.cc +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/io/mapper.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/io/reader.cc +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/io/reader.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/io/writer.cc +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/io/writer.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/trie.h +2 -2
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/trie/cache.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/trie/config.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/trie/entry.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/trie/header.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/trie/history.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/trie/key.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/trie/louds-trie.cc +5 -5
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/trie/louds-trie.h +7 -7
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/trie/range.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/trie/state.h +2 -2
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/trie/tail.cc +3 -3
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/trie/tail.h +3 -3
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/vector.h +3 -3
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/vector/bit-vector.cc +2 -2
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/vector/bit-vector.h +2 -2
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/vector/flat-vector.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/vector/pop-count.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/vector/rank-index.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/grimoire/vector/vector.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/iostream.h +0 -0
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/key.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/keyset.cc +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/keyset.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/query.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/scoped-array.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/scoped-ptr.h +1 -1
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/stdio.h +0 -0
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/trie.cc +4 -4
- data/ext/{marisa → marisa-0.2.4}/lib/marisa/trie.h +2 -2
- data/ext/marisa-0.2.4/ltmain.sh +9661 -0
- data/ext/marisa-0.2.4/m4/libtool.m4 +8001 -0
- data/ext/marisa-0.2.4/m4/ltoptions.m4 +384 -0
- data/ext/marisa-0.2.4/m4/ltsugar.m4 +123 -0
- data/ext/marisa-0.2.4/m4/ltversion.m4 +23 -0
- data/ext/marisa-0.2.4/m4/lt~obsolete.m4 +98 -0
- data/ext/marisa-0.2.4/marisa.pc.in +11 -0
- data/ext/marisa-0.2.4/missing +331 -0
- data/ext/marisa-0.2.4/mkmf.log +288 -0
- data/ext/marisa-0.2.4/pkg/bin/marisa-benchmark +0 -0
- data/ext/marisa-0.2.4/pkg/bin/marisa-build +0 -0
- data/ext/marisa-0.2.4/pkg/bin/marisa-common-prefix-search +0 -0
- data/ext/marisa-0.2.4/pkg/bin/marisa-dump +0 -0
- data/ext/marisa-0.2.4/pkg/bin/marisa-lookup +0 -0
- data/ext/marisa-0.2.4/pkg/bin/marisa-predictive-search +0 -0
- data/ext/marisa-0.2.4/pkg/bin/marisa-reverse-lookup +0 -0
- data/ext/marisa-0.2.4/pkg/include/marisa.h +14 -0
- data/ext/marisa-0.2.4/pkg/include/marisa/agent.h +73 -0
- data/ext/marisa-0.2.4/pkg/include/marisa/base.h +193 -0
- data/ext/marisa-0.2.4/pkg/include/marisa/exception.h +82 -0
- data/ext/marisa-0.2.4/pkg/include/marisa/iostream.h +18 -0
- data/ext/marisa-0.2.4/pkg/include/marisa/key.h +85 -0
- data/ext/marisa-0.2.4/pkg/include/marisa/keyset.h +80 -0
- data/ext/marisa-0.2.4/pkg/include/marisa/query.h +71 -0
- data/ext/marisa-0.2.4/pkg/include/marisa/scoped-array.h +48 -0
- data/ext/marisa-0.2.4/pkg/include/marisa/scoped-ptr.h +52 -0
- data/ext/marisa-0.2.4/pkg/include/marisa/stdio.h +15 -0
- data/ext/marisa-0.2.4/pkg/include/marisa/trie.h +64 -0
- data/ext/marisa-0.2.4/pkg/lib/libmarisa.0.dylib +0 -0
- data/ext/marisa-0.2.4/pkg/lib/libmarisa.a +0 -0
- data/ext/marisa-0.2.4/pkg/lib/libmarisa.dylib +0 -0
- data/ext/marisa-0.2.4/pkg/lib/libmarisa.la +41 -0
- data/ext/marisa-0.2.4/pkg/lib/pkgconfig/marisa.pc +11 -0
- data/ext/marisa-0.2.4/tests/Makefile.am +27 -0
- data/ext/marisa-0.2.4/tests/Makefile.in +624 -0
- data/ext/{marisa → marisa-0.2.4}/tests/base-test.cc +0 -0
- data/ext/{marisa → marisa-0.2.4}/tests/io-test.cc +0 -0
- data/ext/{marisa → marisa-0.2.4}/tests/marisa-assert.h +0 -0
- data/ext/{marisa → marisa-0.2.4}/tests/marisa-test.cc +0 -0
- data/ext/{marisa → marisa-0.2.4}/tests/trie-test.cc +0 -0
- data/ext/{marisa → marisa-0.2.4}/tests/vector-test.cc +0 -0
- data/ext/marisa-0.2.4/tools/Makefile.am +39 -0
- data/ext/marisa-0.2.4/tools/Makefile.in +616 -0
- data/ext/{marisa → marisa-0.2.4}/tools/cmdopt.cc +0 -0
- data/ext/{marisa → marisa-0.2.4}/tools/cmdopt.h +0 -0
- data/ext/{marisa → marisa-0.2.4}/tools/marisa-benchmark.cc +0 -0
- data/ext/{marisa → marisa-0.2.4}/tools/marisa-build.cc +0 -0
- data/ext/{marisa → marisa-0.2.4}/tools/marisa-common-prefix-search.cc +0 -0
- data/ext/{marisa → marisa-0.2.4}/tools/marisa-dump.cc +0 -0
- data/ext/{marisa → marisa-0.2.4}/tools/marisa-lookup.cc +0 -0
- data/ext/{marisa → marisa-0.2.4}/tools/marisa-predictive-search.cc +0 -0
- data/ext/{marisa → marisa-0.2.4}/tools/marisa-reverse-lookup.cc +0 -0
- data/ext/marisa-0.2.4/vs2008/base-test/base-test.vcproj +200 -0
- data/ext/marisa-0.2.4/vs2008/io-test/io-test.vcproj +199 -0
- data/ext/marisa-0.2.4/vs2008/libmarisa/libmarisa.vcproj +347 -0
- data/ext/marisa-0.2.4/vs2008/marisa-benchmark/marisa-benchmark.vcproj +203 -0
- data/ext/marisa-0.2.4/vs2008/marisa-build/marisa-build.vcproj +203 -0
- data/ext/marisa-0.2.4/vs2008/marisa-common-prefix-search/marisa-common-prefix-search.vcproj +203 -0
- data/ext/marisa-0.2.4/vs2008/marisa-dump/marisa-dump.vcproj +203 -0
- data/ext/marisa-0.2.4/vs2008/marisa-lookup/marisa-lookup.vcproj +203 -0
- data/ext/marisa-0.2.4/vs2008/marisa-predictive-search/marisa-predictive-search.vcproj +203 -0
- data/ext/marisa-0.2.4/vs2008/marisa-reverse-lookup/marisa-reverse-lookup.vcproj +203 -0
- data/ext/marisa-0.2.4/vs2008/marisa-test/marisa-test.vcproj +199 -0
- data/ext/marisa-0.2.4/vs2008/trie-test/trie-test.vcproj +199 -0
- data/ext/marisa-0.2.4/vs2008/vector-test/vector-test.vcproj +199 -0
- data/ext/marisa-0.2.4/vs2008/vs2008.sln +123 -0
- data/ext/marisa-0.2.4/vs2008/vs2008.suo +0 -0
- data/ext/{marisa/bindings/ruby/marisa-swig.cxx → marisa-swig.cxx} +0 -0
- data/ext/{marisa/bindings/ruby/marisa-swig.h → marisa-swig.h} +0 -0
- data/ext/{marisa/bindings/ruby/marisa-swig_wrap.cxx → marisa-swig_wrap.cxx} +0 -0
- data/lib/melisa/bytes_trie.rb +5 -0
- data/lib/melisa/trie.rb +2 -0
- data/lib/melisa/version.rb +1 -1
- data/melisa.gemspec +7 -9
- data/spec/bytes_trie_spec.rb +6 -1
- data/spec/int_trie_spec.rb +5 -0
- data/spec/search_spec.rb +6 -7
- data/spec/trie_spec.rb +4 -4
- metadata +178 -121
- data/ext/marisa/bindings/ruby/extconf.rb +0 -5
@@ -0,0 +1,82 @@
|
|
1
|
+
#ifndef MARISA_EXCEPTION_H_
|
2
|
+
#define MARISA_EXCEPTION_H_
|
3
|
+
|
4
|
+
#include <exception>
|
5
|
+
|
6
|
+
#include "base.h"
|
7
|
+
|
8
|
+
namespace marisa {
|
9
|
+
|
10
|
+
// An exception object keeps a filename, a line number, an error code and an
|
11
|
+
// error message. The message format is as follows:
|
12
|
+
// "__FILE__:__LINE__: error_code: error_message"
|
13
|
+
class Exception : public std::exception {
|
14
|
+
public:
|
15
|
+
Exception(const char *filename, int line,
|
16
|
+
ErrorCode error_code, const char *error_message)
|
17
|
+
: std::exception(), filename_(filename), line_(line),
|
18
|
+
error_code_(error_code), error_message_(error_message) {}
|
19
|
+
Exception(const Exception &ex)
|
20
|
+
: std::exception(), filename_(ex.filename_), line_(ex.line_),
|
21
|
+
error_code_(ex.error_code_), error_message_(ex.error_message_) {}
|
22
|
+
virtual ~Exception() throw() {}
|
23
|
+
|
24
|
+
Exception &operator=(const Exception &rhs) {
|
25
|
+
filename_ = rhs.filename_;
|
26
|
+
line_ = rhs.line_;
|
27
|
+
error_code_ = rhs.error_code_;
|
28
|
+
error_message_ = rhs.error_message_;
|
29
|
+
return *this;
|
30
|
+
}
|
31
|
+
|
32
|
+
const char *filename() const {
|
33
|
+
return filename_;
|
34
|
+
}
|
35
|
+
int line() const {
|
36
|
+
return line_;
|
37
|
+
}
|
38
|
+
ErrorCode error_code() const {
|
39
|
+
return error_code_;
|
40
|
+
}
|
41
|
+
const char *error_message() const {
|
42
|
+
return error_message_;
|
43
|
+
}
|
44
|
+
|
45
|
+
virtual const char *what() const throw() {
|
46
|
+
return error_message_;
|
47
|
+
}
|
48
|
+
|
49
|
+
private:
|
50
|
+
const char *filename_;
|
51
|
+
int line_;
|
52
|
+
ErrorCode error_code_;
|
53
|
+
const char *error_message_;
|
54
|
+
};
|
55
|
+
|
56
|
+
// Helpers that turn the current line number into a string literal. The extra
// level of indirection lets __LINE__ expand to its number before `#' is
// applied to it.
#define MARISA_INT_TO_STR(x) #x
#define MARISA_LINE_TO_STR(number) MARISA_INT_TO_STR(number)
#define MARISA_LINE_STR MARISA_LINE_TO_STR(__LINE__)

// MARISA_THROW throws a marisa::Exception that carries the file name, the
// line number, the error code and a message. `message' must be a string
// literal because it is concatenated into the following format:
//  "__FILE__:__LINE__: error_code: error_message"
#define MARISA_THROW(code, message) \
  (throw marisa::Exception(__FILE__, __LINE__, code, \
       __FILE__ ":" MARISA_LINE_STR ": " #code ": " message))

// MARISA_THROW_IF throws an exception if `expr' is true. The text of `expr'
// becomes the error message.
#define MARISA_THROW_IF(expr, code) \
  (void)((!(expr)) || (MARISA_THROW(code, #expr), 0))

// MARISA_DEBUG_IF behaves like MARISA_THROW_IF when _DEBUG is defined and
// expands to nothing otherwise, so it is useful for checks in time-critical
// code.
#ifndef _DEBUG
 #define MARISA_DEBUG_IF(expr, code)
#else
 #define MARISA_DEBUG_IF(expr, code) MARISA_THROW_IF(expr, code)
#endif
|
79
|
+
|
80
|
+
} // namespace marisa
|
81
|
+
|
82
|
+
#endif // MARISA_EXCEPTION_H_
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#ifndef MARISA_IOSTREAM_H_
|
2
|
+
#define MARISA_IOSTREAM_H_
|
3
|
+
|
4
|
+
#include <iosfwd>
|
5
|
+
|
6
|
+
namespace marisa {
|
7
|
+
|
8
|
+
class Trie;
|
9
|
+
|
10
|
+
std::istream &read(std::istream &stream, Trie *trie);
|
11
|
+
std::ostream &write(std::ostream &stream, const Trie &trie);
|
12
|
+
|
13
|
+
std::istream &operator>>(std::istream &stream, Trie &trie);
|
14
|
+
std::ostream &operator<<(std::ostream &stream, const Trie &trie);
|
15
|
+
|
16
|
+
} // namespace marisa
|
17
|
+
|
18
|
+
#endif // MARISA_IOSTREAM_H_
|
@@ -0,0 +1,85 @@
|
|
1
|
+
#ifndef MARISA_KEY_H_
|
2
|
+
#define MARISA_KEY_H_
|
3
|
+
|
4
|
+
#include "base.h"
|
5
|
+
|
6
|
+
namespace marisa {
|
7
|
+
|
8
|
+
class Key {
|
9
|
+
public:
|
10
|
+
Key() : ptr_(NULL), length_(0), union_() {
|
11
|
+
union_.id = 0;
|
12
|
+
}
|
13
|
+
Key(const Key &key)
|
14
|
+
: ptr_(key.ptr_), length_(key.length_), union_(key.union_) {}
|
15
|
+
|
16
|
+
Key &operator=(const Key &key) {
|
17
|
+
ptr_ = key.ptr_;
|
18
|
+
length_ = key.length_;
|
19
|
+
union_ = key.union_;
|
20
|
+
return *this;
|
21
|
+
}
|
22
|
+
|
23
|
+
char operator[](std::size_t i) const {
|
24
|
+
MARISA_DEBUG_IF(i >= length_, MARISA_BOUND_ERROR);
|
25
|
+
return ptr_[i];
|
26
|
+
}
|
27
|
+
|
28
|
+
void set_str(const char *str) {
|
29
|
+
MARISA_DEBUG_IF(str == NULL, MARISA_NULL_ERROR);
|
30
|
+
std::size_t length = 0;
|
31
|
+
while (str[length] != '\0') {
|
32
|
+
++length;
|
33
|
+
}
|
34
|
+
MARISA_DEBUG_IF(length > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
|
35
|
+
ptr_ = str;
|
36
|
+
length_ = (UInt32)length;
|
37
|
+
}
|
38
|
+
void set_str(const char *ptr, std::size_t length) {
|
39
|
+
MARISA_DEBUG_IF((ptr == NULL) && (length != 0), MARISA_NULL_ERROR);
|
40
|
+
MARISA_DEBUG_IF(length > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
|
41
|
+
ptr_ = ptr;
|
42
|
+
length_ = (UInt32)length;
|
43
|
+
}
|
44
|
+
void set_id(std::size_t id) {
|
45
|
+
MARISA_DEBUG_IF(id > MARISA_UINT32_MAX, MARISA_SIZE_ERROR);
|
46
|
+
union_.id = (UInt32)id;
|
47
|
+
}
|
48
|
+
void set_weight(float weight) {
|
49
|
+
union_.weight = weight;
|
50
|
+
}
|
51
|
+
|
52
|
+
const char *ptr() const {
|
53
|
+
return ptr_;
|
54
|
+
}
|
55
|
+
std::size_t length() const {
|
56
|
+
return length_;
|
57
|
+
}
|
58
|
+
std::size_t id() const {
|
59
|
+
return union_.id;
|
60
|
+
}
|
61
|
+
float weight() const {
|
62
|
+
return union_.weight;
|
63
|
+
}
|
64
|
+
|
65
|
+
void clear() {
|
66
|
+
Key().swap(*this);
|
67
|
+
}
|
68
|
+
void swap(Key &rhs) {
|
69
|
+
marisa::swap(ptr_, rhs.ptr_);
|
70
|
+
marisa::swap(length_, rhs.length_);
|
71
|
+
marisa::swap(union_.id, rhs.union_.id);
|
72
|
+
}
|
73
|
+
|
74
|
+
private:
|
75
|
+
const char *ptr_;
|
76
|
+
UInt32 length_;
|
77
|
+
union Union {
|
78
|
+
UInt32 id;
|
79
|
+
float weight;
|
80
|
+
} union_;
|
81
|
+
};
|
82
|
+
|
83
|
+
} // namespace marisa
|
84
|
+
|
85
|
+
#endif // MARISA_KEY_H_
|
@@ -0,0 +1,80 @@
|
|
1
|
+
#ifndef MARISA_KEYSET_H_
|
2
|
+
#define MARISA_KEYSET_H_
|
3
|
+
|
4
|
+
#include "key.h"
|
5
|
+
|
6
|
+
namespace marisa {
|
7
|
+
|
8
|
+
class Keyset {
|
9
|
+
public:
|
10
|
+
enum {
|
11
|
+
BASE_BLOCK_SIZE = 4096,
|
12
|
+
EXTRA_BLOCK_SIZE = 1024,
|
13
|
+
KEY_BLOCK_SIZE = 256
|
14
|
+
};
|
15
|
+
|
16
|
+
Keyset();
|
17
|
+
|
18
|
+
void push_back(const Key &key);
|
19
|
+
void push_back(const Key &key, char end_marker);
|
20
|
+
|
21
|
+
void push_back(const char *str);
|
22
|
+
void push_back(const char *ptr, std::size_t length, float weight = 1.0);
|
23
|
+
|
24
|
+
const Key &operator[](std::size_t i) const {
|
25
|
+
MARISA_DEBUG_IF(i >= size_, MARISA_BOUND_ERROR);
|
26
|
+
return key_blocks_[i / KEY_BLOCK_SIZE][i % KEY_BLOCK_SIZE];
|
27
|
+
}
|
28
|
+
Key &operator[](std::size_t i) {
|
29
|
+
MARISA_DEBUG_IF(i >= size_, MARISA_BOUND_ERROR);
|
30
|
+
return key_blocks_[i / KEY_BLOCK_SIZE][i % KEY_BLOCK_SIZE];
|
31
|
+
}
|
32
|
+
|
33
|
+
std::size_t num_keys() const {
|
34
|
+
return size_;
|
35
|
+
}
|
36
|
+
|
37
|
+
bool empty() const {
|
38
|
+
return size_ == 0;
|
39
|
+
}
|
40
|
+
std::size_t size() const {
|
41
|
+
return size_;
|
42
|
+
}
|
43
|
+
std::size_t total_length() const {
|
44
|
+
return total_length_;
|
45
|
+
}
|
46
|
+
|
47
|
+
void reset();
|
48
|
+
|
49
|
+
void clear();
|
50
|
+
void swap(Keyset &rhs);
|
51
|
+
|
52
|
+
private:
|
53
|
+
scoped_array<scoped_array<char> > base_blocks_;
|
54
|
+
std::size_t base_blocks_size_;
|
55
|
+
std::size_t base_blocks_capacity_;
|
56
|
+
scoped_array<scoped_array<char> > extra_blocks_;
|
57
|
+
std::size_t extra_blocks_size_;
|
58
|
+
std::size_t extra_blocks_capacity_;
|
59
|
+
scoped_array<scoped_array<Key> > key_blocks_;
|
60
|
+
std::size_t key_blocks_size_;
|
61
|
+
std::size_t key_blocks_capacity_;
|
62
|
+
char *ptr_;
|
63
|
+
std::size_t avail_;
|
64
|
+
std::size_t size_;
|
65
|
+
std::size_t total_length_;
|
66
|
+
|
67
|
+
char *reserve(std::size_t size);
|
68
|
+
|
69
|
+
void append_base_block();
|
70
|
+
void append_extra_block(std::size_t size);
|
71
|
+
void append_key_block();
|
72
|
+
|
73
|
+
// Disallows copy and assignment.
|
74
|
+
Keyset(const Keyset &);
|
75
|
+
Keyset &operator=(const Keyset &);
|
76
|
+
};
|
77
|
+
|
78
|
+
} // namespace marisa
|
79
|
+
|
80
|
+
#endif // MARISA_KEYSET_H_
|
@@ -0,0 +1,71 @@
|
|
1
|
+
#ifndef MARISA_QUERY_H_
|
2
|
+
#define MARISA_QUERY_H_
|
3
|
+
|
4
|
+
#include "base.h"
|
5
|
+
|
6
|
+
namespace marisa {
|
7
|
+
|
8
|
+
class Query {
|
9
|
+
public:
|
10
|
+
Query() : ptr_(NULL), length_(0), id_(0) {}
|
11
|
+
Query(const Query &query)
|
12
|
+
: ptr_(query.ptr_), length_(query.length_), id_(query.id_) {}
|
13
|
+
|
14
|
+
Query &operator=(const Query &query) {
|
15
|
+
ptr_ = query.ptr_;
|
16
|
+
length_ = query.length_;
|
17
|
+
id_ = query.id_;
|
18
|
+
return *this;
|
19
|
+
}
|
20
|
+
|
21
|
+
char operator[](std::size_t i) const {
|
22
|
+
MARISA_DEBUG_IF(i >= length_, MARISA_BOUND_ERROR);
|
23
|
+
return ptr_[i];
|
24
|
+
}
|
25
|
+
|
26
|
+
void set_str(const char *str) {
|
27
|
+
MARISA_DEBUG_IF(str == NULL, MARISA_NULL_ERROR);
|
28
|
+
std::size_t length = 0;
|
29
|
+
while (str[length] != '\0') {
|
30
|
+
++length;
|
31
|
+
}
|
32
|
+
ptr_ = str;
|
33
|
+
length_ = length;
|
34
|
+
}
|
35
|
+
void set_str(const char *ptr, std::size_t length) {
|
36
|
+
MARISA_DEBUG_IF((ptr == NULL) && (length != 0), MARISA_NULL_ERROR);
|
37
|
+
ptr_ = ptr;
|
38
|
+
length_ = length;
|
39
|
+
}
|
40
|
+
void set_id(std::size_t id) {
|
41
|
+
id_ = id;
|
42
|
+
}
|
43
|
+
|
44
|
+
const char *ptr() const {
|
45
|
+
return ptr_;
|
46
|
+
}
|
47
|
+
std::size_t length() const {
|
48
|
+
return length_;
|
49
|
+
}
|
50
|
+
std::size_t id() const {
|
51
|
+
return id_;
|
52
|
+
}
|
53
|
+
|
54
|
+
void clear() {
|
55
|
+
Query().swap(*this);
|
56
|
+
}
|
57
|
+
void swap(Query &rhs) {
|
58
|
+
marisa::swap(ptr_, rhs.ptr_);
|
59
|
+
marisa::swap(length_, rhs.length_);
|
60
|
+
marisa::swap(id_, rhs.id_);
|
61
|
+
}
|
62
|
+
|
63
|
+
private:
|
64
|
+
const char *ptr_;
|
65
|
+
std::size_t length_;
|
66
|
+
std::size_t id_;
|
67
|
+
};
|
68
|
+
|
69
|
+
} // namespace marisa
|
70
|
+
|
71
|
+
#endif // MARISA_QUERY_H_
|
@@ -0,0 +1,48 @@
|
|
1
|
+
#ifndef MARISA_SCOPED_ARRAY_H_
|
2
|
+
#define MARISA_SCOPED_ARRAY_H_
|
3
|
+
|
4
|
+
#include "base.h"
|
5
|
+
|
6
|
+
namespace marisa {
|
7
|
+
|
8
|
+
template <typename T>
|
9
|
+
class scoped_array {
|
10
|
+
public:
|
11
|
+
scoped_array() : array_(NULL) {}
|
12
|
+
explicit scoped_array(T *array) : array_(array) {}
|
13
|
+
|
14
|
+
~scoped_array() {
|
15
|
+
delete [] array_;
|
16
|
+
}
|
17
|
+
|
18
|
+
void reset(T *array = NULL) {
|
19
|
+
MARISA_THROW_IF((array != NULL) && (array == array_), MARISA_RESET_ERROR);
|
20
|
+
scoped_array(array).swap(*this);
|
21
|
+
}
|
22
|
+
|
23
|
+
T &operator[](std::size_t i) const {
|
24
|
+
MARISA_DEBUG_IF(array_ == NULL, MARISA_STATE_ERROR);
|
25
|
+
return array_[i];
|
26
|
+
}
|
27
|
+
T *get() const {
|
28
|
+
return array_;
|
29
|
+
}
|
30
|
+
|
31
|
+
void clear() {
|
32
|
+
scoped_array().swap(*this);
|
33
|
+
}
|
34
|
+
void swap(scoped_array &rhs) {
|
35
|
+
marisa::swap(array_, rhs.array_);
|
36
|
+
}
|
37
|
+
|
38
|
+
private:
|
39
|
+
T *array_;
|
40
|
+
|
41
|
+
// Disallows copy and assignment.
|
42
|
+
scoped_array(const scoped_array &);
|
43
|
+
scoped_array &operator=(const scoped_array &);
|
44
|
+
};
|
45
|
+
|
46
|
+
} // namespace marisa
|
47
|
+
|
48
|
+
#endif // MARISA_SCOPED_ARRAY_H_
|
@@ -0,0 +1,52 @@
|
|
1
|
+
#ifndef MARISA_SCOPED_PTR_H_
|
2
|
+
#define MARISA_SCOPED_PTR_H_
|
3
|
+
|
4
|
+
#include "base.h"
|
5
|
+
|
6
|
+
namespace marisa {
|
7
|
+
|
8
|
+
template <typename T>
|
9
|
+
class scoped_ptr {
|
10
|
+
public:
|
11
|
+
scoped_ptr() : ptr_(NULL) {}
|
12
|
+
explicit scoped_ptr(T *ptr) : ptr_(ptr) {}
|
13
|
+
|
14
|
+
~scoped_ptr() {
|
15
|
+
delete ptr_;
|
16
|
+
}
|
17
|
+
|
18
|
+
void reset(T *ptr = NULL) {
|
19
|
+
MARISA_THROW_IF((ptr != NULL) && (ptr == ptr_), MARISA_RESET_ERROR);
|
20
|
+
scoped_ptr(ptr).swap(*this);
|
21
|
+
}
|
22
|
+
|
23
|
+
T &operator*() const {
|
24
|
+
MARISA_DEBUG_IF(ptr_ == NULL, MARISA_STATE_ERROR);
|
25
|
+
return *ptr_;
|
26
|
+
}
|
27
|
+
T *operator->() const {
|
28
|
+
MARISA_DEBUG_IF(ptr_ == NULL, MARISA_STATE_ERROR);
|
29
|
+
return ptr_;
|
30
|
+
}
|
31
|
+
T *get() const {
|
32
|
+
return ptr_;
|
33
|
+
}
|
34
|
+
|
35
|
+
void clear() {
|
36
|
+
scoped_ptr().swap(*this);
|
37
|
+
}
|
38
|
+
void swap(scoped_ptr &rhs) {
|
39
|
+
marisa::swap(ptr_, rhs.ptr_);
|
40
|
+
}
|
41
|
+
|
42
|
+
private:
|
43
|
+
T *ptr_;
|
44
|
+
|
45
|
+
// Disallows copy and assignment.
|
46
|
+
scoped_ptr(const scoped_ptr &);
|
47
|
+
scoped_ptr &operator=(const scoped_ptr &);
|
48
|
+
};
|
49
|
+
|
50
|
+
} // namespace marisa
|
51
|
+
|
52
|
+
#endif // MARISA_SCOPED_PTR_H_
|
@@ -0,0 +1,15 @@
|
|
1
|
+
#ifndef MARISA_STDIO_H_
|
2
|
+
#define MARISA_STDIO_H_
|
3
|
+
|
4
|
+
#include <cstdio>
|
5
|
+
|
6
|
+
namespace marisa {
|
7
|
+
|
8
|
+
class Trie;
|
9
|
+
|
10
|
+
void fread(std::FILE *file, Trie *trie);
|
11
|
+
void fwrite(std::FILE *file, const Trie &trie);
|
12
|
+
|
13
|
+
} // namespace marisa
|
14
|
+
|
15
|
+
#endif // MARISA_STDIO_H_
|