melisa 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +11 -0
- data/ext/marisa/bindings/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/marisa-swig.h +183 -0
- data/ext/marisa/bindings/perl/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/perl/marisa-swig.h +183 -0
- data/ext/marisa/bindings/perl/marisa-swig_wrap.cxx +5160 -0
- data/ext/marisa/bindings/python/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/python/marisa-swig.h +183 -0
- data/ext/marisa/bindings/python/marisa-swig_wrap.cxx +6090 -0
- data/ext/marisa/bindings/ruby/extconf.rb +5 -0
- data/ext/marisa/bindings/ruby/marisa-swig.cxx +253 -0
- data/ext/marisa/bindings/ruby/marisa-swig.h +183 -0
- data/ext/marisa/bindings/ruby/marisa-swig_wrap.cxx +4708 -0
- data/ext/marisa/lib/marisa.h +14 -0
- data/ext/marisa/lib/marisa/agent.cc +51 -0
- data/ext/marisa/lib/marisa/agent.h +73 -0
- data/ext/marisa/lib/marisa/base.h +193 -0
- data/ext/marisa/lib/marisa/exception.h +82 -0
- data/ext/marisa/lib/marisa/grimoire/algorithm.h +26 -0
- data/ext/marisa/lib/marisa/grimoire/algorithm/sort.h +196 -0
- data/ext/marisa/lib/marisa/grimoire/intrin.h +115 -0
- data/ext/marisa/lib/marisa/grimoire/io.h +18 -0
- data/ext/marisa/lib/marisa/grimoire/io/mapper.cc +163 -0
- data/ext/marisa/lib/marisa/grimoire/io/mapper.h +67 -0
- data/ext/marisa/lib/marisa/grimoire/io/reader.cc +147 -0
- data/ext/marisa/lib/marisa/grimoire/io/reader.h +66 -0
- data/ext/marisa/lib/marisa/grimoire/io/writer.cc +148 -0
- data/ext/marisa/lib/marisa/grimoire/io/writer.h +65 -0
- data/ext/marisa/lib/marisa/grimoire/trie.h +16 -0
- data/ext/marisa/lib/marisa/grimoire/trie/cache.h +81 -0
- data/ext/marisa/lib/marisa/grimoire/trie/config.h +155 -0
- data/ext/marisa/lib/marisa/grimoire/trie/entry.h +82 -0
- data/ext/marisa/lib/marisa/grimoire/trie/header.h +61 -0
- data/ext/marisa/lib/marisa/grimoire/trie/history.h +65 -0
- data/ext/marisa/lib/marisa/grimoire/trie/key.h +228 -0
- data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.cc +876 -0
- data/ext/marisa/lib/marisa/grimoire/trie/louds-trie.h +134 -0
- data/ext/marisa/lib/marisa/grimoire/trie/range.h +115 -0
- data/ext/marisa/lib/marisa/grimoire/trie/state.h +117 -0
- data/ext/marisa/lib/marisa/grimoire/trie/tail.cc +218 -0
- data/ext/marisa/lib/marisa/grimoire/trie/tail.h +72 -0
- data/ext/marisa/lib/marisa/grimoire/vector.h +18 -0
- data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.cc +826 -0
- data/ext/marisa/lib/marisa/grimoire/vector/bit-vector.h +179 -0
- data/ext/marisa/lib/marisa/grimoire/vector/flat-vector.h +205 -0
- data/ext/marisa/lib/marisa/grimoire/vector/pop-count.h +110 -0
- data/ext/marisa/lib/marisa/grimoire/vector/rank-index.h +82 -0
- data/ext/marisa/lib/marisa/grimoire/vector/vector.h +256 -0
- data/ext/marisa/lib/marisa/iostream.h +18 -0
- data/ext/marisa/lib/marisa/key.h +85 -0
- data/ext/marisa/lib/marisa/keyset.cc +181 -0
- data/ext/marisa/lib/marisa/keyset.h +80 -0
- data/ext/marisa/lib/marisa/query.h +71 -0
- data/ext/marisa/lib/marisa/scoped-array.h +48 -0
- data/ext/marisa/lib/marisa/scoped-ptr.h +52 -0
- data/ext/marisa/lib/marisa/stdio.h +15 -0
- data/ext/marisa/lib/marisa/trie.cc +249 -0
- data/ext/marisa/lib/marisa/trie.h +64 -0
- data/ext/marisa/tests/base-test.cc +309 -0
- data/ext/marisa/tests/io-test.cc +252 -0
- data/ext/marisa/tests/marisa-assert.h +26 -0
- data/ext/marisa/tests/marisa-test.cc +388 -0
- data/ext/marisa/tests/trie-test.cc +507 -0
- data/ext/marisa/tests/vector-test.cc +466 -0
- data/ext/marisa/tools/cmdopt.cc +298 -0
- data/ext/marisa/tools/cmdopt.h +58 -0
- data/ext/marisa/tools/marisa-benchmark.cc +418 -0
- data/ext/marisa/tools/marisa-build.cc +206 -0
- data/ext/marisa/tools/marisa-common-prefix-search.cc +143 -0
- data/ext/marisa/tools/marisa-dump.cc +151 -0
- data/ext/marisa/tools/marisa-lookup.cc +110 -0
- data/ext/marisa/tools/marisa-predictive-search.cc +143 -0
- data/ext/marisa/tools/marisa-reverse-lookup.cc +110 -0
- data/lib/melisa.rb +7 -0
- data/lib/melisa/base_config_flags.rb +76 -0
- data/lib/melisa/bytes_trie.rb +55 -0
- data/lib/melisa/int_trie.rb +14 -0
- data/lib/melisa/search.rb +55 -0
- data/lib/melisa/trie.rb +96 -0
- data/lib/melisa/version.rb +3 -0
- data/melisa.gemspec +36 -0
- data/spec/base_config_flags_spec.rb +73 -0
- data/spec/bytes_trie_spec.rb +16 -0
- data/spec/int_trie_spec.rb +16 -0
- data/spec/search_spec.rb +29 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/trie_spec.rb +30 -0
- metadata +207 -0
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
#ifndef MARISA_GRIMOIRE_INTRIN_H_
|
|
2
|
+
#define MARISA_GRIMOIRE_INTRIN_H_
|
|
3
|
+
|
|
4
|
+
#include "marisa/base.h"
|
|
5
|
+
|
|
6
|
+
#if defined(__x86_64__) || defined(_M_X64)
|
|
7
|
+
#define MARISA_X64
|
|
8
|
+
#elif defined(__i386__) || defined(_M_IX86)
|
|
9
|
+
#define MARISA_X86
|
|
10
|
+
#else // defined(__i386__) || defined(_M_IX86)
|
|
11
|
+
#ifdef MARISA_USE_POPCNT
|
|
12
|
+
#undef MARISA_USE_POPCNT
|
|
13
|
+
#endif // MARISA_USE_POPCNT
|
|
14
|
+
#ifdef MARISA_USE_SSE4A
|
|
15
|
+
#undef MARISA_USE_SSE4A
|
|
16
|
+
#endif // MARISA_USE_SSE4A
|
|
17
|
+
#ifdef MARISA_USE_SSE4
|
|
18
|
+
#undef MARISA_USE_SSE4
|
|
19
|
+
#endif // MARISA_USE_SSE4
|
|
20
|
+
#ifdef MARISA_USE_SSE4_2
|
|
21
|
+
#undef MARISA_USE_SSE4_2
|
|
22
|
+
#endif // MARISA_USE_SSE4_2
|
|
23
|
+
#ifdef MARISA_USE_SSE4_1
|
|
24
|
+
#undef MARISA_USE_SSE4_1
|
|
25
|
+
#endif // MARISA_USE_SSE4_1
|
|
26
|
+
#ifdef MARISA_USE_SSSE3
|
|
27
|
+
#undef MARISA_USE_SSSE3
|
|
28
|
+
#endif // MARISA_USE_SSSE3
|
|
29
|
+
#ifdef MARISA_USE_SSE3
|
|
30
|
+
#undef MARISA_USE_SSE3
|
|
31
|
+
#endif // MARISA_USE_SSE3
|
|
32
|
+
#ifdef MARISA_USE_SSE2
|
|
33
|
+
#undef MARISA_USE_SSE2
|
|
34
|
+
#endif // MARISA_USE_SSE2
|
|
35
|
+
#endif // defined(__i386__) || defined(_M_IX86)
|
|
36
|
+
|
|
37
|
+
#ifdef MARISA_USE_POPCNT
|
|
38
|
+
#ifndef MARISA_USE_SSE3
|
|
39
|
+
#define MARISA_USE_SSE3
|
|
40
|
+
#endif // MARISA_USE_SSE3
|
|
41
|
+
#ifdef _MSC_VER
|
|
42
|
+
#include <intrin.h>
|
|
43
|
+
#else // _MSC_VER
|
|
44
|
+
#include <popcntintrin.h>
|
|
45
|
+
#endif // _MSC_VER
|
|
46
|
+
#endif // MARISA_USE_POPCNT
|
|
47
|
+
|
|
48
|
+
#ifdef MARISA_USE_SSE4A
|
|
49
|
+
#ifndef MARISA_USE_SSE3
|
|
50
|
+
#define MARISA_USE_SSE3
|
|
51
|
+
#endif // MARISA_USE_SSE3
|
|
52
|
+
#ifndef MARISA_USE_POPCNT
|
|
53
|
+
#define MARISA_USE_POPCNT
|
|
54
|
+
#endif // MARISA_USE_POPCNT
|
|
55
|
+
#endif // MARISA_USE_SSE4A
|
|
56
|
+
|
|
57
|
+
#ifdef MARISA_USE_SSE4
|
|
58
|
+
#ifndef MARISA_USE_SSE4_2
|
|
59
|
+
#define MARISA_USE_SSE4_2
|
|
60
|
+
#endif // MARISA_USE_SSE4_2
|
|
61
|
+
#endif // MARISA_USE_SSE4
|
|
62
|
+
|
|
63
|
+
#ifdef MARISA_USE_SSE4_2
|
|
64
|
+
#ifndef MARISA_USE_SSE4_1
|
|
65
|
+
#define MARISA_USE_SSE4_1
|
|
66
|
+
#endif // MARISA_USE_SSE4_1
|
|
67
|
+
#ifndef MARISA_USE_POPCNT
|
|
68
|
+
#define MARISA_USE_POPCNT
|
|
69
|
+
#endif // MARISA_USE_POPCNT
|
|
70
|
+
#endif // MARISA_USE_SSE4_2
|
|
71
|
+
|
|
72
|
+
#ifdef MARISA_USE_SSE4_1
|
|
73
|
+
#ifndef MARISA_USE_SSSE3
|
|
74
|
+
#define MARISA_USE_SSSE3
|
|
75
|
+
#endif // MARISA_USE_SSSE3
|
|
76
|
+
#endif // MARISA_USE_SSE4_1
|
|
77
|
+
|
|
78
|
+
#ifdef MARISA_USE_SSSE3
|
|
79
|
+
#ifndef MARISA_USE_SSE3
|
|
80
|
+
#define MARISA_USE_SSE3
|
|
81
|
+
#endif // MARISA_USE_SSE3
|
|
82
|
+
#ifdef MARISA_X64
|
|
83
|
+
#define MARISA_X64_SSSE3
|
|
84
|
+
#else // MARISA_X64
|
|
85
|
+
#define MARISA_X86_SSSE3
|
|
86
|
+
#endif // MARISA_X64
|
|
87
|
+
#include <tmmintrin.h>
|
|
88
|
+
#endif // MARISA_USE_SSSE3
|
|
89
|
+
|
|
90
|
+
#ifdef MARISA_USE_SSE3
|
|
91
|
+
#ifndef MARISA_USE_SSE2
|
|
92
|
+
#define MARISA_USE_SSE2
|
|
93
|
+
#endif // MARISA_USE_SSE2
|
|
94
|
+
#endif // MARISA_USE_SSE3
|
|
95
|
+
|
|
96
|
+
#ifdef MARISA_USE_SSE2
|
|
97
|
+
#ifdef MARISA_X64
|
|
98
|
+
#define MARISA_X64_SSE2
|
|
99
|
+
#else // MARISA_X64
|
|
100
|
+
#define MARISA_X86_SSE2
|
|
101
|
+
#endif // MARISA_X64
|
|
102
|
+
#include <emmintrin.h>
|
|
103
|
+
#endif // MARISA_USE_SSE2
|
|
104
|
+
|
|
105
|
+
#ifdef _MSC_VER
|
|
106
|
+
#if MARISA_WORD_SIZE == 64
|
|
107
|
+
#include <intrin.h>
|
|
108
|
+
#pragma intrinsic(_BitScanForward64)
|
|
109
|
+
#else // MARISA_WORD_SIZE == 64
|
|
110
|
+
#include <intrin.h>
|
|
111
|
+
#pragma intrinsic(_BitScanForward)
|
|
112
|
+
#endif // MARISA_WORD_SIZE == 64
|
|
113
|
+
#endif // _MSC_VER
|
|
114
|
+
|
|
115
|
+
#endif // MARISA_GRIMOIRE_INTRIN_H_
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
#ifndef MARISA_GRIMOIRE_IO_H_
|
|
2
|
+
#define MARISA_GRIMOIRE_IO_H_
|
|
3
|
+
|
|
4
|
+
#include "marisa/grimoire/io/mapper.h"
|
|
5
|
+
#include "marisa/grimoire/io/reader.h"
|
|
6
|
+
#include "marisa/grimoire/io/writer.h"
|
|
7
|
+
|
|
8
|
+
namespace marisa {
|
|
9
|
+
namespace grimoire {
|
|
10
|
+
|
|
11
|
+
using io::Mapper;
|
|
12
|
+
using io::Reader;
|
|
13
|
+
using io::Writer;
|
|
14
|
+
|
|
15
|
+
} // namespace grimoire
|
|
16
|
+
} // namespace marisa
|
|
17
|
+
|
|
18
|
+
#endif // MARISA_GRIMOIRE_IO_H_
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
#if (defined _WIN32) || (defined _WIN64)
|
|
2
|
+
#include <sys/types.h>
|
|
3
|
+
#include <sys/stat.h>
|
|
4
|
+
#include <windows.h>
|
|
5
|
+
#else // (defined _WIN32) || (defined _WIN64)
|
|
6
|
+
#include <sys/mman.h>
|
|
7
|
+
#include <sys/stat.h>
|
|
8
|
+
#include <sys/types.h>
|
|
9
|
+
#include <fcntl.h>
|
|
10
|
+
#include <unistd.h>
|
|
11
|
+
#endif // (defined _WIN32) || (defined _WIN64)
|
|
12
|
+
|
|
13
|
+
#include "marisa/grimoire/io/mapper.h"
|
|
14
|
+
|
|
15
|
+
namespace marisa {
|
|
16
|
+
namespace grimoire {
|
|
17
|
+
namespace io {
|
|
18
|
+
|
|
19
|
+
#if (defined _WIN32) || (defined _WIN64)
|
|
20
|
+
Mapper::Mapper()
|
|
21
|
+
: ptr_(NULL), origin_(NULL), avail_(0), size_(0),
|
|
22
|
+
file_(NULL), map_(NULL) {}
|
|
23
|
+
#else // (defined _WIN32) || (defined _WIN64)
|
|
24
|
+
Mapper::Mapper()
|
|
25
|
+
: ptr_(NULL), origin_(MAP_FAILED), avail_(0), size_(0), fd_(-1) {}
|
|
26
|
+
#endif // (defined _WIN32) || (defined _WIN64)
|
|
27
|
+
|
|
28
|
+
#if (defined _WIN32) || (defined _WIN64)
|
|
29
|
+
Mapper::~Mapper() {
|
|
30
|
+
if (origin_ != NULL) {
|
|
31
|
+
::UnmapViewOfFile(origin_);
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
if (map_ != NULL) {
|
|
35
|
+
::CloseHandle(map_);
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
if (file_ != NULL) {
|
|
39
|
+
::CloseHandle(file_);
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
#else // (defined _WIN32) || (defined _WIN64)
|
|
43
|
+
Mapper::~Mapper() {
|
|
44
|
+
if (origin_ != MAP_FAILED) {
|
|
45
|
+
::munmap(origin_, size_);
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
if (fd_ != -1) {
|
|
49
|
+
::close(fd_);
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
#endif // (defined _WIN32) || (defined _WIN64)
|
|
53
|
+
|
|
54
|
+
void Mapper::open(const char *filename) {
|
|
55
|
+
MARISA_THROW_IF(filename == NULL, MARISA_NULL_ERROR);
|
|
56
|
+
|
|
57
|
+
Mapper temp;
|
|
58
|
+
temp.open_(filename);
|
|
59
|
+
swap(temp);
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
void Mapper::open(const void *ptr, std::size_t size) {
|
|
63
|
+
MARISA_THROW_IF((ptr == NULL) && (size != 0), MARISA_NULL_ERROR);
|
|
64
|
+
|
|
65
|
+
Mapper temp;
|
|
66
|
+
temp.open_(ptr, size);
|
|
67
|
+
swap(temp);
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
// Skips `size` bytes of the mapped data.
// Throws MARISA_STATE_ERROR when the mapper is not open and MARISA_IO_ERROR
// when fewer than `size` bytes remain.
void Mapper::seek(std::size_t size) {
  MARISA_THROW_IF(!is_open(), MARISA_STATE_ERROR);
  MARISA_THROW_IF(size > avail_, MARISA_IO_ERROR);

  // Advancing the cursor is all that is needed; the returned pointer to the
  // skipped bytes is deliberately ignored.
  static_cast<void>(map_data(size));
}
|
|
76
|
+
|
|
77
|
+
bool Mapper::is_open() const {
|
|
78
|
+
return ptr_ != NULL;
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
void Mapper::clear() {
|
|
82
|
+
Mapper().swap(*this);
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
// Exchanges the complete state of this mapper with `rhs`, member by member.
void Mapper::swap(Mapper &rhs) {
  // Read cursor and the number of bytes left behind it.
  marisa::swap(ptr_, rhs.ptr_);
  marisa::swap(avail_, rhs.avail_);

  // Base address and size of the mapping.
  marisa::swap(origin_, rhs.origin_);
  marisa::swap(size_, rhs.size_);

  // Platform-specific handles backing the mapping.
#if (defined _WIN32) || (defined _WIN64)
  marisa::swap(map_, rhs.map_);
  marisa::swap(file_, rhs.file_);
#else  // (defined _WIN32) || (defined _WIN64)
  marisa::swap(fd_, rhs.fd_);
#endif  // (defined _WIN32) || (defined _WIN64)
}
|
|
97
|
+
|
|
98
|
+
// Consumes `size` bytes from the mapped data and returns a pointer to the
// first of them. The cursor moves forward and the available byte count
// shrinks by `size`.
// Throws MARISA_STATE_ERROR when the mapper is not open and MARISA_IO_ERROR
// when fewer than `size` bytes remain.
const void *Mapper::map_data(std::size_t size) {
  MARISA_THROW_IF(!is_open(), MARISA_STATE_ERROR);
  MARISA_THROW_IF(size > avail_, MARISA_IO_ERROR);

  const void * const start = ptr_;
  // Pointer arithmetic has to go through a byte-sized type.
  ptr_ = static_cast<const char *>(start) + size;
  avail_ -= size;
  return start;
}
|
|
107
|
+
|
|
108
|
+
#if (defined _WIN32) || (defined _WIN64)
|
|
109
|
+
#ifdef __MSVCRT_VERSION__
|
|
110
|
+
#if __MSVCRT_VERSION__ >= 0x0601
|
|
111
|
+
#define MARISA_HAS_STAT64
|
|
112
|
+
#endif // __MSVCRT_VERSION__ >= 0x0601
|
|
113
|
+
#endif // __MSVCRT_VERSION__
|
|
114
|
+
void Mapper::open_(const char *filename) {
|
|
115
|
+
#ifdef MARISA_HAS_STAT64
|
|
116
|
+
struct __stat64 st;
|
|
117
|
+
MARISA_THROW_IF(::_stat64(filename, &st) != 0, MARISA_IO_ERROR);
|
|
118
|
+
#else // MARISA_HAS_STAT64
|
|
119
|
+
struct _stat st;
|
|
120
|
+
MARISA_THROW_IF(::_stat(filename, &st) != 0, MARISA_IO_ERROR);
|
|
121
|
+
#endif // MARISA_HAS_STAT64
|
|
122
|
+
MARISA_THROW_IF((UInt64)st.st_size > MARISA_SIZE_MAX, MARISA_SIZE_ERROR);
|
|
123
|
+
size_ = (std::size_t)st.st_size;
|
|
124
|
+
|
|
125
|
+
file_ = ::CreateFileA(filename, GENERIC_READ, FILE_SHARE_READ,
|
|
126
|
+
NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
|
|
127
|
+
MARISA_THROW_IF(file_ == INVALID_HANDLE_VALUE, MARISA_IO_ERROR);
|
|
128
|
+
|
|
129
|
+
map_ = ::CreateFileMapping(file_, NULL, PAGE_READONLY, 0, 0, NULL);
|
|
130
|
+
MARISA_THROW_IF(map_ == NULL, MARISA_IO_ERROR);
|
|
131
|
+
|
|
132
|
+
origin_ = ::MapViewOfFile(map_, FILE_MAP_READ, 0, 0, 0);
|
|
133
|
+
MARISA_THROW_IF(origin_ == NULL, MARISA_IO_ERROR);
|
|
134
|
+
|
|
135
|
+
ptr_ = static_cast<const char *>(origin_);
|
|
136
|
+
avail_ = size_;
|
|
137
|
+
}
|
|
138
|
+
#else // (defined _WIN32) || (defined _WIN64)
|
|
139
|
+
void Mapper::open_(const char *filename) {
|
|
140
|
+
struct stat st;
|
|
141
|
+
MARISA_THROW_IF(::stat(filename, &st) != 0, MARISA_IO_ERROR);
|
|
142
|
+
MARISA_THROW_IF((UInt64)st.st_size > MARISA_SIZE_MAX, MARISA_SIZE_ERROR);
|
|
143
|
+
size_ = (std::size_t)st.st_size;
|
|
144
|
+
|
|
145
|
+
fd_ = ::open(filename, O_RDONLY);
|
|
146
|
+
MARISA_THROW_IF(fd_ == -1, MARISA_IO_ERROR);
|
|
147
|
+
|
|
148
|
+
origin_ = ::mmap(NULL, size_, PROT_READ, MAP_SHARED, fd_, 0);
|
|
149
|
+
MARISA_THROW_IF(origin_ == MAP_FAILED, MARISA_IO_ERROR);
|
|
150
|
+
|
|
151
|
+
ptr_ = static_cast<const char *>(origin_);
|
|
152
|
+
avail_ = size_;
|
|
153
|
+
}
|
|
154
|
+
#endif // (defined _WIN32) || (defined _WIN64)
|
|
155
|
+
|
|
156
|
+
// Points the cursor at caller-provided memory. Only the cursor and the
// available byte count are set; origin_ and the OS handles keep their
// current values.
void Mapper::open_(const void *ptr, std::size_t size) {
  avail_ = size;
  ptr_ = ptr;
}
|
|
160
|
+
|
|
161
|
+
} // namespace io
|
|
162
|
+
} // namespace grimoire
|
|
163
|
+
} // namespace marisa
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
#ifndef MARISA_GRIMOIRE_IO_MAPPER_H_
|
|
2
|
+
#define MARISA_GRIMOIRE_IO_MAPPER_H_
|
|
3
|
+
|
|
4
|
+
#include <cstdio>
|
|
5
|
+
|
|
6
|
+
#include "marisa/base.h"
|
|
7
|
+
|
|
8
|
+
namespace marisa {
|
|
9
|
+
namespace grimoire {
|
|
10
|
+
namespace io {
|
|
11
|
+
|
|
12
|
+
class Mapper {
|
|
13
|
+
public:
|
|
14
|
+
Mapper();
|
|
15
|
+
~Mapper();
|
|
16
|
+
|
|
17
|
+
void open(const char *filename);
|
|
18
|
+
void open(const void *ptr, std::size_t size);
|
|
19
|
+
|
|
20
|
+
template <typename T>
|
|
21
|
+
void map(T *obj) {
|
|
22
|
+
MARISA_THROW_IF(obj == NULL, MARISA_NULL_ERROR);
|
|
23
|
+
*obj = *static_cast<const T *>(map_data(sizeof(T)));
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
template <typename T>
|
|
27
|
+
void map(const T **objs, std::size_t num_objs) {
|
|
28
|
+
MARISA_THROW_IF((objs == NULL) && (num_objs != 0), MARISA_NULL_ERROR);
|
|
29
|
+
MARISA_THROW_IF(num_objs > (MARISA_SIZE_MAX / sizeof(T)),
|
|
30
|
+
MARISA_SIZE_ERROR);
|
|
31
|
+
*objs = static_cast<const T *>(map_data(sizeof(T) * num_objs));
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
void seek(std::size_t size);
|
|
35
|
+
|
|
36
|
+
bool is_open() const;
|
|
37
|
+
|
|
38
|
+
void clear();
|
|
39
|
+
void swap(Mapper &rhs);
|
|
40
|
+
|
|
41
|
+
private:
|
|
42
|
+
const void *ptr_;
|
|
43
|
+
void *origin_;
|
|
44
|
+
std::size_t avail_;
|
|
45
|
+
std::size_t size_;
|
|
46
|
+
#if (defined _WIN32) || (defined _WIN64)
|
|
47
|
+
void *file_;
|
|
48
|
+
void *map_;
|
|
49
|
+
#else // (defined _WIN32) || (defined _WIN64)
|
|
50
|
+
int fd_;
|
|
51
|
+
#endif // (defined _WIN32) || (defined _WIN64)
|
|
52
|
+
|
|
53
|
+
void open_(const char *filename);
|
|
54
|
+
void open_(const void *ptr, std::size_t size);
|
|
55
|
+
|
|
56
|
+
const void *map_data(std::size_t size);
|
|
57
|
+
|
|
58
|
+
// Disallows copy and assignment.
|
|
59
|
+
Mapper(const Mapper &);
|
|
60
|
+
Mapper &operator=(const Mapper &);
|
|
61
|
+
};
|
|
62
|
+
|
|
63
|
+
} // namespace io
|
|
64
|
+
} // namespace grimoire
|
|
65
|
+
} // namespace marisa
|
|
66
|
+
|
|
67
|
+
#endif // MARISA_GRIMOIRE_IO_MAPPER_H_
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
#include <stdio.h>
|
|
2
|
+
|
|
3
|
+
#ifdef _WIN32
|
|
4
|
+
#include <io.h>
|
|
5
|
+
#else // _WIN32
|
|
6
|
+
#include <unistd.h>
|
|
7
|
+
#endif // _WIN32
|
|
8
|
+
|
|
9
|
+
#include <limits>
|
|
10
|
+
|
|
11
|
+
#include "marisa/grimoire/io/reader.h"
|
|
12
|
+
|
|
13
|
+
namespace marisa {
|
|
14
|
+
namespace grimoire {
|
|
15
|
+
namespace io {
|
|
16
|
+
|
|
17
|
+
// Default constructor: creates a closed reader with no input source attached
// and nothing to close on destruction.
Reader::Reader()
    : file_(NULL),
      fd_(-1),
      stream_(NULL),
      needs_fclose_(false) {
}
|
|
19
|
+
|
|
20
|
+
// Destructor: closes the FILE only when needs_fclose_ is set. It never closes
// fd_ or touches stream_.
Reader::~Reader() {
  if (!needs_fclose_) {
    return;
  }
  ::fclose(file_);
}
|
|
25
|
+
|
|
26
|
+
void Reader::open(const char *filename) {
|
|
27
|
+
MARISA_THROW_IF(filename == NULL, MARISA_NULL_ERROR);
|
|
28
|
+
|
|
29
|
+
Reader temp;
|
|
30
|
+
temp.open_(filename);
|
|
31
|
+
swap(temp);
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
void Reader::open(std::FILE *file) {
|
|
35
|
+
MARISA_THROW_IF(file == NULL, MARISA_NULL_ERROR);
|
|
36
|
+
|
|
37
|
+
Reader temp;
|
|
38
|
+
temp.open_(file);
|
|
39
|
+
swap(temp);
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
void Reader::open(int fd) {
|
|
43
|
+
MARISA_THROW_IF(fd == -1, MARISA_CODE_ERROR);
|
|
44
|
+
|
|
45
|
+
Reader temp;
|
|
46
|
+
temp.open_(fd);
|
|
47
|
+
swap(temp);
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
void Reader::open(std::istream &stream) {
|
|
51
|
+
Reader temp;
|
|
52
|
+
temp.open_(stream);
|
|
53
|
+
swap(temp);
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
void Reader::clear() {
|
|
57
|
+
Reader().swap(*this);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
// Exchanges the complete state of this reader with `rhs`, member by member.
void Reader::swap(Reader &rhs) {
  // The FILE pointer travels together with the flag saying whether it must
  // be closed on destruction.
  marisa::swap(file_, rhs.file_);
  marisa::swap(needs_fclose_, rhs.needs_fclose_);

  // The remaining possible input sources.
  marisa::swap(fd_, rhs.fd_);
  marisa::swap(stream_, rhs.stream_);
}
|
|
66
|
+
|
|
67
|
+
// Skips `size` bytes of input by reading them into a throw-away buffer.
// Throws MARISA_STATE_ERROR when the reader is not open; read failures
// propagate from read_data().
void Reader::seek(std::size_t size) {
  MARISA_THROW_IF(!is_open(), MARISA_STATE_ERROR);

  if (size == 0) {
    return;
  }

  // Short skips fit into a small buffer and need a single read.
  if (size <= 16) {
    char small[16];
    read_data(small, size);
    return;
  }

  // Longer skips are discarded in pieces of at most sizeof(scratch) bytes.
  char scratch[1024];
  std::size_t remaining = size;
  do {
    const std::size_t chunk =
        (remaining < sizeof(scratch)) ? remaining : sizeof(scratch);
    read_data(scratch, chunk);
    remaining -= chunk;
  } while (remaining != 0);
}
|
|
83
|
+
|
|
84
|
+
bool Reader::is_open() const {
|
|
85
|
+
return (file_ != NULL) || (fd_ != -1) || (stream_ != NULL);
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
void Reader::open_(const char *filename) {
|
|
89
|
+
std::FILE *file = NULL;
|
|
90
|
+
#ifdef _MSC_VER
|
|
91
|
+
MARISA_THROW_IF(::fopen_s(&file, filename, "rb") != 0, MARISA_IO_ERROR);
|
|
92
|
+
#else // _MSC_VER
|
|
93
|
+
file = ::fopen(filename, "rb");
|
|
94
|
+
MARISA_THROW_IF(file == NULL, MARISA_IO_ERROR);
|
|
95
|
+
#endif // _MSC_VER
|
|
96
|
+
file_ = file;
|
|
97
|
+
needs_fclose_ = true;
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
void Reader::open_(std::FILE *file) {
|
|
101
|
+
file_ = file;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
void Reader::open_(int fd) {
|
|
105
|
+
fd_ = fd;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
// Attaches a caller-provided stream by storing its address.
void Reader::open_(std::istream &stream) {
  std::istream * const source = &stream;
  stream_ = source;
}
|
|
111
|
+
|
|
112
|
+
void Reader::read_data(void *buf, std::size_t size) {
|
|
113
|
+
MARISA_THROW_IF(!is_open(), MARISA_STATE_ERROR);
|
|
114
|
+
if (size == 0) {
|
|
115
|
+
return;
|
|
116
|
+
} else if (fd_ != -1) {
|
|
117
|
+
while (size != 0) {
|
|
118
|
+
#ifdef _WIN32
|
|
119
|
+
static const std::size_t CHUNK_SIZE =
|
|
120
|
+
std::numeric_limits<int>::max();
|
|
121
|
+
const unsigned int count = (size < CHUNK_SIZE) ? size : CHUNK_SIZE;
|
|
122
|
+
const int size_read = ::_read(fd_, buf, count);
|
|
123
|
+
#else // _WIN32
|
|
124
|
+
static const std::size_t CHUNK_SIZE =
|
|
125
|
+
std::numeric_limits< ::ssize_t>::max();
|
|
126
|
+
const ::size_t count = (size < CHUNK_SIZE) ? size : CHUNK_SIZE;
|
|
127
|
+
const ::ssize_t size_read = ::read(fd_, buf, count);
|
|
128
|
+
#endif // _WIN32
|
|
129
|
+
MARISA_THROW_IF(size_read <= 0, MARISA_IO_ERROR);
|
|
130
|
+
buf = static_cast<char *>(buf) + size_read;
|
|
131
|
+
size -= size_read;
|
|
132
|
+
}
|
|
133
|
+
} else if (file_ != NULL) {
|
|
134
|
+
MARISA_THROW_IF(::fread(buf, 1, size, file_) != size, MARISA_IO_ERROR);
|
|
135
|
+
} else if (stream_ != NULL) {
|
|
136
|
+
try {
|
|
137
|
+
MARISA_THROW_IF(!stream_->read(static_cast<char *>(buf), size),
|
|
138
|
+
MARISA_IO_ERROR);
|
|
139
|
+
} catch (const std::ios_base::failure &) {
|
|
140
|
+
MARISA_THROW(MARISA_IO_ERROR, "std::ios_base::failure");
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
} // namespace io
|
|
146
|
+
} // namespace grimoire
|
|
147
|
+
} // namespace marisa
|