tomoto 0.4.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +1 -1
- data/ext/tomoto/extconf.rb +4 -2
- data/lib/tomoto/version.rb +1 -1
- data/vendor/tomotopy/README.kr.rst +10 -1
- data/vendor/tomotopy/README.rst +10 -1
- data/vendor/tomotopy/src/TopicModel/CT.h +2 -2
- data/vendor/tomotopy/src/TopicModel/CTModel.cpp +5 -0
- data/vendor/tomotopy/src/TopicModel/CTModel.hpp +1 -0
- data/vendor/tomotopy/src/TopicModel/DMR.h +2 -2
- data/vendor/tomotopy/src/TopicModel/DMRModel.cpp +5 -0
- data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +1 -0
- data/vendor/tomotopy/src/TopicModel/DT.h +2 -2
- data/vendor/tomotopy/src/TopicModel/DTModel.cpp +5 -0
- data/vendor/tomotopy/src/TopicModel/DTModel.hpp +3 -0
- data/vendor/tomotopy/src/TopicModel/GDMR.h +2 -2
- data/vendor/tomotopy/src/TopicModel/GDMRModel.cpp +5 -0
- data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +1 -0
- data/vendor/tomotopy/src/TopicModel/HDP.h +2 -2
- data/vendor/tomotopy/src/TopicModel/HDPModel.cpp +5 -0
- data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +2 -0
- data/vendor/tomotopy/src/TopicModel/HLDA.h +2 -2
- data/vendor/tomotopy/src/TopicModel/HLDAModel.cpp +5 -0
- data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +9 -0
- data/vendor/tomotopy/src/TopicModel/HPA.h +2 -2
- data/vendor/tomotopy/src/TopicModel/HPAModel.cpp +5 -0
- data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +2 -0
- data/vendor/tomotopy/src/TopicModel/LDA.h +8 -2
- data/vendor/tomotopy/src/TopicModel/LDAModel.cpp +5 -0
- data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +8 -0
- data/vendor/tomotopy/src/TopicModel/LLDA.h +2 -2
- data/vendor/tomotopy/src/TopicModel/LLDAModel.cpp +5 -0
- data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +1 -0
- data/vendor/tomotopy/src/TopicModel/MGLDA.h +2 -2
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.cpp +5 -0
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +1 -0
- data/vendor/tomotopy/src/TopicModel/PA.h +2 -2
- data/vendor/tomotopy/src/TopicModel/PAModel.cpp +5 -0
- data/vendor/tomotopy/src/TopicModel/PAModel.hpp +2 -0
- data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +1 -0
- data/vendor/tomotopy/src/TopicModel/PT.h +3 -3
- data/vendor/tomotopy/src/TopicModel/PTModel.cpp +5 -0
- data/vendor/tomotopy/src/TopicModel/PTModel.hpp +1 -0
- data/vendor/tomotopy/src/TopicModel/SLDA.h +3 -2
- data/vendor/tomotopy/src/TopicModel/SLDAModel.cpp +5 -0
- data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +1 -0
- data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +77 -3
- data/vendor/tomotopy/src/Utils/Dictionary.cpp +102 -0
- data/vendor/tomotopy/src/Utils/Dictionary.h +26 -75
- data/vendor/tomotopy/src/Utils/Mmap.cpp +146 -0
- data/vendor/tomotopy/src/Utils/Mmap.h +139 -0
- data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -0
- data/vendor/tomotopy/src/Utils/SharedString.cpp +134 -0
- data/vendor/tomotopy/src/Utils/SharedString.h +104 -0
- data/vendor/tomotopy/src/Utils/serializer.cpp +166 -0
- data/vendor/tomotopy/src/Utils/serializer.hpp +261 -85
- metadata +9 -4
- data/vendor/tomotopy/src/Utils/SharedString.hpp +0 -206
@@ -0,0 +1,134 @@
|
|
1
|
+
#include "SharedString.h"
|
2
|
+
|
3
|
+
namespace tomoto
|
4
|
+
{
|
5
|
+
void SharedString::incref()
|
6
|
+
{
|
7
|
+
if (ptr)
|
8
|
+
{
|
9
|
+
++*(size_t*)ptr;
|
10
|
+
}
|
11
|
+
}
|
12
|
+
|
13
|
+
void SharedString::decref()
|
14
|
+
{
|
15
|
+
if (ptr)
|
16
|
+
{
|
17
|
+
if (--*(size_t*)ptr == 0)
|
18
|
+
{
|
19
|
+
delete[] ptr;
|
20
|
+
ptr = nullptr;
|
21
|
+
}
|
22
|
+
}
|
23
|
+
}
|
24
|
+
|
25
|
+
void SharedString::init(const char* _begin, const char* _end)
|
26
|
+
{
|
27
|
+
ptr = new char[_end - _begin + 9];
|
28
|
+
*(size_t*)ptr = 1;
|
29
|
+
len = _end - _begin;
|
30
|
+
std::memcpy((void*)(ptr + 8), _begin, _end - _begin);
|
31
|
+
((char*)ptr)[_end - _begin + 8] = 0;
|
32
|
+
}
|
33
|
+
|
34
|
+
// Creates an empty handle: the in-class initializers leave ptr null and len zero.
SharedString::SharedString() = default;
|
37
|
+
|
38
|
+
// Builds a string owning a private copy of the characters in [_begin, _end).
SharedString::SharedString(const char* _begin, const char* _end)
{
	this->init(_begin, _end);
}
|
42
|
+
|
43
|
+
// Builds a string from a NUL-terminated C string.
// A null pointer yields an empty handle without any allocation.
SharedString::SharedString(const char* _ptr)
{
	if (!_ptr) return;

	const char* const last = _ptr + std::strlen(_ptr);
	init(_ptr, last);
}
|
50
|
+
|
51
|
+
// Builds a string from a std::string.
// An empty std::string yields an empty handle without any allocation.
SharedString::SharedString(const std::string& str)
{
	if (str.empty()) return;

	const char* const first = str.data();
	init(first, first + str.size());
}
|
58
|
+
|
59
|
+
SharedString::SharedString(const SharedString& o) noexcept
|
60
|
+
: ptr{ o.ptr }, len{ o.len }
|
61
|
+
{
|
62
|
+
incref();
|
63
|
+
}
|
64
|
+
|
65
|
+
SharedString::SharedString(SharedString&& o) noexcept
|
66
|
+
{
|
67
|
+
std::swap(ptr, o.ptr);
|
68
|
+
std::swap(len, o.len);
|
69
|
+
}
|
70
|
+
|
71
|
+
// Releases this handle's reference; the buffer is freed once the last owner is gone.
SharedString::~SharedString()
{
	this->decref();
}
|
75
|
+
|
76
|
+
// Copy assignment: gives up the current buffer and starts sharing o's buffer.
// Self-assignment is a no-op.
SharedString& SharedString::operator=(const SharedString& o)
{
	if (this == &o) return *this;

	decref();
	ptr = o.ptr;
	len = o.len;
	incref();
	return *this;
}
|
87
|
+
|
88
|
+
// Move assignment: exchanges buffer and length with o.
// The buffer previously held by *this ends up in o and is released when o is destroyed.
SharedString& SharedString::operator=(SharedString&& o) noexcept
{
	std::swap(len, o.len);
	std::swap(ptr, o.ptr);
	return *this;
}
|
94
|
+
|
95
|
+
// Converts to an independent std::string copy of the characters.
// An empty handle converts to an empty string.
SharedString::operator std::string() const
{
	if (ptr)
	{
		// the characters start 8 bytes into the buffer, after the reference counter
		return std::string(ptr + 8, len);
	}
	return std::string();
}
|
100
|
+
|
101
|
+
const char* SharedString::c_str() const
|
102
|
+
{
|
103
|
+
if (!ptr) return "";
|
104
|
+
return ptr + 8;
|
105
|
+
}
|
106
|
+
|
107
|
+
std::string SharedString::substr(size_t start, size_t len) const
|
108
|
+
{
|
109
|
+
return { c_str() + start, c_str() + start + len };
|
110
|
+
}
|
111
|
+
|
112
|
+
bool SharedString::operator==(const SharedString& o) const
|
113
|
+
{
|
114
|
+
if (ptr == o.ptr) return true;
|
115
|
+
if (size() != o.size()) return false;
|
116
|
+
return std::equal(begin(), end(), o.begin());
|
117
|
+
}
|
118
|
+
|
119
|
+
bool SharedString::operator==(const std::string& o) const
|
120
|
+
{
|
121
|
+
if (size() != o.size()) return false;
|
122
|
+
return std::equal(begin(), end(), o.begin());
|
123
|
+
}
|
124
|
+
|
125
|
+
bool SharedString::operator!=(const SharedString& o) const
|
126
|
+
{
|
127
|
+
return !operator==(o);
|
128
|
+
}
|
129
|
+
|
130
|
+
bool SharedString::operator!=(const std::string& o) const
|
131
|
+
{
|
132
|
+
return !operator==(o);
|
133
|
+
}
|
134
|
+
}
|
@@ -0,0 +1,104 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include <string>
|
4
|
+
#include "serializer.hpp"
|
5
|
+
|
6
|
+
namespace tomoto
|
7
|
+
{
|
8
|
+
// Read-only string handle whose characters live in a heap buffer shared between copies.
// Copying a SharedString shares the buffer and bumps a reference counter stored at the
// start of that buffer; the characters follow the counter and are NUL-terminated
// (see SharedString.cpp for the buffer layout).
class SharedString
{
	// shared buffer (counter + characters), or nullptr for an empty handle
	const char* ptr = nullptr;
	// number of characters, not counting the terminating NUL
	size_t len = 0;

	// adds one owner to the shared buffer (no-op for an empty handle)
	void incref();

	// removes one owner; frees the buffer when the last owner is gone
	void decref();

	// allocates a new buffer holding a copy of [_begin, _end)
	void init(const char* _begin, const char* _end);

public:

	SharedString();
	explicit SharedString(const char* _begin, const char* _end);
	explicit SharedString(const char* _ptr);
	explicit SharedString(const std::string& str);
	SharedString(const SharedString& o) noexcept;
	SharedString(SharedString&& o) noexcept;
	~SharedString();
	SharedString& operator=(const SharedString& o);
	SharedString& operator=(SharedString&& o) noexcept;

	// number of characters; an empty handle reports 0
	size_t size() const
	{
		return ptr ? len : 0;
	}

	// true when the string has no characters
	bool empty() const
	{
		return !size();
	}

	// copies the characters into an independent std::string
	operator std::string() const;

	// NUL-terminated pointer to the characters
	const char* c_str() const;

	// same pointer as c_str()
	const char* data() const
	{
		return c_str();
	}

	// pointer to the first character
	const char* begin() const
	{
		return data();
	}

	// pointer one past the last character
	const char* end() const
	{
		return data() + size();
	}

	// returns the characters in [start, start + len) as a std::string
	std::string substr(size_t start, size_t len) const;

	bool operator==(const SharedString& o) const;
	bool operator==(const std::string& o) const;

	bool operator!=(const SharedString& o) const;
	bool operator!=(const std::string& o) const;
};
|
69
|
+
|
70
|
+
namespace serializer
|
71
|
+
{
|
72
|
+
template<>
|
73
|
+
struct Serializer<SharedString>
|
74
|
+
{
|
75
|
+
using VTy = SharedString;
|
76
|
+
void write(std::ostream& ostr, const VTy& v)
|
77
|
+
{
|
78
|
+
writeToStream(ostr, (uint32_t)v.size());
|
79
|
+
if (!ostr.write((const char*)v.data(), v.size()))
|
80
|
+
throw std::ios_base::failure(std::string("writing type 'SharedString' is failed"));
|
81
|
+
}
|
82
|
+
|
83
|
+
void read(std::istream& istr, VTy& v)
|
84
|
+
{
|
85
|
+
auto size = readFromStream<uint32_t>(istr);
|
86
|
+
std::vector<char> t(size);
|
87
|
+
if (!istr.read((char*)t.data(), t.size()))
|
88
|
+
throw std::ios_base::failure(std::string("reading type 'SharedString' is failed"));
|
89
|
+
v = SharedString{ t.data(), t.data() + t.size() };
|
90
|
+
}
|
91
|
+
};
|
92
|
+
}
|
93
|
+
}
|
94
|
+
|
95
|
+
namespace std
|
96
|
+
{
|
97
|
+
template <> struct hash<tomoto::SharedString>
|
98
|
+
{
|
99
|
+
size_t operator()(const tomoto::SharedString& x) const
|
100
|
+
{
|
101
|
+
return hash<string>{}(x);
|
102
|
+
}
|
103
|
+
};
|
104
|
+
}
|
@@ -0,0 +1,166 @@
|
|
1
|
+
#include "serializer.hpp"
|
2
|
+
|
3
|
+
namespace tomoto
|
4
|
+
{
|
5
|
+
namespace serializer
|
6
|
+
{
|
7
|
+
// Stream buffer over the caller-provided memory range [base, base + n).
// `read` installs the range as the get area, `write` installs it as the put area;
// both may be requested for the same range.
membuf::membuf(bool read, bool write, char* base, std::ptrdiff_t n)
{
	if (read)
	{
		// get area: start, current position and end
		setg(base, base, base + n);
	}

	if (write)
	{
		// put area: start (also the current position) and end
		setp(base, base + n);
	}
}
|
19
|
+
|
20
|
+
// Compiler-generated destructor, defined out of line.
membuf::~membuf() = default;
|
21
|
+
|
22
|
+
std::streampos membuf::seekpos(pos_type sp, std::ios_base::openmode which)
|
23
|
+
{
|
24
|
+
return seekoff(sp - pos_type(off_type(0)), std::ios_base::beg, which);
|
25
|
+
}
|
26
|
+
|
27
|
+
std::streampos membuf::seekoff(off_type off,
|
28
|
+
std::ios_base::seekdir dir,
|
29
|
+
std::ios_base::openmode which
|
30
|
+
)
|
31
|
+
{
|
32
|
+
if (which & std::ios_base::in)
|
33
|
+
{
|
34
|
+
if (dir == std::ios_base::cur)
|
35
|
+
gbump(off);
|
36
|
+
else if (dir == std::ios_base::end)
|
37
|
+
setg(eback(), egptr() + off, egptr());
|
38
|
+
else if (dir == std::ios_base::beg)
|
39
|
+
setg(eback(), eback() + off, egptr());
|
40
|
+
}
|
41
|
+
if (which & std::ios_base::out)
|
42
|
+
{
|
43
|
+
if (dir == std::ios_base::cur)
|
44
|
+
pbump(off);
|
45
|
+
else if (dir == std::ios_base::end)
|
46
|
+
setp(epptr() + off, epptr());
|
47
|
+
else if (dir == std::ios_base::beg)
|
48
|
+
setp(pbase() + off, epptr());
|
49
|
+
|
50
|
+
if (!(which & std::ios_base::in))
|
51
|
+
{
|
52
|
+
return pptr() - pbase();
|
53
|
+
}
|
54
|
+
}
|
55
|
+
return gptr() - eback();
|
56
|
+
}
|
57
|
+
|
58
|
+
// Input stream reading from the memory range [base, base + n).
// The buffer is set up read-only; const is cast away only because membuf stores a char*.
imstream::imstream(const char* base, std::ptrdiff_t n)
	: std::istream(&buf),
	buf(true, false, const_cast<char*>(base), n)
{
}
|
62
|
+
|
63
|
+
// Compiler-generated destructor, defined out of line.
imstream::~imstream() = default;
|
64
|
+
|
65
|
+
// Output stream writing into the memory range [base, base + n).
// The buffer is set up write-only.
omstream::omstream(char* base, std::ptrdiff_t n)
	: std::ostream(&buf),
	buf(false, true, base, n)
{
}
|
69
|
+
|
70
|
+
// Compiler-generated destructor, defined out of line.
omstream::~omstream() = default;
|
71
|
+
|
72
|
+
|
73
|
+
BlockStreamBuffer::BlockStreamBuffer(size_t _block_size) : block_size{ _block_size }
|
74
|
+
{
|
75
|
+
buffers.emplace_back(std::make_unique<uint8_t[]>(block_size));
|
76
|
+
this->setp((char*)buffers.back().get(), (char*)buffers.back().get() + block_size);
|
77
|
+
}
|
78
|
+
|
79
|
+
// Compiler-generated destructor, defined out of line.
BlockStreamBuffer::~BlockStreamBuffer() = default;
|
80
|
+
|
81
|
+
int BlockStreamBuffer::overflow(int c)
|
82
|
+
{
|
83
|
+
if (this->pptr() == this->epptr())
|
84
|
+
{
|
85
|
+
buffers.emplace_back(std::make_unique<uint8_t[]>(block_size));
|
86
|
+
this->setp((char*)buffers.back().get(), (char*)buffers.back().get() + block_size);
|
87
|
+
}
|
88
|
+
else
|
89
|
+
{
|
90
|
+
*(this->pptr()) = c;
|
91
|
+
this->pbump(1);
|
92
|
+
}
|
93
|
+
return c;
|
94
|
+
}
|
95
|
+
|
96
|
+
std::streamsize BlockStreamBuffer::xsputn(const char* s, std::streamsize n)
|
97
|
+
{
|
98
|
+
auto rest = n;
|
99
|
+
auto buf_remain = this->epptr() - this->pptr();
|
100
|
+
while (rest > buf_remain)
|
101
|
+
{
|
102
|
+
std::copy(s, s + buf_remain, this->pptr());
|
103
|
+
this->pbump(buf_remain);
|
104
|
+
buffers.emplace_back(std::make_unique<uint8_t[]>(block_size));
|
105
|
+
this->setp((char*)buffers.back().get(), (char*)buffers.back().get() + block_size);
|
106
|
+
rest -= buf_remain;
|
107
|
+
s += buf_remain;
|
108
|
+
buf_remain = block_size;
|
109
|
+
}
|
110
|
+
std::copy(s, s + rest, this->pptr());
|
111
|
+
this->pbump(rest);
|
112
|
+
return n;
|
113
|
+
}
|
114
|
+
|
115
|
+
size_t BlockStreamBuffer::totalSize() const
|
116
|
+
{
|
117
|
+
return (buffers.size() - 1) * block_size + (this->pptr() - this->pbase());
|
118
|
+
}
|
119
|
+
|
120
|
+
// Scans a sequence of tagged data records and returns, for every record key,
// the stream positions where its payload starts and where the record ends.
// The entry with the empty key "" always refers to the most recently scanned record.
// Scanning stops after the record whose header has trailing_cnt == 0; the stream is left
// positioned at the end of that record.
//  istr    : stream positioned at the first record header
//  version : not used by this function
// Throws UnfitException when no valid record header can be read or the header is malformed,
// and std::ios_base::failure when the key bytes cannot be read.
//
// Fixes: the header and key reads are now checked (a truncated stream used to leave the header
// uninitialised or stale, which could loop forever), and a key size larger than the local
// 256-byte buffer is rejected instead of overrunning it.
TaggedDataMap readTaggedDataMap(std::istream& istr, uint32_t version)
{
	std::unordered_map<std::string, std::pair<std::streampos, std::streampos>> ret;
	TaggedDataHeader h;
	do
	{
		if (!istr.read((char*)&h, sizeof(h)) || h.key != taggedDataKeyUint)
		{
			throw UnfitException("tagged data key is not found");
		}
		// position of the total-size field, 16 bytes before the end of the header
		const std::streampos totsize_pos = istr.tellg() - (std::streamoff)16;
		std::array<char, 256> key;
		if (h.keysize > key.size())
		{
			throw UnfitException("tagged data key is too long");
		}
		if (!istr.read(key.data(), h.keysize))
		{
			throw std::ios_base::failure(std::string("reading tagged data key is failed"));
		}
		const std::streampos start_pos = istr.tellg();
		const std::streampos end_pos = totsize_pos + (std::streamoff)h.totsize;
		ret.emplace(std::string{ key.data(), h.keysize }, std::make_pair(start_pos, end_pos));
		ret[""] = std::make_pair(start_pos, end_pos);
		// skip the payload and continue with the next record header
		istr.seekg(end_pos);
	} while (h.trailing_cnt);
	return ret;
}
|
142
|
+
|
143
|
+
// Folds a byte range into a 64-bit hash value.
//  data : start of the bytes to hash (may be null when size is 0)
//  size : number of bytes
//  seed : initial hash state; returned unchanged when size is 0
// The input is consumed as 32-bit words in native byte order; a trailing group of
// 1-3 bytes is zero-padded to a full word.
//
// Fix: words are fetched with memcpy instead of dereferencing a casted uint32_t pointer,
// so the function is well-defined for buffers that are not 4-byte aligned.
// The produced values are unchanged.
uint64_t computeFastHash(const void* data, size_t size, uint64_t seed)
{
	const char* const bytes = (const char*)data;

	// scrambles one 32-bit word and combines it into the running seed
	const auto absorb = [&seed](uint32_t x)
	{
		x = ((x >> 16) ^ x) * 0x45d9f3b;
		x = ((x >> 16) ^ x) * 0x45d9f3b;
		x = (x >> 16) ^ x;
		seed ^= x + 0x9e3779b9 + (seed << 6) + (seed >> 2);
	};

	const size_t words = size / 4;
	for (size_t i = 0; i < words; ++i)
	{
		uint32_t x;
		memcpy(&x, bytes + i * 4, sizeof(x));
		absorb(x);
	}

	const size_t tail = size % 4;
	if (tail)
	{
		uint32_t x = 0;
		memcpy(&x, bytes + words * 4, tail);
		absorb(x);
	}
	return seed;
}
|
165
|
+
}
|
166
|
+
}
|