tomoto 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +1 -1
- data/ext/tomoto/extconf.rb +4 -2
- data/lib/tomoto/version.rb +1 -1
- data/vendor/tomotopy/README.kr.rst +10 -1
- data/vendor/tomotopy/README.rst +10 -1
- data/vendor/tomotopy/src/TopicModel/CT.h +2 -2
- data/vendor/tomotopy/src/TopicModel/CTModel.cpp +5 -0
- data/vendor/tomotopy/src/TopicModel/CTModel.hpp +1 -0
- data/vendor/tomotopy/src/TopicModel/DMR.h +2 -2
- data/vendor/tomotopy/src/TopicModel/DMRModel.cpp +5 -0
- data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +1 -0
- data/vendor/tomotopy/src/TopicModel/DT.h +2 -2
- data/vendor/tomotopy/src/TopicModel/DTModel.cpp +5 -0
- data/vendor/tomotopy/src/TopicModel/DTModel.hpp +3 -0
- data/vendor/tomotopy/src/TopicModel/GDMR.h +2 -2
- data/vendor/tomotopy/src/TopicModel/GDMRModel.cpp +5 -0
- data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +1 -0
- data/vendor/tomotopy/src/TopicModel/HDP.h +2 -2
- data/vendor/tomotopy/src/TopicModel/HDPModel.cpp +5 -0
- data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +2 -0
- data/vendor/tomotopy/src/TopicModel/HLDA.h +2 -2
- data/vendor/tomotopy/src/TopicModel/HLDAModel.cpp +5 -0
- data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +9 -0
- data/vendor/tomotopy/src/TopicModel/HPA.h +2 -2
- data/vendor/tomotopy/src/TopicModel/HPAModel.cpp +5 -0
- data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +2 -0
- data/vendor/tomotopy/src/TopicModel/LDA.h +8 -2
- data/vendor/tomotopy/src/TopicModel/LDAModel.cpp +5 -0
- data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +8 -0
- data/vendor/tomotopy/src/TopicModel/LLDA.h +2 -2
- data/vendor/tomotopy/src/TopicModel/LLDAModel.cpp +5 -0
- data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +1 -0
- data/vendor/tomotopy/src/TopicModel/MGLDA.h +2 -2
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.cpp +5 -0
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +1 -0
- data/vendor/tomotopy/src/TopicModel/PA.h +2 -2
- data/vendor/tomotopy/src/TopicModel/PAModel.cpp +5 -0
- data/vendor/tomotopy/src/TopicModel/PAModel.hpp +2 -0
- data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +1 -0
- data/vendor/tomotopy/src/TopicModel/PT.h +3 -3
- data/vendor/tomotopy/src/TopicModel/PTModel.cpp +5 -0
- data/vendor/tomotopy/src/TopicModel/PTModel.hpp +1 -0
- data/vendor/tomotopy/src/TopicModel/SLDA.h +3 -2
- data/vendor/tomotopy/src/TopicModel/SLDAModel.cpp +5 -0
- data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +1 -0
- data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +77 -3
- data/vendor/tomotopy/src/Utils/Dictionary.cpp +102 -0
- data/vendor/tomotopy/src/Utils/Dictionary.h +26 -75
- data/vendor/tomotopy/src/Utils/Mmap.cpp +146 -0
- data/vendor/tomotopy/src/Utils/Mmap.h +139 -0
- data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -0
- data/vendor/tomotopy/src/Utils/SharedString.cpp +134 -0
- data/vendor/tomotopy/src/Utils/SharedString.h +104 -0
- data/vendor/tomotopy/src/Utils/serializer.cpp +166 -0
- data/vendor/tomotopy/src/Utils/serializer.hpp +261 -85
- metadata +9 -4
- data/vendor/tomotopy/src/Utils/SharedString.hpp +0 -206
@@ -0,0 +1,134 @@
|
|
1
|
+
#include "SharedString.h"
|
2
|
+
|
3
|
+
namespace tomoto
|
4
|
+
{
|
5
|
+
void SharedString::incref()
|
6
|
+
{
|
7
|
+
if (ptr)
|
8
|
+
{
|
9
|
+
++*(size_t*)ptr;
|
10
|
+
}
|
11
|
+
}
|
12
|
+
|
13
|
+
void SharedString::decref()
|
14
|
+
{
|
15
|
+
if (ptr)
|
16
|
+
{
|
17
|
+
if (--*(size_t*)ptr == 0)
|
18
|
+
{
|
19
|
+
delete[] ptr;
|
20
|
+
ptr = nullptr;
|
21
|
+
}
|
22
|
+
}
|
23
|
+
}
|
24
|
+
|
25
|
+
void SharedString::init(const char* _begin, const char* _end)
|
26
|
+
{
|
27
|
+
ptr = new char[_end - _begin + 9];
|
28
|
+
*(size_t*)ptr = 1;
|
29
|
+
len = _end - _begin;
|
30
|
+
std::memcpy((void*)(ptr + 8), _begin, _end - _begin);
|
31
|
+
((char*)ptr)[_end - _begin + 8] = 0;
|
32
|
+
}
|
33
|
+
|
34
|
+
// Builds an empty string: nothing is allocated and the members keep
// their in-class initial values.
SharedString::SharedString() = default;
|
37
|
+
|
38
|
+
// Builds a string holding a private copy of the characters in [_begin, _end).
SharedString::SharedString(const char* _begin, const char* _end)
{
    this->init(_begin, _end);
}
|
42
|
+
|
43
|
+
// Builds a string from a zero-terminated C string.
// A null pointer yields an empty string without any allocation.
SharedString::SharedString(const char* _ptr)
{
    if (!_ptr) return;

    const char* const last = _ptr + std::strlen(_ptr);
    init(_ptr, last);
}
|
50
|
+
|
51
|
+
// Builds a string from a std::string.
// An empty std::string yields an empty SharedString without any allocation.
SharedString::SharedString(const std::string& str)
{
    if (str.empty()) return;

    const char* const first = str.data();
    init(first, first + str.size());
}
|
58
|
+
|
59
|
+
// Copying shares the source's buffer and registers the new owner;
// no characters are duplicated.
SharedString::SharedString(const SharedString& o) noexcept
    : ptr(o.ptr), len(o.len)
{
    incref();
}
|
64
|
+
|
65
|
+
// Moving takes over the source's buffer and leaves the source empty
// (null buffer, zero length); the reference counter is not touched.
SharedString::SharedString(SharedString&& o) noexcept
    : ptr(o.ptr), len(o.len)
{
    o.ptr = nullptr;
    o.len = 0;
}
|
70
|
+
|
71
|
+
// Gives up this object's ownership; the buffer is freed by decref()
// once the last owner is gone.
SharedString::~SharedString()
{
    this->decref();
}
|
75
|
+
|
76
|
+
// Copy assignment: releases the current buffer, then shares the buffer
// of `o` and registers this object as an additional owner.
// Self-assignment is a no-op.
SharedString& SharedString::operator=(const SharedString& o)
{
    if (this == &o) return *this;

    decref();
    ptr = o.ptr;
    len = o.len;
    incref();
    return *this;
}
|
87
|
+
|
88
|
+
// Move assignment: the two objects exchange buffer and length.
// The previous content of this object ends up in `o` and is released
// whenever `o` is destroyed or reassigned.
SharedString& SharedString::operator=(SharedString&& o) noexcept
{
    const char* const previous_ptr = ptr;
    ptr = o.ptr;
    o.ptr = previous_ptr;

    const size_t previous_len = len;
    len = o.len;
    o.len = previous_len;

    return *this;
}
|
94
|
+
|
95
|
+
// Converts to std::string by copying the stored characters.
// The characters start 8 bytes into the buffer; an object without a
// buffer converts to an empty std::string.
SharedString::operator std::string() const
{
    if (ptr)
    {
        const char* const first = ptr + 8;
        return std::string(first, first + len);
    }
    return std::string();
}
|
100
|
+
|
101
|
+
const char* SharedString::c_str() const
|
102
|
+
{
|
103
|
+
if (!ptr) return "";
|
104
|
+
return ptr + 8;
|
105
|
+
}
|
106
|
+
|
107
|
+
std::string SharedString::substr(size_t start, size_t len) const
|
108
|
+
{
|
109
|
+
return { c_str() + start, c_str() + start + len };
|
110
|
+
}
|
111
|
+
|
112
|
+
bool SharedString::operator==(const SharedString& o) const
|
113
|
+
{
|
114
|
+
if (ptr == o.ptr) return true;
|
115
|
+
if (size() != o.size()) return false;
|
116
|
+
return std::equal(begin(), end(), o.begin());
|
117
|
+
}
|
118
|
+
|
119
|
+
bool SharedString::operator==(const std::string& o) const
|
120
|
+
{
|
121
|
+
if (size() != o.size()) return false;
|
122
|
+
return std::equal(begin(), end(), o.begin());
|
123
|
+
}
|
124
|
+
|
125
|
+
bool SharedString::operator!=(const SharedString& o) const
|
126
|
+
{
|
127
|
+
return !operator==(o);
|
128
|
+
}
|
129
|
+
|
130
|
+
bool SharedString::operator!=(const std::string& o) const
|
131
|
+
{
|
132
|
+
return !operator==(o);
|
133
|
+
}
|
134
|
+
}
|
@@ -0,0 +1,104 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include <string>
|
4
|
+
#include "serializer.hpp"
|
5
|
+
|
6
|
+
namespace tomoto
|
7
|
+
{
|
8
|
+
// Read-only string whose characters live in a buffer that copies of the
// object share. Construction from C strings and std::string is explicit;
// conversion back to std::string is implicit.
class SharedString
{
    // Shared buffer, or nullptr when the string holds nothing.
    const char* ptr = nullptr;
    // Number of characters stored in the buffer.
    size_t len = 0;

    // Ownership bookkeeping and buffer creation (defined out of line).
    void incref();
    void decref();
    void init(const char* _begin, const char* _end);

public:
    SharedString();
    explicit SharedString(const char* _begin, const char* _end);
    explicit SharedString(const char* _ptr);
    explicit SharedString(const std::string& str);
    SharedString(const SharedString& o) noexcept;
    SharedString(SharedString&& o) noexcept;
    ~SharedString();
    SharedString& operator=(const SharedString& o);
    SharedString& operator=(SharedString&& o) noexcept;

    // Number of characters; 0 when there is no buffer.
    size_t size() const { return ptr ? len : 0; }

    // True when size() is 0.
    bool empty() const { return !size(); }

    operator std::string() const;

    const char* c_str() const;

    // data() and begin() are the same pointer as c_str().
    const char* data() const { return c_str(); }
    const char* begin() const { return data(); }

    // One past the last character.
    const char* end() const { return data() + size(); }

    std::string substr(size_t start, size_t len) const;

    bool operator==(const SharedString& o) const;
    bool operator==(const std::string& o) const;

    bool operator!=(const SharedString& o) const;
    bool operator!=(const std::string& o) const;
};
|
69
|
+
|
70
|
+
namespace serializer
|
71
|
+
{
|
72
|
+
template<>
|
73
|
+
struct Serializer<SharedString>
|
74
|
+
{
|
75
|
+
using VTy = SharedString;
|
76
|
+
void write(std::ostream& ostr, const VTy& v)
|
77
|
+
{
|
78
|
+
writeToStream(ostr, (uint32_t)v.size());
|
79
|
+
if (!ostr.write((const char*)v.data(), v.size()))
|
80
|
+
throw std::ios_base::failure(std::string("writing type 'SharedString' is failed"));
|
81
|
+
}
|
82
|
+
|
83
|
+
void read(std::istream& istr, VTy& v)
|
84
|
+
{
|
85
|
+
auto size = readFromStream<uint32_t>(istr);
|
86
|
+
std::vector<char> t(size);
|
87
|
+
if (!istr.read((char*)t.data(), t.size()))
|
88
|
+
throw std::ios_base::failure(std::string("reading type 'SharedString' is failed"));
|
89
|
+
v = SharedString{ t.data(), t.data() + t.size() };
|
90
|
+
}
|
91
|
+
};
|
92
|
+
}
|
93
|
+
}
|
94
|
+
|
95
|
+
namespace std
|
96
|
+
{
|
97
|
+
template <> struct hash<tomoto::SharedString>
|
98
|
+
{
|
99
|
+
size_t operator()(const tomoto::SharedString& x) const
|
100
|
+
{
|
101
|
+
return hash<string>{}(x);
|
102
|
+
}
|
103
|
+
};
|
104
|
+
}
|
@@ -0,0 +1,166 @@
|
|
1
|
+
#include "serializer.hpp"
|
2
|
+
|
3
|
+
namespace tomoto
|
4
|
+
{
|
5
|
+
namespace serializer
|
6
|
+
{
|
7
|
+
// Wraps the caller-provided memory [base, base + n) as a stream buffer.
// `read` sets up the get area over that range, `write` the put area;
// both may be requested at once.
membuf::membuf(bool read, bool write, char* base, std::ptrdiff_t n)
{
    char* const limit = base + n;

    if (read) this->setg(base, base, limit);
    if (write) this->setp(base, limit);
}

membuf::~membuf() = default;
|
21
|
+
|
22
|
+
std::streampos membuf::seekpos(pos_type sp, std::ios_base::openmode which)
|
23
|
+
{
|
24
|
+
return seekoff(sp - pos_type(off_type(0)), std::ios_base::beg, which);
|
25
|
+
}
|
26
|
+
|
27
|
+
std::streampos membuf::seekoff(off_type off,
|
28
|
+
std::ios_base::seekdir dir,
|
29
|
+
std::ios_base::openmode which
|
30
|
+
)
|
31
|
+
{
|
32
|
+
if (which & std::ios_base::in)
|
33
|
+
{
|
34
|
+
if (dir == std::ios_base::cur)
|
35
|
+
gbump(off);
|
36
|
+
else if (dir == std::ios_base::end)
|
37
|
+
setg(eback(), egptr() + off, egptr());
|
38
|
+
else if (dir == std::ios_base::beg)
|
39
|
+
setg(eback(), eback() + off, egptr());
|
40
|
+
}
|
41
|
+
if (which & std::ios_base::out)
|
42
|
+
{
|
43
|
+
if (dir == std::ios_base::cur)
|
44
|
+
pbump(off);
|
45
|
+
else if (dir == std::ios_base::end)
|
46
|
+
setp(epptr() + off, epptr());
|
47
|
+
else if (dir == std::ios_base::beg)
|
48
|
+
setp(pbase() + off, epptr());
|
49
|
+
|
50
|
+
if (!(which & std::ios_base::in))
|
51
|
+
{
|
52
|
+
return pptr() - pbase();
|
53
|
+
}
|
54
|
+
}
|
55
|
+
return gptr() - eback();
|
56
|
+
}
|
57
|
+
|
58
|
+
// Input stream reading from the memory range [base, base + n).
// The buffer is created read-only (read = true, write = false); the const
// qualifier is dropped only because membuf takes a char*.
imstream::imstream(const char* base, std::ptrdiff_t n)
    : std::istream(&buf),
      buf(true, false, const_cast<char*>(base), n)
{
}

imstream::~imstream() = default;
|
64
|
+
|
65
|
+
// Output stream writing into the memory range [base, base + n).
// The buffer is created write-only (read = false, write = true).
omstream::omstream(char* base, std::ptrdiff_t n)
    : std::ostream(&buf),
      buf(false, true, base, n)
{
}

omstream::~omstream() = default;
|
71
|
+
|
72
|
+
|
73
|
+
BlockStreamBuffer::BlockStreamBuffer(size_t _block_size) : block_size{ _block_size }
|
74
|
+
{
|
75
|
+
buffers.emplace_back(std::make_unique<uint8_t[]>(block_size));
|
76
|
+
this->setp((char*)buffers.back().get(), (char*)buffers.back().get() + block_size);
|
77
|
+
}
|
78
|
+
|
79
|
+
BlockStreamBuffer::~BlockStreamBuffer() = default;
|
80
|
+
|
81
|
+
int BlockStreamBuffer::overflow(int c)
|
82
|
+
{
|
83
|
+
if (this->pptr() == this->epptr())
|
84
|
+
{
|
85
|
+
buffers.emplace_back(std::make_unique<uint8_t[]>(block_size));
|
86
|
+
this->setp((char*)buffers.back().get(), (char*)buffers.back().get() + block_size);
|
87
|
+
}
|
88
|
+
else
|
89
|
+
{
|
90
|
+
*(this->pptr()) = c;
|
91
|
+
this->pbump(1);
|
92
|
+
}
|
93
|
+
return c;
|
94
|
+
}
|
95
|
+
|
96
|
+
std::streamsize BlockStreamBuffer::xsputn(const char* s, std::streamsize n)
|
97
|
+
{
|
98
|
+
auto rest = n;
|
99
|
+
auto buf_remain = this->epptr() - this->pptr();
|
100
|
+
while (rest > buf_remain)
|
101
|
+
{
|
102
|
+
std::copy(s, s + buf_remain, this->pptr());
|
103
|
+
this->pbump(buf_remain);
|
104
|
+
buffers.emplace_back(std::make_unique<uint8_t[]>(block_size));
|
105
|
+
this->setp((char*)buffers.back().get(), (char*)buffers.back().get() + block_size);
|
106
|
+
rest -= buf_remain;
|
107
|
+
s += buf_remain;
|
108
|
+
buf_remain = block_size;
|
109
|
+
}
|
110
|
+
std::copy(s, s + rest, this->pptr());
|
111
|
+
this->pbump(rest);
|
112
|
+
return n;
|
113
|
+
}
|
114
|
+
|
115
|
+
size_t BlockStreamBuffer::totalSize() const
|
116
|
+
{
|
117
|
+
return (buffers.size() - 1) * block_size + (this->pptr() - this->pbase());
|
118
|
+
}
|
119
|
+
|
120
|
+
// Scans a run of tagged data blocks and records where each block's
// payload lies in the stream.
//
// istr    - stream positioned at the first block header
// version - not used by this function
//
// Returns a map from block key to (payload start, block end). The empty
// key "" is overwritten on every iteration, so it ends up holding the
// range of the last block read. On return the stream is positioned at
// the end of the last block.
//
// Throws UnfitException when a header cannot be read, does not carry the
// tagged-data marker, or announces a key longer than the key buffer;
// throws std::ios_base::failure when the key itself is truncated.
TaggedDataMap readTaggedDataMap(std::istream& istr, uint32_t version)
{
    std::unordered_map<std::string, std::pair<std::streampos, std::streampos>> ret;
    TaggedDataHeader h;
    do
    {
        // A failed read would leave `h` uninitialized (first block) or
        // stale (later blocks, which could loop forever), so stop here.
        if (!istr.read((char*)&h, sizeof(h)))
        {
            throw UnfitException("tagged data header cannot be read");
        }
        if (h.key != taggedDataKeyUint)
        {
            throw UnfitException("tagged data key is not found");
        }
        // NOTE(review): 16 is presumably the distance from the totsize
        // field to the end of the header - confirm against TaggedDataHeader.
        const std::streampos totsize_pos = istr.tellg() - (std::streamoff)16;
        std::array<char, 256> key;
        // The key length comes from the file; never let it overrun `key`.
        if (static_cast<size_t>(h.keysize) > key.size())
        {
            throw UnfitException("tagged data key is too long");
        }
        if (!istr.read(key.data(), h.keysize))
        {
            throw std::ios_base::failure(std::string("reading tagged data key is failed"));
        }
        const std::streampos start_pos = istr.tellg();
        const std::streampos end_pos = totsize_pos + (std::streamoff)h.totsize;
        ret.emplace(std::string{ key.data(), h.keysize }, std::make_pair(start_pos, end_pos));
        ret[""] = std::make_pair(start_pos, end_pos);
        istr.seekg(end_pos);
    } while (h.trailing_cnt);
    return ret;
}
|
142
|
+
|
143
|
+
// Folds `size` bytes starting at `data` into `seed` and returns the result.
//
// The input is consumed as 32-bit words in the machine's native byte
// order; a final partial word (1-3 bytes) is zero-padded. Each word is
// scrambled and then combined into the running 64-bit value. With
// size == 0 the seed is returned unchanged and `data` is never read.
//
// Words are loaded with memcpy instead of through a uint32_t pointer, so
// `data` may have any alignment; the resulting hash values are the same.
uint64_t computeFastHash(const void* data, size_t size, uint64_t seed)
{
    const char* const bytes = static_cast<const char*>(data);

    // Scrambles one word and merges it into the running hash.
    const auto absorb = [&seed](uint32_t x)
    {
        x = ((x >> 16) ^ x) * 0x45d9f3b;
        x = ((x >> 16) ^ x) * 0x45d9f3b;
        x = (x >> 16) ^ x;
        seed ^= x + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    };

    const size_t whole_words = size / 4;
    for (size_t i = 0; i < whole_words; ++i)
    {
        uint32_t word;
        memcpy(&word, bytes + i * 4, sizeof(word));
        absorb(word);
    }

    const size_t tail_bytes = size % 4;
    if (tail_bytes)
    {
        uint32_t word = 0;
        memcpy(&word, bytes + whole_words * 4, tail_bytes);
        absorb(word);
    }
    return seed;
}
|
165
|
+
}
|
166
|
+
}
|