tomoto 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/README.md +3 -3
- data/ext/tomoto/ext.cpp +34 -9
- data/ext/tomoto/extconf.rb +2 -1
- data/lib/tomoto/dmr.rb +1 -1
- data/lib/tomoto/gdmr.rb +1 -1
- data/lib/tomoto/version.rb +1 -1
- data/vendor/tomotopy/LICENSE +1 -1
- data/vendor/tomotopy/README.kr.rst +32 -3
- data/vendor/tomotopy/README.rst +30 -1
- data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +133 -147
- data/vendor/tomotopy/src/Labeling/FoRelevance.h +158 -5
- data/vendor/tomotopy/src/TopicModel/DMR.h +1 -16
- data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +15 -34
- data/vendor/tomotopy/src/TopicModel/DT.h +1 -16
- data/vendor/tomotopy/src/TopicModel/DTModel.hpp +15 -32
- data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +18 -37
- data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +16 -20
- data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +3 -3
- data/vendor/tomotopy/src/TopicModel/LDA.h +0 -11
- data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +9 -21
- data/vendor/tomotopy/src/TopicModel/LLDA.h +0 -15
- data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +12 -30
- data/vendor/tomotopy/src/TopicModel/MGLDA.h +0 -15
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +59 -72
- data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +12 -30
- data/vendor/tomotopy/src/TopicModel/SLDA.h +0 -15
- data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +17 -35
- data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +158 -38
- data/vendor/tomotopy/src/Utils/Dictionary.h +40 -2
- data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +122 -3
- data/vendor/tomotopy/src/Utils/SharedString.hpp +181 -0
- data/vendor/tomotopy/src/Utils/math.h +1 -1
- data/vendor/tomotopy/src/Utils/sample.hpp +1 -1
- data/vendor/tomotopy/src/Utils/serializer.hpp +17 -0
- data/vendor/variant/LICENSE +25 -0
- data/vendor/variant/LICENSE_1_0.txt +23 -0
- data/vendor/variant/README.md +102 -0
- data/vendor/variant/include/mapbox/optional.hpp +74 -0
- data/vendor/variant/include/mapbox/recursive_wrapper.hpp +122 -0
- data/vendor/variant/include/mapbox/variant.hpp +974 -0
- data/vendor/variant/include/mapbox/variant_io.hpp +45 -0
- metadata +15 -7
@@ -11,9 +11,15 @@
|
|
11
11
|
namespace tomoto
|
12
12
|
{
|
13
13
|
using Vid = uint32_t;
|
14
|
+
static constexpr Vid non_vocab_id = (Vid)-1;
|
14
15
|
using Tid = uint16_t;
|
15
16
|
using Float = float;
|
16
17
|
|
18
|
+
struct VidPair : public std::pair<Vid, Vid>
|
19
|
+
{
|
20
|
+
using std::pair<Vid, Vid>::pair;
|
21
|
+
};
|
22
|
+
|
17
23
|
class Dictionary
|
18
24
|
{
|
19
25
|
protected:
|
@@ -34,7 +40,7 @@ namespace tomoto
|
|
34
40
|
|
35
41
|
size_t size() const { return dict.size(); }
|
36
42
|
|
37
|
-
std::string toWord(Vid vid) const
|
43
|
+
const std::string& toWord(Vid vid) const
|
38
44
|
{
|
39
45
|
assert(vid < id2word.size());
|
40
46
|
return id2word[vid];
|
@@ -43,7 +49,7 @@ namespace tomoto
|
|
43
49
|
Vid toWid(const std::string& word) const
|
44
50
|
{
|
45
51
|
auto it = dict.find(word);
|
46
|
-
if (it == dict.end()) return
|
52
|
+
if (it == dict.end()) return non_vocab_id;
|
47
53
|
return it->second;
|
48
54
|
}
|
49
55
|
|
@@ -75,6 +81,38 @@ namespace tomoto
|
|
75
81
|
id2word[p.second] = p.first;
|
76
82
|
}
|
77
83
|
}
|
84
|
+
|
85
|
+
const std::vector<std::string>& getRaw() const
|
86
|
+
{
|
87
|
+
return id2word;
|
88
|
+
}
|
89
|
+
|
90
|
+
Vid mapToNewDict(Vid v, const Dictionary& newDict) const
|
91
|
+
{
|
92
|
+
return newDict.toWid(toWord(v));
|
93
|
+
}
|
94
|
+
|
95
|
+
std::vector<Vid> mapToNewDict(const std::vector<Vid>& v, const Dictionary& newDict) const
|
96
|
+
{
|
97
|
+
std::vector<Vid> r(v.size());
|
98
|
+
for (size_t i = 0; i < v.size(); ++i)
|
99
|
+
{
|
100
|
+
r[i] = mapToNewDict(v[i], newDict);
|
101
|
+
}
|
102
|
+
return r;
|
103
|
+
}
|
78
104
|
};
|
79
105
|
|
80
106
|
}
|
107
|
+
|
108
|
+
namespace std
|
109
|
+
{
|
110
|
+
template<>
|
111
|
+
struct hash<tomoto::VidPair>
|
112
|
+
{
|
113
|
+
size_t operator()(const tomoto::VidPair& p) const
|
114
|
+
{
|
115
|
+
return hash<size_t>{}(p.first) ^ hash<size_t>{}(p.second);
|
116
|
+
}
|
117
|
+
};
|
118
|
+
}
|
@@ -12,6 +12,12 @@ namespace Eigen
|
|
12
12
|
{
|
13
13
|
typedef PacketType type;
|
14
14
|
};
|
15
|
+
|
16
|
+
template<typename PacketType>
|
17
|
+
struct to_float_packet
|
18
|
+
{
|
19
|
+
typedef PacketType type;
|
20
|
+
};
|
15
21
|
}
|
16
22
|
}
|
17
23
|
|
@@ -28,15 +34,29 @@ namespace Eigen
|
|
28
34
|
typedef Packet8i type;
|
29
35
|
};
|
30
36
|
|
37
|
+
template<> struct to_float_packet<Packet8i>
|
38
|
+
{
|
39
|
+
typedef Packet8f type;
|
40
|
+
};
|
41
|
+
|
31
42
|
EIGEN_STRONG_INLINE Packet8f p_to_f32(const Packet8i& a)
|
32
43
|
{
|
33
44
|
return _mm256_cvtepi32_ps(a);
|
34
45
|
}
|
35
46
|
|
47
|
+
// Maps every float lane to 1.0f when it differs from zero and to 0.0f otherwise.
// _CMP_NEQ_UQ is the unordered "not equal" predicate, so a NaN lane counts as non-zero;
// this is the same behaviour _mm_cmpneq_ps gives in the SSE implementation of this helper.
EIGEN_STRONG_INLINE Packet8f p_bool2float(const Packet8f& a)
{
	return _mm256_and_ps(_mm256_cmp_ps(a, _mm256_set1_ps(0), _CMP_NEQ_UQ), _mm256_set1_ps(1));
}

// Maps every 32-bit integer lane to 1.0f when it is non-zero and to 0.0f otherwise.
// The lanes are converted numerically before the comparison. Reinterpreting the integer
// bits as floats would misclassify INT_MIN (bit pattern of -0.0f), integers whose bits
// form a NaN (e.g. -1), and small integers that look like denormals.
EIGEN_STRONG_INLINE Packet8f p_bool2float(const Packet8i& a)
{
	return p_bool2float(_mm256_cvtepi32_ps(a));
}
|
36
56
|
}
|
37
57
|
}
|
38
|
-
|
39
|
-
#
|
58
|
+
#endif
|
59
|
+
#if defined(EIGEN_VECTORIZE_SSE2)
|
40
60
|
#include <xmmintrin.h>
|
41
61
|
#include "sse_gamma.h"
|
42
62
|
|
@@ -49,11 +69,25 @@ namespace Eigen
|
|
49
69
|
typedef Packet4i type;
|
50
70
|
};
|
51
71
|
|
72
|
+
template<> struct to_float_packet<Packet4i>
|
73
|
+
{
|
74
|
+
typedef Packet4f type;
|
75
|
+
};
|
76
|
+
|
52
77
|
EIGEN_STRONG_INLINE Packet4f p_to_f32(const Packet4i& a)
|
53
78
|
{
|
54
79
|
return _mm_cvtepi32_ps(a);
|
55
80
|
}
|
81
|
+
|
82
|
+
// Maps every float lane to 1.0f when it differs from zero and to 0.0f otherwise.
// _mm_cmpneq_ps yields an all-ones mask for "not equal" lanes; AND-ing that mask with 1.0f
// leaves 1.0f in those lanes and 0.0f everywhere else.
EIGEN_STRONG_INLINE Packet4f p_bool2float(const Packet4f& a)
{
	return _mm_and_ps(_mm_cmpneq_ps(a, _mm_set1_ps(0)), _mm_set1_ps(1));
}

// Maps every 32-bit integer lane to 1.0f when it is non-zero and to 0.0f otherwise.
// The lanes are converted numerically before the comparison. Reinterpreting the integer
// bits as floats would treat INT_MIN as -0.0f (i.e. zero) and would expose small integers
// as denormals, which compare equal to zero when denormals-are-zero mode is active.
EIGEN_STRONG_INLINE Packet4f p_bool2float(const Packet4i& a)
{
	return p_bool2float(_mm_cvtepi32_ps(a));
}
|
57
91
|
}
|
58
92
|
}
|
59
93
|
#endif
|
@@ -84,7 +118,7 @@ namespace Eigen
|
|
84
118
|
template<>
|
85
119
|
struct scalar_cast_op<int32_t, float> {
|
86
120
|
EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
|
87
|
-
|
121
|
+
typedef float result_type;
|
88
122
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const float operator() (const int32_t& a) const { return cast<int32_t, float>(a); }
|
89
123
|
|
90
124
|
template<typename Packet>
|
@@ -156,6 +190,81 @@ namespace Eigen
|
|
156
190
|
evaluator<ArgType> m_argImpl;
|
157
191
|
};
|
158
192
|
|
193
|
+
struct scalar_bool2float
|
194
|
+
{
|
195
|
+
EIGEN_EMPTY_STRUCT_CTOR(scalar_bool2float)
|
196
|
+
typedef float result_type;
|
197
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const float operator() (const int32_t& a) const { return a ? 1.f : 0.f; }
|
198
|
+
|
199
|
+
template<typename Packet>
|
200
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename to_float_packet<Packet>::type packetOp(const Packet& a) const
|
201
|
+
{
|
202
|
+
return p_bool2float(a);
|
203
|
+
}
|
204
|
+
};
|
205
|
+
|
206
|
+
template<>
|
207
|
+
struct functor_traits<scalar_bool2float>
|
208
|
+
{
|
209
|
+
enum { Cost = NumTraits<float>::AddCost, PacketAccess = 1 };
|
210
|
+
};
|
211
|
+
|
212
|
+
template<typename ArgType>
|
213
|
+
struct unary_evaluator<CwiseUnaryOp<scalar_bool2float, ArgType>, IndexBased >
|
214
|
+
: evaluator_base<CwiseUnaryOp<scalar_bool2float, ArgType> >
|
215
|
+
{
|
216
|
+
typedef CwiseUnaryOp<scalar_bool2float, ArgType> XprType;
|
217
|
+
|
218
|
+
enum {
|
219
|
+
CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<scalar_bool2float>::Cost,
|
220
|
+
|
221
|
+
Flags = evaluator<ArgType>::Flags
|
222
|
+
& (HereditaryBits | LinearAccessBit | (functor_traits<scalar_bool2float>::PacketAccess ? PacketAccessBit : 0)),
|
223
|
+
Alignment = evaluator<ArgType>::Alignment
|
224
|
+
};
|
225
|
+
|
226
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
227
|
+
explicit unary_evaluator(const XprType& op)
|
228
|
+
: m_functor(op.functor()),
|
229
|
+
m_argImpl(op.nestedExpression())
|
230
|
+
{
|
231
|
+
EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<float>::AddCost);
|
232
|
+
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
|
233
|
+
}
|
234
|
+
|
235
|
+
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
236
|
+
|
237
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
238
|
+
CoeffReturnType coeff(Index row, Index col) const
|
239
|
+
{
|
240
|
+
return m_functor(m_argImpl.coeff(row, col));
|
241
|
+
}
|
242
|
+
|
243
|
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
244
|
+
CoeffReturnType coeff(Index index) const
|
245
|
+
{
|
246
|
+
return m_functor(m_argImpl.coeff(index));
|
247
|
+
}
|
248
|
+
|
249
|
+
template<int LoadMode, typename PacketType>
|
250
|
+
EIGEN_STRONG_INLINE
|
251
|
+
PacketType packet(Index row, Index col) const
|
252
|
+
{
|
253
|
+
return m_functor.packetOp(m_argImpl.template packet<LoadMode, typename to_int_packet<PacketType>::type>(row, col));
|
254
|
+
}
|
255
|
+
|
256
|
+
template<int LoadMode, typename PacketType>
|
257
|
+
EIGEN_STRONG_INLINE
|
258
|
+
PacketType packet(Index index) const
|
259
|
+
{
|
260
|
+
return m_functor.packetOp(m_argImpl.template packet<LoadMode, typename to_int_packet<PacketType>::type>(index));
|
261
|
+
}
|
262
|
+
|
263
|
+
protected:
|
264
|
+
const scalar_bool2float m_functor;
|
265
|
+
evaluator<ArgType> m_argImpl;
|
266
|
+
};
|
267
|
+
|
159
268
|
}
|
160
269
|
|
161
270
|
template <typename Derived, typename T> EIGEN_DEVICE_FUNC inline
|
@@ -178,4 +287,14 @@ namespace Eigen
|
|
178
287
|
y.derived()
|
179
288
|
);
|
180
289
|
}
|
290
|
+
|
291
|
+
|
292
|
+
template<typename Derived>
|
293
|
+
inline const CwiseUnaryOp<internal::scalar_bool2float, const Derived>
|
294
|
+
bool2float(const Eigen::ArrayBase<Derived>& x)
|
295
|
+
{
|
296
|
+
return CwiseUnaryOp<internal::scalar_bool2float, const Derived>(
|
297
|
+
x.derived()
|
298
|
+
);
|
299
|
+
}
|
181
300
|
}
|
@@ -0,0 +1,181 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include <algorithm>
#include <cstddef>
#include <cstring>
#include <string>
#include <utility>
|
4
|
+
|
5
|
+
namespace tomoto
{
	/**
	 * Immutable, reference-counted character string.
	 *
	 * One heap buffer is laid out as [use count][characters][NUL]. Copies share the buffer
	 * and bump the count; the buffer is released when the last owner lets go of it.
	 * The count is a plain size_t updated without any synchronization, so objects sharing
	 * a buffer must not be copied, assigned or destroyed from several threads at once.
	 * An empty string owns no buffer at all (ptr == nullptr, len == 0).
	 */
	class SharedString
	{
		// Bytes reserved in front of the characters for the use count.
		enum : size_t { headerSize = 8 };
		static_assert(sizeof(size_t) <= headerSize, "the use count must fit into the buffer header");

		const char* ptr = nullptr;
		size_t len = 0;

		// Use count stored at the start of the shared buffer; only valid while ptr != nullptr.
		size_t& useCount() const
		{
			return *reinterpret_cast<size_t*>(const_cast<char*>(ptr));
		}

		void incref()
		{
			if (ptr) ++useCount();
		}

		// Gives up this object's share of the buffer and resets it to the empty state.
		void decref()
		{
			if (!ptr) return;
			if (--useCount() == 0) delete[] ptr;
			ptr = nullptr;
			len = 0;
		}

		// Takes a private copy of [_begin, _end). An empty range allocates nothing, so
		// every empty string has the same representation and empty() agrees with size().
		void init(const char* _begin, const char* _end)
		{
			const size_t n = _end - _begin;
			if (n == 0) return;
			char* buf = new char[headerSize + n + 1];
			*reinterpret_cast<size_t*>(buf) = 1;
			std::memcpy(buf + headerSize, _begin, n);
			buf[headerSize + n] = 0;
			ptr = buf;
			len = n;
		}

	public:

		SharedString()
		{
		}

		// Copies the characters in [_begin, _end).
		explicit SharedString(const char* _begin, const char* _end)
		{
			init(_begin, _end);
		}

		// Copies a NUL-terminated string; a null pointer yields an empty string.
		explicit SharedString(const char* _ptr)
		{
			if (_ptr)
			{
				init(_ptr, _ptr + std::strlen(_ptr));
			}
		}

		explicit SharedString(const std::string& str)
		{
			if (!str.empty())
			{
				init(str.data(), str.data() + str.size());
			}
		}

		// Shares o's buffer instead of copying the characters.
		SharedString(const SharedString& o) noexcept
			: ptr{ o.ptr }, len{ o.len }
		{
			incref();
		}

		// Steals o's buffer and leaves o empty.
		SharedString(SharedString&& o) noexcept
		{
			std::swap(ptr, o.ptr);
			std::swap(len, o.len);
		}

		~SharedString()
		{
			decref();
		}

		SharedString& operator=(const SharedString& o)
		{
			if (this != &o)
			{
				decref();
				ptr = o.ptr;
				len = o.len;
				incref();
			}
			return *this;
		}

		// Exchanges contents with o; o releases the previous value when it is destroyed.
		SharedString& operator=(SharedString&& o) noexcept
		{
			std::swap(ptr, o.ptr);
			std::swap(len, o.len);
			return *this;
		}

		// Number of characters, excluding the terminating NUL.
		size_t size() const
		{
			if (ptr) return len;
			return 0;
		}

		// True when the string holds no characters.
		bool empty() const
		{
			return size() == 0;
		}

		operator std::string() const
		{
			if (!ptr) return {};
			return std::string(ptr + headerSize, len);
		}

		// NUL-terminated view of the characters; never null.
		const char* c_str() const
		{
			if (!ptr) return "";
			return ptr + headerSize;
		}

		const char* data() const
		{
			return c_str();
		}

		const char* begin() const
		{
			return data();
		}

		const char* end() const
		{
			return data() + size();
		}

		// Copy of at most `len` characters starting at `start`. Both values are clamped to
		// the stored characters, so an out-of-range request yields a shorter (possibly
		// empty) string instead of reading past the buffer.
		std::string substr(size_t start, size_t len) const
		{
			const size_t total = size();
			if (start > total) start = total;
			if (len > total - start) len = total - start;
			return std::string(c_str() + start, len);
		}

		bool operator==(const SharedString& o) const
		{
			if (ptr == o.ptr) return true; // same buffer (or both empty)
			if (size() != o.size()) return false;
			return std::equal(begin(), end(), o.begin());
		}

		bool operator==(const std::string& o) const
		{
			if (size() != o.size()) return false;
			return std::equal(begin(), end(), o.begin());
		}

		bool operator!=(const SharedString& o) const
		{
			return !operator==(o);
		}

		bool operator!=(const std::string& o) const
		{
			return !operator==(o);
		}
	};
}
|
171
|
+
|
172
|
+
namespace std
|
173
|
+
{
|
174
|
+
template <> struct hash<tomoto::SharedString>
|
175
|
+
{
|
176
|
+
size_t operator()(const tomoto::SharedString& x) const
|
177
|
+
{
|
178
|
+
return hash<string>{}(x);
|
179
|
+
}
|
180
|
+
};
|
181
|
+
}
|
@@ -154,7 +154,7 @@ namespace tomoto
|
|
154
154
|
inline float digammaT(float x) { return detail::LUT_digamma::get(x); }
|
155
155
|
|
156
156
|
template<class _T>
|
157
|
-
inline
|
157
|
+
inline auto lgammaApprox(_T z) -> decltype((z + 2.5)* log(z + 3) - (z + 3) + 0.91893853 + 1. / 12. / (z + 3) - log(z * (z + 1) * (z + 2)))
|
158
158
|
{
|
159
159
|
// approximation : lgamma(z) ~= (z+2.5)ln(z+3) - z - 3 + 0.5 ln (2pi) + 1/12/(z + 3) - ln (z(z+1)(z+2))
|
160
160
|
return (z + 2.5) * log(z + 3) - (z + 3) + 0.91893853 + 1. / 12. / (z + 3) - log(z * (z + 1) * (z + 2));
|
@@ -8,6 +8,7 @@
|
|
8
8
|
#include <vector>
|
9
9
|
#include "tvector.hpp"
|
10
10
|
#include "text.hpp"
|
11
|
+
#include "SharedString.hpp"
|
11
12
|
|
12
13
|
/*
|
13
14
|
|
@@ -435,6 +436,22 @@ namespace tomoto
|
|
435
436
|
throw std::ios_base::failure( std::string("reading type '") + typeid(_Ty).name() + std::string("' is failed") );
|
436
437
|
}
|
437
438
|
|
439
|
+
inline void writeToBinStreamImpl(std::ostream& ostr, const SharedString& v)
|
440
|
+
{
|
441
|
+
writeToStream<uint32_t>(ostr, (uint32_t)v.size());
|
442
|
+
if (!ostr.write((const char*)v.data(), v.size()))
|
443
|
+
throw std::ios_base::failure(std::string("writing type 'SharedString' is failed"));
|
444
|
+
}
|
445
|
+
|
446
|
+
inline void readFromBinStreamImpl(std::istream& istr, SharedString& v)
|
447
|
+
{
|
448
|
+
uint32_t size = readFromStream<uint32_t>(istr);
|
449
|
+
std::vector<char> t(size);
|
450
|
+
if (!istr.read((char*)t.data(), t.size()))
|
451
|
+
throw std::ios_base::failure(std::string("reading type 'SharedString' is failed"));
|
452
|
+
v = SharedString{ t.data(), t.data() + t.size() };
|
453
|
+
}
|
454
|
+
|
438
455
|
template<class _Ty>
|
439
456
|
inline typename std::enable_if<std::is_abstract<_Ty>::value>::type writeToBinStreamImpl(std::ostream& ostr, const std::unique_ptr<_Ty>& v)
|
440
457
|
{
|