tomoto 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +3 -3
  5. data/ext/tomoto/ext.cpp +34 -9
  6. data/ext/tomoto/extconf.rb +2 -1
  7. data/lib/tomoto/dmr.rb +1 -1
  8. data/lib/tomoto/gdmr.rb +1 -1
  9. data/lib/tomoto/version.rb +1 -1
  10. data/vendor/tomotopy/LICENSE +1 -1
  11. data/vendor/tomotopy/README.kr.rst +32 -3
  12. data/vendor/tomotopy/README.rst +30 -1
  13. data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +133 -147
  14. data/vendor/tomotopy/src/Labeling/FoRelevance.h +158 -5
  15. data/vendor/tomotopy/src/TopicModel/DMR.h +1 -16
  16. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +15 -34
  17. data/vendor/tomotopy/src/TopicModel/DT.h +1 -16
  18. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +15 -32
  19. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +18 -37
  20. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +16 -20
  21. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +3 -3
  22. data/vendor/tomotopy/src/TopicModel/LDA.h +0 -11
  23. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +9 -21
  24. data/vendor/tomotopy/src/TopicModel/LLDA.h +0 -15
  25. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +12 -30
  26. data/vendor/tomotopy/src/TopicModel/MGLDA.h +0 -15
  27. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +59 -72
  28. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +12 -30
  29. data/vendor/tomotopy/src/TopicModel/SLDA.h +0 -15
  30. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +17 -35
  31. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +158 -38
  32. data/vendor/tomotopy/src/Utils/Dictionary.h +40 -2
  33. data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +122 -3
  34. data/vendor/tomotopy/src/Utils/SharedString.hpp +181 -0
  35. data/vendor/tomotopy/src/Utils/math.h +1 -1
  36. data/vendor/tomotopy/src/Utils/sample.hpp +1 -1
  37. data/vendor/tomotopy/src/Utils/serializer.hpp +17 -0
  38. data/vendor/variant/LICENSE +25 -0
  39. data/vendor/variant/LICENSE_1_0.txt +23 -0
  40. data/vendor/variant/README.md +102 -0
  41. data/vendor/variant/include/mapbox/optional.hpp +74 -0
  42. data/vendor/variant/include/mapbox/recursive_wrapper.hpp +122 -0
  43. data/vendor/variant/include/mapbox/variant.hpp +974 -0
  44. data/vendor/variant/include/mapbox/variant_io.hpp +45 -0
  45. metadata +15 -7
@@ -11,9 +11,15 @@
11
11
  namespace tomoto
12
12
  {
13
13
  using Vid = uint32_t;
14
+ static constexpr Vid non_vocab_id = (Vid)-1;
14
15
  using Tid = uint16_t;
15
16
  using Float = float;
16
17
 
18
+ struct VidPair : public std::pair<Vid, Vid>
19
+ {
20
+ using std::pair<Vid, Vid>::pair;
21
+ };
22
+
17
23
  class Dictionary
18
24
  {
19
25
  protected:
@@ -34,7 +40,7 @@ namespace tomoto
34
40
 
35
41
  size_t size() const { return dict.size(); }
36
42
 
37
- std::string toWord(Vid vid) const
43
+ const std::string& toWord(Vid vid) const
38
44
  {
39
45
  assert(vid < id2word.size());
40
46
  return id2word[vid];
@@ -43,7 +49,7 @@ namespace tomoto
43
49
  Vid toWid(const std::string& word) const
44
50
  {
45
51
  auto it = dict.find(word);
46
- if (it == dict.end()) return (Vid)-1;
52
+ if (it == dict.end()) return non_vocab_id;
47
53
  return it->second;
48
54
  }
49
55
 
@@ -75,6 +81,38 @@ namespace tomoto
75
81
  id2word[p.second] = p.first;
76
82
  }
77
83
  }
84
+
85
+ const std::vector<std::string>& getRaw() const
86
+ {
87
+ return id2word;
88
+ }
89
+
90
+ Vid mapToNewDict(Vid v, const Dictionary& newDict) const
91
+ {
92
+ return newDict.toWid(toWord(v));
93
+ }
94
+
95
+ std::vector<Vid> mapToNewDict(const std::vector<Vid>& v, const Dictionary& newDict) const
96
+ {
97
+ std::vector<Vid> r(v.size());
98
+ for (size_t i = 0; i < v.size(); ++i)
99
+ {
100
+ r[i] = mapToNewDict(v[i], newDict);
101
+ }
102
+ return r;
103
+ }
78
104
  };
79
105
 
80
106
  }
107
+
108
+ namespace std
109
+ {
110
+ template<>
111
+ struct hash<tomoto::VidPair>
112
+ {
113
+ size_t operator()(const tomoto::VidPair& p) const
114
+ {
115
+ return hash<size_t>{}(p.first) ^ hash<size_t>{}(p.second);
116
+ }
117
+ };
118
+ }
@@ -12,6 +12,12 @@ namespace Eigen
12
12
  {
13
13
  typedef PacketType type;
14
14
  };
15
+
16
// Maps a packet type to the packet type holding the float form of its lanes.
// The primary template is the identity mapping; integer packet types get
// explicit specializations.
template<typename PacketType>
struct to_float_packet
{
    using type = PacketType;
};
15
21
  }
16
22
  }
17
23
 
@@ -28,15 +34,29 @@ namespace Eigen
28
34
  typedef Packet8i type;
29
35
  };
30
36
 
37
+ template<> struct to_float_packet<Packet8i>
38
+ {
39
+ typedef Packet8f type;
40
+ };
41
+
31
42
  EIGEN_STRONG_INLINE Packet8f p_to_f32(const Packet8i& a)
32
43
  {
33
44
  return _mm256_cvtepi32_ps(a);
34
45
  }
35
46
 
47
+ EIGEN_STRONG_INLINE Packet8f p_bool2float(const Packet8f& a)
48
+ {
49
+ return _mm256_and_ps(_mm256_cmp_ps(a, _mm256_set1_ps(0), _CMP_NEQ_OQ), _mm256_set1_ps(1));
50
+ }
51
+
52
+ EIGEN_STRONG_INLINE Packet8f p_bool2float(const Packet8i& a)
53
+ {
54
+ return p_bool2float(_mm256_castsi256_ps(a));
55
+ }
36
56
  }
37
57
  }
38
-
39
- #elif defined(EIGEN_VECTORIZE_SSE2)
58
+ #endif
59
+ #if defined(EIGEN_VECTORIZE_SSE2)
40
60
  #include <xmmintrin.h>
41
61
  #include "sse_gamma.h"
42
62
 
@@ -49,11 +69,25 @@ namespace Eigen
49
69
  typedef Packet4i type;
50
70
  };
51
71
 
72
+ template<> struct to_float_packet<Packet4i>
73
+ {
74
+ typedef Packet4f type;
75
+ };
76
+
52
77
  EIGEN_STRONG_INLINE Packet4f p_to_f32(const Packet4i& a)
53
78
  {
54
79
  return _mm_cvtepi32_ps(a);
55
80
  }
81
+
82
+ EIGEN_STRONG_INLINE Packet4f p_bool2float(const Packet4f& a)
83
+ {
84
+ return _mm_and_ps(_mm_cmpneq_ps(a, _mm_set1_ps(0)), _mm_set1_ps(1));
85
+ }
56
86
 
87
+ EIGEN_STRONG_INLINE Packet4f p_bool2float(const Packet4i& a)
88
+ {
89
+ return p_bool2float(_mm_castsi128_ps(a));
90
+ }
57
91
  }
58
92
  }
59
93
  #endif
@@ -84,7 +118,7 @@ namespace Eigen
84
118
  template<>
85
119
  struct scalar_cast_op<int32_t, float> {
86
120
  EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
87
- typedef float result_type;
121
+ typedef float result_type;
88
122
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const float operator() (const int32_t& a) const { return cast<int32_t, float>(a); }
89
123
 
90
124
  template<typename Packet>
@@ -156,6 +190,81 @@ namespace Eigen
156
190
  evaluator<ArgType> m_argImpl;
157
191
  };
158
192
 
193
+ struct scalar_bool2float
194
+ {
195
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_bool2float)
196
+ typedef float result_type;
197
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const float operator() (const int32_t& a) const { return a ? 1.f : 0.f; }
198
+
199
+ template<typename Packet>
200
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename to_float_packet<Packet>::type packetOp(const Packet& a) const
201
+ {
202
+ return p_bool2float(a);
203
+ }
204
+ };
205
+
206
+ template<>
207
+ struct functor_traits<scalar_bool2float>
208
+ {
209
+ enum { Cost = NumTraits<float>::AddCost, PacketAccess = 1 };
210
+ };
211
+
212
+ template<typename ArgType>
213
+ struct unary_evaluator<CwiseUnaryOp<scalar_bool2float, ArgType>, IndexBased >
214
+ : evaluator_base<CwiseUnaryOp<scalar_bool2float, ArgType> >
215
+ {
216
+ typedef CwiseUnaryOp<scalar_bool2float, ArgType> XprType;
217
+
218
+ enum {
219
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<scalar_bool2float>::Cost,
220
+
221
+ Flags = evaluator<ArgType>::Flags
222
+ & (HereditaryBits | LinearAccessBit | (functor_traits<scalar_bool2float>::PacketAccess ? PacketAccessBit : 0)),
223
+ Alignment = evaluator<ArgType>::Alignment
224
+ };
225
+
226
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
227
+ explicit unary_evaluator(const XprType& op)
228
+ : m_functor(op.functor()),
229
+ m_argImpl(op.nestedExpression())
230
+ {
231
+ EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<float>::AddCost);
232
+ EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
233
+ }
234
+
235
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
236
+
237
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
238
+ CoeffReturnType coeff(Index row, Index col) const
239
+ {
240
+ return m_functor(m_argImpl.coeff(row, col));
241
+ }
242
+
243
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
244
+ CoeffReturnType coeff(Index index) const
245
+ {
246
+ return m_functor(m_argImpl.coeff(index));
247
+ }
248
+
249
+ template<int LoadMode, typename PacketType>
250
+ EIGEN_STRONG_INLINE
251
+ PacketType packet(Index row, Index col) const
252
+ {
253
+ return m_functor.packetOp(m_argImpl.template packet<LoadMode, typename to_int_packet<PacketType>::type>(row, col));
254
+ }
255
+
256
+ template<int LoadMode, typename PacketType>
257
+ EIGEN_STRONG_INLINE
258
+ PacketType packet(Index index) const
259
+ {
260
+ return m_functor.packetOp(m_argImpl.template packet<LoadMode, typename to_int_packet<PacketType>::type>(index));
261
+ }
262
+
263
+ protected:
264
+ const scalar_bool2float m_functor;
265
+ evaluator<ArgType> m_argImpl;
266
+ };
267
+
159
268
  }
160
269
 
161
270
  template <typename Derived, typename T> EIGEN_DEVICE_FUNC inline
@@ -178,4 +287,14 @@ namespace Eigen
178
287
  y.derived()
179
288
  );
180
289
  }
290
+
291
+
292
+ template<typename Derived>
293
+ inline const CwiseUnaryOp<internal::scalar_bool2float, const Derived>
294
+ bool2float(const Eigen::ArrayBase<Derived>& x)
295
+ {
296
+ return CwiseUnaryOp<internal::scalar_bool2float, const Derived>(
297
+ x.derived()
298
+ );
299
+ }
181
300
  }
@@ -0,0 +1,181 @@
1
+ #pragma once
2
+
3
#include <algorithm>
#include <cstddef>
#include <cstring>
#include <string>
#include <utility>
4
+
5
namespace tomoto
{
    /*
     * Immutable string whose character buffer is shared between copies
     * through a reference count stored at the front of the buffer.
     *
     * Buffer layout: [size_t reference count][len bytes][terminating 0].
     * The empty string is always represented by ptr == nullptr and len == 0,
     * so it never allocates.
     *
     * The reference count is a plain size_t that is incremented and
     * decremented without synchronization.
     */
    class SharedString
    {
        const char* ptr = nullptr;
        size_t len = 0;

        // Bytes reserved in front of the characters for the reference count.
        enum : size_t { headerSize = sizeof(size_t) };

        // Reference count stored at the start of the buffer; ptr must be non-null.
        size_t& refCount() const
        {
            return *reinterpret_cast<size_t*>(const_cast<char*>(ptr));
        }

        // Registers one more owner of the current buffer (no-op when empty).
        void incref()
        {
            if (ptr) ++refCount();
        }

        // Drops this object's ownership, freeing the buffer when it was the
        // last owner, and leaves this object empty.
        void decref()
        {
            if (ptr && --refCount() == 0)
            {
                delete[] ptr;
            }
            ptr = nullptr;
            len = 0;
        }

        // Allocates a fresh buffer holding a copy of [_begin, _end) with a
        // reference count of 1. An empty or null range keeps the object in
        // the empty (nullptr) state instead of allocating.
        void init(const char* _begin, const char* _end)
        {
            if (!_begin || _end <= _begin) return;
            const size_t n = static_cast<size_t>(_end - _begin);
            char* buf = new char[headerSize + n + 1];
            *reinterpret_cast<size_t*>(buf) = 1;
            std::memcpy(buf + headerSize, _begin, n);
            buf[headerSize + n] = 0;
            ptr = buf;
            len = n;
        }

    public:

        // Empty string.
        SharedString()
        {
        }

        // Copies the characters in [_begin, _end).
        explicit SharedString(const char* _begin, const char* _end)
        {
            init(_begin, _end);
        }

        // Copies a 0-terminated C string; a null pointer yields the empty string.
        explicit SharedString(const char* _ptr)
        {
            if (_ptr)
            {
                init(_ptr, _ptr + std::strlen(_ptr));
            }
        }

        // Copies the contents of str.
        explicit SharedString(const std::string& str)
        {
            init(str.data(), str.data() + str.size());
        }

        // Shares o's buffer; no characters are copied.
        SharedString(const SharedString& o) noexcept
            : ptr{ o.ptr }, len{ o.len }
        {
            incref();
        }

        // Takes over o's buffer and leaves o empty.
        SharedString(SharedString&& o) noexcept
        {
            std::swap(ptr, o.ptr);
            std::swap(len, o.len);
        }

        ~SharedString()
        {
            decref();
        }

        // Releases the current buffer and shares o's. Nothing happens when
        // both already refer to the same buffer (this covers self-assignment).
        SharedString& operator=(const SharedString& o) noexcept
        {
            if (ptr != o.ptr)
            {
                decref();
                ptr = o.ptr;
                len = o.len;
                incref();
            }
            return *this;
        }

        // Exchanges buffers with o; the previous buffer is released when o
        // is destroyed.
        SharedString& operator=(SharedString&& o) noexcept
        {
            std::swap(ptr, o.ptr);
            std::swap(len, o.len);
            return *this;
        }

        // Number of characters, not counting the terminating 0.
        size_t size() const
        {
            return len;
        }

        // True when the string has no characters, however it was constructed.
        bool empty() const
        {
            return len == 0;
        }

        // Copies the characters into a std::string.
        operator std::string() const
        {
            return std::string(begin(), end());
        }

        // 0-terminated characters; never null ("" for the empty string).
        const char* c_str() const
        {
            return ptr ? ptr + headerSize : "";
        }

        const char* data() const
        {
            return c_str();
        }

        const char* begin() const
        {
            return data();
        }

        const char* end() const
        {
            return data() + size();
        }

        // Copy of up to len characters starting at start. Both values are
        // clamped to the string, so an out-of-range request yields a shorter
        // (possibly empty) result instead of reading outside the buffer.
        std::string substr(size_t start, size_t len) const
        {
            const size_t total = size();
            const size_t first = start < total ? start : total;
            const size_t avail = total - first;
            const size_t count = len < avail ? len : avail;
            return std::string(c_str() + first, c_str() + first + count);
        }

        bool operator==(const SharedString& o) const
        {
            if (ptr == o.ptr) return true; // same buffer, or both empty
            if (size() != o.size()) return false;
            return std::equal(begin(), end(), o.begin());
        }

        bool operator==(const std::string& o) const
        {
            if (size() != o.size()) return false;
            return std::equal(begin(), end(), o.begin());
        }

        bool operator!=(const SharedString& o) const
        {
            return !operator==(o);
        }

        bool operator!=(const std::string& o) const
        {
            return !operator==(o);
        }
    };
}
171
+
172
+ namespace std
173
+ {
174
+ template <> struct hash<tomoto::SharedString>
175
+ {
176
+ size_t operator()(const tomoto::SharedString& x) const
177
+ {
178
+ return hash<string>{}(x);
179
+ }
180
+ };
181
+ }
@@ -154,7 +154,7 @@ namespace tomoto
154
154
  inline float digammaT(float x) { return detail::LUT_digamma::get(x); }
155
155
 
156
156
  template<class _T>
157
- inline _T lgammaApprox(_T z)
157
+ inline auto lgammaApprox(_T z) -> decltype((z + 2.5)* log(z + 3) - (z + 3) + 0.91893853 + 1. / 12. / (z + 3) - log(z * (z + 1) * (z + 2)))
158
158
  {
159
159
  // approximation : lgamma(z) ~= (z+2.5)ln(z+3) - z - 3 + 0.5 ln (2pi) + 1/12/(z + 3) - ln (z(z+1)(z+2))
160
160
  return (z + 2.5) * log(z + 3) - (z + 3) + 0.91893853 + 1. / 12. / (z + 3) - log(z * (z + 1) * (z + 2));
@@ -77,7 +77,7 @@ namespace tomoto
77
77
  offset = _mm_shuffle_ps(out, out, _MM_SHUFFLE(3, 3, 3, 3));
78
78
  }
79
79
  if (!n4) n4 = 1;
80
- for (size_t i = n4; i < n; ++i)
80
+ for (int i = n4; i < n; ++i)
81
81
  {
82
82
  arr[i] += arr[i - 1];
83
83
  }
@@ -8,6 +8,7 @@
8
8
  #include <vector>
9
9
  #include "tvector.hpp"
10
10
  #include "text.hpp"
11
+ #include "SharedString.hpp"
11
12
 
12
13
  /*
13
14
 
@@ -435,6 +436,22 @@ namespace tomoto
435
436
  throw std::ios_base::failure( std::string("reading type '") + typeid(_Ty).name() + std::string("' is failed") );
436
437
  }
437
438
 
439
+ inline void writeToBinStreamImpl(std::ostream& ostr, const SharedString& v)
440
+ {
441
+ writeToStream<uint32_t>(ostr, (uint32_t)v.size());
442
+ if (!ostr.write((const char*)v.data(), v.size()))
443
+ throw std::ios_base::failure(std::string("writing type 'SharedString' is failed"));
444
+ }
445
+
446
+ inline void readFromBinStreamImpl(std::istream& istr, SharedString& v)
447
+ {
448
+ uint32_t size = readFromStream<uint32_t>(istr);
449
+ std::vector<char> t(size);
450
+ if (!istr.read((char*)t.data(), t.size()))
451
+ throw std::ios_base::failure(std::string("reading type 'SharedString' is failed"));
452
+ v = SharedString{ t.data(), t.data() + t.size() };
453
+ }
454
+
438
455
  template<class _Ty>
439
456
  inline typename std::enable_if<std::is_abstract<_Ty>::value>::type writeToBinStreamImpl(std::ostream& ostr, const std::unique_ptr<_Ty>& v)
440
457
  {