tomoto 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +3 -3
  5. data/ext/tomoto/ext.cpp +34 -9
  6. data/ext/tomoto/extconf.rb +2 -1
  7. data/lib/tomoto/dmr.rb +1 -1
  8. data/lib/tomoto/gdmr.rb +1 -1
  9. data/lib/tomoto/version.rb +1 -1
  10. data/vendor/tomotopy/LICENSE +1 -1
  11. data/vendor/tomotopy/README.kr.rst +32 -3
  12. data/vendor/tomotopy/README.rst +30 -1
  13. data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +133 -147
  14. data/vendor/tomotopy/src/Labeling/FoRelevance.h +158 -5
  15. data/vendor/tomotopy/src/TopicModel/DMR.h +1 -16
  16. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +15 -34
  17. data/vendor/tomotopy/src/TopicModel/DT.h +1 -16
  18. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +15 -32
  19. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +18 -37
  20. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +16 -20
  21. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +3 -3
  22. data/vendor/tomotopy/src/TopicModel/LDA.h +0 -11
  23. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +9 -21
  24. data/vendor/tomotopy/src/TopicModel/LLDA.h +0 -15
  25. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +12 -30
  26. data/vendor/tomotopy/src/TopicModel/MGLDA.h +0 -15
  27. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +59 -72
  28. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +12 -30
  29. data/vendor/tomotopy/src/TopicModel/SLDA.h +0 -15
  30. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +17 -35
  31. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +158 -38
  32. data/vendor/tomotopy/src/Utils/Dictionary.h +40 -2
  33. data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +122 -3
  34. data/vendor/tomotopy/src/Utils/SharedString.hpp +181 -0
  35. data/vendor/tomotopy/src/Utils/math.h +1 -1
  36. data/vendor/tomotopy/src/Utils/sample.hpp +1 -1
  37. data/vendor/tomotopy/src/Utils/serializer.hpp +17 -0
  38. data/vendor/variant/LICENSE +25 -0
  39. data/vendor/variant/LICENSE_1_0.txt +23 -0
  40. data/vendor/variant/README.md +102 -0
  41. data/vendor/variant/include/mapbox/optional.hpp +74 -0
  42. data/vendor/variant/include/mapbox/recursive_wrapper.hpp +122 -0
  43. data/vendor/variant/include/mapbox/variant.hpp +974 -0
  44. data/vendor/variant/include/mapbox/variant_io.hpp +45 -0
  45. metadata +15 -7
@@ -11,9 +11,15 @@
11
11
  namespace tomoto
12
12
  {
13
13
  using Vid = uint32_t;
14
+ static constexpr Vid non_vocab_id = (Vid)-1;
14
15
  using Tid = uint16_t;
15
16
  using Float = float;
16
17
 
18
+ struct VidPair : public std::pair<Vid, Vid>
19
+ {
20
+ using std::pair<Vid, Vid>::pair;
21
+ };
22
+
17
23
  class Dictionary
18
24
  {
19
25
  protected:
@@ -34,7 +40,7 @@ namespace tomoto
34
40
 
35
41
  size_t size() const { return dict.size(); }
36
42
 
37
- std::string toWord(Vid vid) const
43
+ const std::string& toWord(Vid vid) const
38
44
  {
39
45
  assert(vid < id2word.size());
40
46
  return id2word[vid];
@@ -43,7 +49,7 @@ namespace tomoto
43
49
  Vid toWid(const std::string& word) const
44
50
  {
45
51
  auto it = dict.find(word);
46
- if (it == dict.end()) return (Vid)-1;
52
+ if (it == dict.end()) return non_vocab_id;
47
53
  return it->second;
48
54
  }
49
55
 
@@ -75,6 +81,38 @@ namespace tomoto
75
81
  id2word[p.second] = p.first;
76
82
  }
77
83
  }
84
+
85
+ const std::vector<std::string>& getRaw() const
86
+ {
87
+ return id2word;
88
+ }
89
+
90
+ Vid mapToNewDict(Vid v, const Dictionary& newDict) const
91
+ {
92
+ return newDict.toWid(toWord(v));
93
+ }
94
+
95
+ std::vector<Vid> mapToNewDict(const std::vector<Vid>& v, const Dictionary& newDict) const
96
+ {
97
+ std::vector<Vid> r(v.size());
98
+ for (size_t i = 0; i < v.size(); ++i)
99
+ {
100
+ r[i] = mapToNewDict(v[i], newDict);
101
+ }
102
+ return r;
103
+ }
78
104
  };
79
105
 
80
106
  }
107
+
108
+ namespace std
109
+ {
110
+ template<>
111
+ struct hash<tomoto::VidPair>
112
+ {
113
+ size_t operator()(const tomoto::VidPair& p) const
114
+ {
115
+ return hash<size_t>{}(p.first) ^ hash<size_t>{}(p.second);
116
+ }
117
+ };
118
+ }
@@ -12,6 +12,12 @@ namespace Eigen
12
12
  {
13
13
  typedef PacketType type;
14
14
  };
15
+
16
/// Maps a packet type to the packet type that holds floats.
/// The primary template is the identity mapping.
template<typename PacketType>
struct to_float_packet
{
    using type = PacketType;
};
15
21
  }
16
22
  }
17
23
 
@@ -28,15 +34,29 @@ namespace Eigen
28
34
  typedef Packet8i type;
29
35
  };
30
36
 
37
+ template<> struct to_float_packet<Packet8i>
38
+ {
39
+ typedef Packet8f type;
40
+ };
41
+
31
42
  EIGEN_STRONG_INLINE Packet8f p_to_f32(const Packet8i& a)
32
43
  {
33
44
  return _mm256_cvtepi32_ps(a);
34
45
  }
35
46
 
47
+ EIGEN_STRONG_INLINE Packet8f p_bool2float(const Packet8f& a)
48
+ {
49
+ return _mm256_and_ps(_mm256_cmp_ps(a, _mm256_set1_ps(0), _CMP_NEQ_OQ), _mm256_set1_ps(1));
50
+ }
51
+
52
+ EIGEN_STRONG_INLINE Packet8f p_bool2float(const Packet8i& a)
53
+ {
54
+ return p_bool2float(_mm256_castsi256_ps(a));
55
+ }
36
56
  }
37
57
  }
38
-
39
- #elif defined(EIGEN_VECTORIZE_SSE2)
58
+ #endif
59
+ #if defined(EIGEN_VECTORIZE_SSE2)
40
60
  #include <xmmintrin.h>
41
61
  #include "sse_gamma.h"
42
62
 
@@ -49,11 +69,25 @@ namespace Eigen
49
69
  typedef Packet4i type;
50
70
  };
51
71
 
72
+ template<> struct to_float_packet<Packet4i>
73
+ {
74
+ typedef Packet4f type;
75
+ };
76
+
52
77
  EIGEN_STRONG_INLINE Packet4f p_to_f32(const Packet4i& a)
53
78
  {
54
79
  return _mm_cvtepi32_ps(a);
55
80
  }
81
+
82
+ EIGEN_STRONG_INLINE Packet4f p_bool2float(const Packet4f& a)
83
+ {
84
+ return _mm_and_ps(_mm_cmpneq_ps(a, _mm_set1_ps(0)), _mm_set1_ps(1));
85
+ }
56
86
 
87
+ EIGEN_STRONG_INLINE Packet4f p_bool2float(const Packet4i& a)
88
+ {
89
+ return p_bool2float(_mm_castsi128_ps(a));
90
+ }
57
91
  }
58
92
  }
59
93
  #endif
@@ -84,7 +118,7 @@ namespace Eigen
84
118
  template<>
85
119
  struct scalar_cast_op<int32_t, float> {
86
120
  EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
87
- typedef float result_type;
121
+ typedef float result_type;
88
122
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const float operator() (const int32_t& a) const { return cast<int32_t, float>(a); }
89
123
 
90
124
  template<typename Packet>
@@ -156,6 +190,81 @@ namespace Eigen
156
190
  evaluator<ArgType> m_argImpl;
157
191
  };
158
192
 
193
+ struct scalar_bool2float
194
+ {
195
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_bool2float)
196
+ typedef float result_type;
197
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const float operator() (const int32_t& a) const { return a ? 1.f : 0.f; }
198
+
199
+ template<typename Packet>
200
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename to_float_packet<Packet>::type packetOp(const Packet& a) const
201
+ {
202
+ return p_bool2float(a);
203
+ }
204
+ };
205
+
206
+ template<>
207
+ struct functor_traits<scalar_bool2float>
208
+ {
209
+ enum { Cost = NumTraits<float>::AddCost, PacketAccess = 1 };
210
+ };
211
+
212
+ template<typename ArgType>
213
+ struct unary_evaluator<CwiseUnaryOp<scalar_bool2float, ArgType>, IndexBased >
214
+ : evaluator_base<CwiseUnaryOp<scalar_bool2float, ArgType> >
215
+ {
216
+ typedef CwiseUnaryOp<scalar_bool2float, ArgType> XprType;
217
+
218
+ enum {
219
+ CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<scalar_bool2float>::Cost,
220
+
221
+ Flags = evaluator<ArgType>::Flags
222
+ & (HereditaryBits | LinearAccessBit | (functor_traits<scalar_bool2float>::PacketAccess ? PacketAccessBit : 0)),
223
+ Alignment = evaluator<ArgType>::Alignment
224
+ };
225
+
226
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
227
+ explicit unary_evaluator(const XprType& op)
228
+ : m_functor(op.functor()),
229
+ m_argImpl(op.nestedExpression())
230
+ {
231
+ EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<float>::AddCost);
232
+ EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
233
+ }
234
+
235
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
236
+
237
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
238
+ CoeffReturnType coeff(Index row, Index col) const
239
+ {
240
+ return m_functor(m_argImpl.coeff(row, col));
241
+ }
242
+
243
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
244
+ CoeffReturnType coeff(Index index) const
245
+ {
246
+ return m_functor(m_argImpl.coeff(index));
247
+ }
248
+
249
+ template<int LoadMode, typename PacketType>
250
+ EIGEN_STRONG_INLINE
251
+ PacketType packet(Index row, Index col) const
252
+ {
253
+ return m_functor.packetOp(m_argImpl.template packet<LoadMode, typename to_int_packet<PacketType>::type>(row, col));
254
+ }
255
+
256
+ template<int LoadMode, typename PacketType>
257
+ EIGEN_STRONG_INLINE
258
+ PacketType packet(Index index) const
259
+ {
260
+ return m_functor.packetOp(m_argImpl.template packet<LoadMode, typename to_int_packet<PacketType>::type>(index));
261
+ }
262
+
263
+ protected:
264
+ const scalar_bool2float m_functor;
265
+ evaluator<ArgType> m_argImpl;
266
+ };
267
+
159
268
  }
160
269
 
161
270
  template <typename Derived, typename T> EIGEN_DEVICE_FUNC inline
@@ -178,4 +287,14 @@ namespace Eigen
178
287
  y.derived()
179
288
  );
180
289
  }
290
+
291
+
292
+ template<typename Derived>
293
+ inline const CwiseUnaryOp<internal::scalar_bool2float, const Derived>
294
+ bool2float(const Eigen::ArrayBase<Derived>& x)
295
+ {
296
+ return CwiseUnaryOp<internal::scalar_bool2float, const Derived>(
297
+ x.derived()
298
+ );
299
+ }
181
300
  }
@@ -0,0 +1,181 @@
1
+ #pragma once
2
+
3
+ #include <string>
4
+
5
+ namespace tomoto
6
+ {
7
/**
 * Immutable, reference-counted character string.
 *
 * All copies of a SharedString share one heap buffer laid out as
 *
 *     [ size_t reference count ][ characters ][ '\0' ]
 *
 * which is released when its last owner lets go of it. The empty string
 * owns no buffer at all (`ptr == nullptr`, `len == 0`).
 *
 * The reference count is a plain `size_t`; objects sharing a buffer must
 * not be copied or destroyed concurrently without external synchronisation.
 *
 * Only facilities of <string> are used (std::char_traits), so the class
 * needs no further standard headers.
 */
class SharedString
{
    // Bytes reserved in front of the characters for the reference count.
    enum : size_t { header_size = sizeof(size_t) };

    const char* ptr = nullptr; // shared buffer, nullptr for the empty string
    size_t len = 0;            // number of characters, terminator excluded

    // Reference count stored at the start of the buffer; requires ptr != nullptr.
    size_t& refcount() const
    {
        return *reinterpret_cast<size_t*>(const_cast<char*>(ptr));
    }

    void incref()
    {
        if (ptr) ++refcount();
    }

    // Drops this object's share of the buffer and resets it to the empty state.
    void decref()
    {
        if (ptr && --refcount() == 0)
        {
            delete[] ptr;
        }
        ptr = nullptr;
        len = 0;
    }

    // Allocates a fresh buffer holding a copy of [_begin, _end).
    // Must only be called on an object in the empty state.
    void init(const char* _begin, const char* _end)
    {
        // An empty range keeps the object in the empty state, so empty()
        // and size() always agree no matter which constructor was used,
        // and nothing is copied from a possibly null source.
        if (_begin == _end) return;

        const size_t n = static_cast<size_t>(_end - _begin);
        char* buf = new char[header_size + n + 1];
        *reinterpret_cast<size_t*>(buf) = 1;
        std::char_traits<char>::copy(buf + header_size, _begin, n);
        buf[header_size + n] = 0;
        ptr = buf;
        len = n;
    }

    void swap(SharedString& o) noexcept
    {
        const char* const p = ptr;
        const size_t n = len;
        ptr = o.ptr;
        len = o.len;
        o.ptr = p;
        o.len = n;
    }

public:

    /// Creates the empty string.
    SharedString()
    {
    }

    /// Copies the characters of the range [_begin, _end).
    explicit SharedString(const char* _begin, const char* _end)
    {
        init(_begin, _end);
    }

    /// Copies a null-terminated string; a null pointer gives the empty string.
    explicit SharedString(const char* _ptr)
    {
        if (_ptr)
        {
            init(_ptr, _ptr + std::char_traits<char>::length(_ptr));
        }
    }

    /// Copies the characters of `str`.
    explicit SharedString(const std::string& str)
    {
        init(str.data(), str.data() + str.size());
    }

    /// Shares the buffer of `o`.
    SharedString(const SharedString& o) noexcept
        : ptr{ o.ptr }, len{ o.len }
    {
        incref();
    }

    /// Takes over the buffer of `o`, leaving `o` empty.
    SharedString(SharedString&& o) noexcept
    {
        swap(o);
    }

    ~SharedString()
    {
        decref();
    }

    SharedString& operator=(const SharedString& o)
    {
        if (this != &o)
        {
            // Copy first, then swap: the previous buffer is released by
            // the temporary's destructor.
            SharedString copy{ o };
            swap(copy);
        }
        return *this;
    }

    /// Exchanges contents with `o`; the previous value of `*this` is
    /// released when `o` is destroyed or reassigned.
    SharedString& operator=(SharedString&& o) noexcept
    {
        swap(o);
        return *this;
    }

    /// Number of characters.
    size_t size() const
    {
        return len;
    }

    /// True when the string has no characters.
    bool empty() const
    {
        return len == 0;
    }

    /// Copies the characters into a std::string.
    operator std::string() const
    {
        return std::string(c_str(), len);
    }

    /// Null-terminated characters; "" for the empty string.
    const char* c_str() const
    {
        if (!ptr) return "";
        return ptr + header_size;
    }

    const char* data() const
    {
        return c_str();
    }

    const char* begin() const
    {
        return data();
    }

    const char* end() const
    {
        return data() + size();
    }

    /**
     * Copy of at most `len` characters starting at `start`.
     *
     * Both arguments are clamped to the string, so a `start` past the end
     * gives an empty result and an oversized `len` (including the default)
     * means "up to the end".
     */
    std::string substr(size_t start, size_t len = std::string::npos) const
    {
        const size_t total = size();
        if (start > total) start = total;
        if (len > total - start) len = total - start;
        return std::string(c_str() + start, len);
    }

    bool operator==(const SharedString& o) const
    {
        if (ptr == o.ptr) return true; // same buffer, or both empty
        if (size() != o.size()) return false;
        return std::char_traits<char>::compare(data(), o.data(), size()) == 0;
    }

    bool operator==(const std::string& o) const
    {
        if (size() != o.size()) return false;
        return std::char_traits<char>::compare(data(), o.data(), size()) == 0;
    }

    bool operator!=(const SharedString& o) const
    {
        return !operator==(o);
    }

    bool operator!=(const std::string& o) const
    {
        return !operator==(o);
    }
};
170
+ }
171
+
172
+ namespace std
173
+ {
174
+ template <> struct hash<tomoto::SharedString>
175
+ {
176
+ size_t operator()(const tomoto::SharedString& x) const
177
+ {
178
+ return hash<string>{}(x);
179
+ }
180
+ };
181
+ }
@@ -154,7 +154,7 @@ namespace tomoto
154
154
  inline float digammaT(float x) { return detail::LUT_digamma::get(x); }
155
155
 
156
156
  template<class _T>
157
- inline _T lgammaApprox(_T z)
157
+ inline auto lgammaApprox(_T z) -> decltype((z + 2.5)* log(z + 3) - (z + 3) + 0.91893853 + 1. / 12. / (z + 3) - log(z * (z + 1) * (z + 2)))
158
158
  {
159
159
  // approximation : lgamma(z) ~= (z+2.5)ln(z+3) - z - 3 + 0.5 ln (2pi) + 1/12/(z + 3) - ln (z(z+1)(z+2))
160
160
  return (z + 2.5) * log(z + 3) - (z + 3) + 0.91893853 + 1. / 12. / (z + 3) - log(z * (z + 1) * (z + 2));
@@ -77,7 +77,7 @@ namespace tomoto
77
77
  offset = _mm_shuffle_ps(out, out, _MM_SHUFFLE(3, 3, 3, 3));
78
78
  }
79
79
  if (!n4) n4 = 1;
80
- for (size_t i = n4; i < n; ++i)
80
+ for (int i = n4; i < n; ++i)
81
81
  {
82
82
  arr[i] += arr[i - 1];
83
83
  }
@@ -8,6 +8,7 @@
8
8
  #include <vector>
9
9
  #include "tvector.hpp"
10
10
  #include "text.hpp"
11
+ #include "SharedString.hpp"
11
12
 
12
13
  /*
13
14
 
@@ -435,6 +436,22 @@ namespace tomoto
435
436
  throw std::ios_base::failure( std::string("reading type '") + typeid(_Ty).name() + std::string("' is failed") );
436
437
  }
437
438
 
439
+ inline void writeToBinStreamImpl(std::ostream& ostr, const SharedString& v)
440
+ {
441
+ writeToStream<uint32_t>(ostr, (uint32_t)v.size());
442
+ if (!ostr.write((const char*)v.data(), v.size()))
443
+ throw std::ios_base::failure(std::string("writing type 'SharedString' is failed"));
444
+ }
445
+
446
+ inline void readFromBinStreamImpl(std::istream& istr, SharedString& v)
447
+ {
448
+ uint32_t size = readFromStream<uint32_t>(istr);
449
+ std::vector<char> t(size);
450
+ if (!istr.read((char*)t.data(), t.size()))
451
+ throw std::ios_base::failure(std::string("reading type 'SharedString' is failed"));
452
+ v = SharedString{ t.data(), t.data() + t.size() };
453
+ }
454
+
438
455
  template<class _Ty>
439
456
  inline typename std::enable_if<std::is_abstract<_Ty>::value>::type writeToBinStreamImpl(std::ostream& ostr, const std::unique_ptr<_Ty>& v)
440
457
  {