tomoto 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +1 -1
  4. data/ext/tomoto/extconf.rb +4 -2
  5. data/lib/tomoto/version.rb +1 -1
  6. data/vendor/tomotopy/README.kr.rst +10 -1
  7. data/vendor/tomotopy/README.rst +10 -1
  8. data/vendor/tomotopy/src/TopicModel/CT.h +2 -2
  9. data/vendor/tomotopy/src/TopicModel/CTModel.cpp +5 -0
  10. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +1 -0
  11. data/vendor/tomotopy/src/TopicModel/DMR.h +2 -2
  12. data/vendor/tomotopy/src/TopicModel/DMRModel.cpp +5 -0
  13. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +1 -0
  14. data/vendor/tomotopy/src/TopicModel/DT.h +2 -2
  15. data/vendor/tomotopy/src/TopicModel/DTModel.cpp +5 -0
  16. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +3 -0
  17. data/vendor/tomotopy/src/TopicModel/GDMR.h +2 -2
  18. data/vendor/tomotopy/src/TopicModel/GDMRModel.cpp +5 -0
  19. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +1 -0
  20. data/vendor/tomotopy/src/TopicModel/HDP.h +2 -2
  21. data/vendor/tomotopy/src/TopicModel/HDPModel.cpp +5 -0
  22. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +2 -0
  23. data/vendor/tomotopy/src/TopicModel/HLDA.h +2 -2
  24. data/vendor/tomotopy/src/TopicModel/HLDAModel.cpp +5 -0
  25. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +9 -0
  26. data/vendor/tomotopy/src/TopicModel/HPA.h +2 -2
  27. data/vendor/tomotopy/src/TopicModel/HPAModel.cpp +5 -0
  28. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +2 -0
  29. data/vendor/tomotopy/src/TopicModel/LDA.h +8 -2
  30. data/vendor/tomotopy/src/TopicModel/LDAModel.cpp +5 -0
  31. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +8 -0
  32. data/vendor/tomotopy/src/TopicModel/LLDA.h +2 -2
  33. data/vendor/tomotopy/src/TopicModel/LLDAModel.cpp +5 -0
  34. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +1 -0
  35. data/vendor/tomotopy/src/TopicModel/MGLDA.h +2 -2
  36. data/vendor/tomotopy/src/TopicModel/MGLDAModel.cpp +5 -0
  37. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +1 -0
  38. data/vendor/tomotopy/src/TopicModel/PA.h +2 -2
  39. data/vendor/tomotopy/src/TopicModel/PAModel.cpp +5 -0
  40. data/vendor/tomotopy/src/TopicModel/PAModel.hpp +2 -0
  41. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +1 -0
  42. data/vendor/tomotopy/src/TopicModel/PT.h +3 -3
  43. data/vendor/tomotopy/src/TopicModel/PTModel.cpp +5 -0
  44. data/vendor/tomotopy/src/TopicModel/PTModel.hpp +1 -0
  45. data/vendor/tomotopy/src/TopicModel/SLDA.h +3 -2
  46. data/vendor/tomotopy/src/TopicModel/SLDAModel.cpp +5 -0
  47. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +1 -0
  48. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +77 -3
  49. data/vendor/tomotopy/src/Utils/Dictionary.cpp +102 -0
  50. data/vendor/tomotopy/src/Utils/Dictionary.h +26 -75
  51. data/vendor/tomotopy/src/Utils/Mmap.cpp +146 -0
  52. data/vendor/tomotopy/src/Utils/Mmap.h +139 -0
  53. data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -0
  54. data/vendor/tomotopy/src/Utils/SharedString.cpp +134 -0
  55. data/vendor/tomotopy/src/Utils/SharedString.h +104 -0
  56. data/vendor/tomotopy/src/Utils/serializer.cpp +166 -0
  57. data/vendor/tomotopy/src/Utils/serializer.hpp +261 -85
  58. metadata +9 -4
  59. data/vendor/tomotopy/src/Utils/SharedString.hpp +0 -206
@@ -58,6 +58,7 @@ namespace tomoto
58
58
  }
59
59
 
60
60
  DEFINE_SERIALIZER_CALLBACK(onRead, mean, cov);
61
+ DEFINE_HASHER(mean, cov);
61
62
  private:
62
63
  void onRead()
63
64
  {
@@ -0,0 +1,134 @@
1
+ #include "SharedString.h"
2
+
3
+ namespace tomoto
4
+ {
5
+ void SharedString::incref()
6
+ {
7
+ if (ptr)
8
+ {
9
+ ++*(size_t*)ptr;
10
+ }
11
+ }
12
+
13
+ void SharedString::decref()
14
+ {
15
+ if (ptr)
16
+ {
17
+ if (--*(size_t*)ptr == 0)
18
+ {
19
+ delete[] ptr;
20
+ ptr = nullptr;
21
+ }
22
+ }
23
+ }
24
+
25
+ void SharedString::init(const char* _begin, const char* _end)
26
+ {
27
+ ptr = new char[_end - _begin + 9];
28
+ *(size_t*)ptr = 1;
29
+ len = _end - _begin;
30
+ std::memcpy((void*)(ptr + 8), _begin, _end - _begin);
31
+ ((char*)ptr)[_end - _begin + 8] = 0;
32
+ }
33
+
34
+ SharedString::SharedString()
35
+ {
36
+ }
37
+
38
+ SharedString::SharedString(const char* _begin, const char* _end)
39
+ {
40
+ init(_begin, _end);
41
+ }
42
+
43
+ SharedString::SharedString(const char* _ptr)
44
+ {
45
+ if (_ptr)
46
+ {
47
+ init(_ptr, _ptr + std::strlen(_ptr));
48
+ }
49
+ }
50
+
51
+ SharedString::SharedString(const std::string& str)
52
+ {
53
+ if (!str.empty())
54
+ {
55
+ init(str.data(), str.data() + str.size());
56
+ }
57
+ }
58
+
59
+ SharedString::SharedString(const SharedString& o) noexcept
60
+ : ptr{ o.ptr }, len{ o.len }
61
+ {
62
+ incref();
63
+ }
64
+
65
+ SharedString::SharedString(SharedString&& o) noexcept
66
+ {
67
+ std::swap(ptr, o.ptr);
68
+ std::swap(len, o.len);
69
+ }
70
+
71
+ SharedString::~SharedString()
72
+ {
73
+ decref();
74
+ }
75
+
76
+ SharedString& SharedString::operator=(const SharedString& o)
77
+ {
78
+ if (this != &o)
79
+ {
80
+ decref();
81
+ ptr = o.ptr;
82
+ len = o.len;
83
+ incref();
84
+ }
85
+ return *this;
86
+ }
87
+
88
+ SharedString& SharedString::operator=(SharedString&& o) noexcept
89
+ {
90
+ std::swap(ptr, o.ptr);
91
+ std::swap(len, o.len);
92
+ return *this;
93
+ }
94
+
95
+ SharedString::operator std::string() const
96
+ {
97
+ if (!ptr) return {};
98
+ return { ptr + 8, ptr + 8 + len };
99
+ }
100
+
101
+ const char* SharedString::c_str() const
102
+ {
103
+ if (!ptr) return "";
104
+ return ptr + 8;
105
+ }
106
+
107
+ std::string SharedString::substr(size_t start, size_t len) const
108
+ {
109
+ return { c_str() + start, c_str() + start + len };
110
+ }
111
+
112
+ bool SharedString::operator==(const SharedString& o) const
113
+ {
114
+ if (ptr == o.ptr) return true;
115
+ if (size() != o.size()) return false;
116
+ return std::equal(begin(), end(), o.begin());
117
+ }
118
+
119
+ bool SharedString::operator==(const std::string& o) const
120
+ {
121
+ if (size() != o.size()) return false;
122
+ return std::equal(begin(), end(), o.begin());
123
+ }
124
+
125
+ bool SharedString::operator!=(const SharedString& o) const
126
+ {
127
+ return !operator==(o);
128
+ }
129
+
130
+ bool SharedString::operator!=(const std::string& o) const
131
+ {
132
+ return !operator==(o);
133
+ }
134
+ }
@@ -0,0 +1,104 @@
1
+ #pragma once
2
+
3
+ #include <string>
4
+ #include "serializer.hpp"
5
+
6
+ namespace tomoto
7
+ {
8
// Reference-counted, read-only string handle.
//
// Copies of a SharedString refer to one heap buffer and only adjust an owner
// count kept at the start of that buffer; the characters follow at offset 8
// and are NUL-terminated. The count is a plain size_t that is incremented and
// decremented without any synchronization.
class SharedString
{
    // Start of the shared allocation, or nullptr when no buffer is held.
    const char* ptr = nullptr;
    // Number of characters stored (terminator not included).
    size_t len = 0;

    // Adds one owner to the buffer, if any.
    void incref();

    // Removes one owner; frees the buffer when none remain.
    void decref();

    // Allocates a new buffer containing a copy of [_begin, _end).
    void init(const char* _begin, const char* _end);

public:

    SharedString();
    explicit SharedString(const char* _begin, const char* _end);
    explicit SharedString(const char* _ptr);
    explicit SharedString(const std::string& str);
    SharedString(const SharedString& o) noexcept;
    SharedString(SharedString&& o) noexcept;
    ~SharedString();
    SharedString& operator=(const SharedString& o);
    SharedString& operator=(SharedString&& o) noexcept;

    // Number of characters; 0 when no buffer is held.
    size_t size() const
    {
        return ptr ? len : 0;
    }

    // True when size() is 0.
    bool empty() const
    {
        return !size();
    }

    // Copies the characters into a std::string.
    operator std::string() const;

    // NUL-terminated view of the characters.
    const char* c_str() const;

    // Same pointer as c_str().
    const char* data() const
    {
        return c_str();
    }

    // Pointer to the first character.
    const char* begin() const
    {
        return c_str();
    }

    // Pointer one past the last character.
    const char* end() const
    {
        return c_str() + size();
    }

    // Copy of `len` characters starting at offset `start`.
    std::string substr(size_t start, size_t len) const;

    bool operator==(const SharedString& o) const;
    bool operator==(const std::string& o) const;

    bool operator!=(const SharedString& o) const;
    bool operator!=(const std::string& o) const;
};
69
+
70
+ namespace serializer
71
+ {
72
+ template<>
73
+ struct Serializer<SharedString>
74
+ {
75
+ using VTy = SharedString;
76
+ void write(std::ostream& ostr, const VTy& v)
77
+ {
78
+ writeToStream(ostr, (uint32_t)v.size());
79
+ if (!ostr.write((const char*)v.data(), v.size()))
80
+ throw std::ios_base::failure(std::string("writing type 'SharedString' is failed"));
81
+ }
82
+
83
+ void read(std::istream& istr, VTy& v)
84
+ {
85
+ auto size = readFromStream<uint32_t>(istr);
86
+ std::vector<char> t(size);
87
+ if (!istr.read((char*)t.data(), t.size()))
88
+ throw std::ios_base::failure(std::string("reading type 'SharedString' is failed"));
89
+ v = SharedString{ t.data(), t.data() + t.size() };
90
+ }
91
+ };
92
+ }
93
+ }
94
+
95
+ namespace std
96
+ {
97
+ template <> struct hash<tomoto::SharedString>
98
+ {
99
+ size_t operator()(const tomoto::SharedString& x) const
100
+ {
101
+ return hash<string>{}(x);
102
+ }
103
+ };
104
+ }
@@ -0,0 +1,166 @@
1
+ #include "serializer.hpp"
2
+
3
+ namespace tomoto
4
+ {
5
+ namespace serializer
6
+ {
7
+ membuf::membuf(bool read, bool write, char* base, std::ptrdiff_t n)
8
+ {
9
+ if (read)
10
+ {
11
+ this->setg(base, base, base + n);
12
+ }
13
+
14
+ if (write)
15
+ {
16
+ this->setp(base, base + n);
17
+ }
18
+ }
19
+
20
+ membuf::~membuf() = default;
21
+
22
+ std::streampos membuf::seekpos(pos_type sp, std::ios_base::openmode which)
23
+ {
24
+ return seekoff(sp - pos_type(off_type(0)), std::ios_base::beg, which);
25
+ }
26
+
27
+ std::streampos membuf::seekoff(off_type off,
28
+ std::ios_base::seekdir dir,
29
+ std::ios_base::openmode which
30
+ )
31
+ {
32
+ if (which & std::ios_base::in)
33
+ {
34
+ if (dir == std::ios_base::cur)
35
+ gbump(off);
36
+ else if (dir == std::ios_base::end)
37
+ setg(eback(), egptr() + off, egptr());
38
+ else if (dir == std::ios_base::beg)
39
+ setg(eback(), eback() + off, egptr());
40
+ }
41
+ if (which & std::ios_base::out)
42
+ {
43
+ if (dir == std::ios_base::cur)
44
+ pbump(off);
45
+ else if (dir == std::ios_base::end)
46
+ setp(epptr() + off, epptr());
47
+ else if (dir == std::ios_base::beg)
48
+ setp(pbase() + off, epptr());
49
+
50
+ if (!(which & std::ios_base::in))
51
+ {
52
+ return pptr() - pbase();
53
+ }
54
+ }
55
+ return gptr() - eback();
56
+ }
57
+
58
+ imstream::imstream(const char* base, std::ptrdiff_t n)
59
+ : std::istream(&buf), buf(true, false, (char*)base, n)
60
+ {
61
+ }
62
+
63
+ imstream::~imstream() = default;
64
+
65
+ omstream::omstream(char* base, std::ptrdiff_t n)
66
+ : std::ostream(&buf), buf(false, true, (char*)base, n)
67
+ {
68
+ }
69
+
70
+ omstream::~omstream() = default;
71
+
72
+
73
+ BlockStreamBuffer::BlockStreamBuffer(size_t _block_size) : block_size{ _block_size }
74
+ {
75
+ buffers.emplace_back(std::make_unique<uint8_t[]>(block_size));
76
+ this->setp((char*)buffers.back().get(), (char*)buffers.back().get() + block_size);
77
+ }
78
+
79
+ BlockStreamBuffer::~BlockStreamBuffer() = default;
80
+
81
+ int BlockStreamBuffer::overflow(int c)
82
+ {
83
+ if (this->pptr() == this->epptr())
84
+ {
85
+ buffers.emplace_back(std::make_unique<uint8_t[]>(block_size));
86
+ this->setp((char*)buffers.back().get(), (char*)buffers.back().get() + block_size);
87
+ }
88
+ else
89
+ {
90
+ *(this->pptr()) = c;
91
+ this->pbump(1);
92
+ }
93
+ return c;
94
+ }
95
+
96
+ std::streamsize BlockStreamBuffer::xsputn(const char* s, std::streamsize n)
97
+ {
98
+ auto rest = n;
99
+ auto buf_remain = this->epptr() - this->pptr();
100
+ while (rest > buf_remain)
101
+ {
102
+ std::copy(s, s + buf_remain, this->pptr());
103
+ this->pbump(buf_remain);
104
+ buffers.emplace_back(std::make_unique<uint8_t[]>(block_size));
105
+ this->setp((char*)buffers.back().get(), (char*)buffers.back().get() + block_size);
106
+ rest -= buf_remain;
107
+ s += buf_remain;
108
+ buf_remain = block_size;
109
+ }
110
+ std::copy(s, s + rest, this->pptr());
111
+ this->pbump(rest);
112
+ return n;
113
+ }
114
+
115
+ size_t BlockStreamBuffer::totalSize() const
116
+ {
117
+ return (buffers.size() - 1) * block_size + (this->pptr() - this->pbase());
118
+ }
119
+
120
+ TaggedDataMap readTaggedDataMap(std::istream& istr, uint32_t version)
121
+ {
122
+ std::unordered_map<std::string, std::pair<std::streampos, std::streampos>> ret;
123
+ TaggedDataHeader h;
124
+ do
125
+ {
126
+ istr.read((char*)&h, sizeof(h));
127
+ if (h.key != taggedDataKeyUint)
128
+ {
129
+ throw UnfitException("tagged data key is not found");
130
+ }
131
+ const std::streampos totsize_pos = istr.tellg() - (std::streamoff)16;
132
+ std::array<char, 256> key;
133
+ istr.read(key.data(), h.keysize);
134
+ const std::streampos start_pos = istr.tellg();
135
+ const std::streampos end_pos = totsize_pos + (std::streamoff)h.totsize;
136
+ ret.emplace(std::string{ key.data(), h.keysize }, std::make_pair(start_pos, end_pos));
137
+ ret[""] = std::make_pair(start_pos, end_pos);
138
+ istr.seekg(end_pos);
139
+ } while (h.trailing_cnt);
140
+ return ret;
141
+ }
142
+
143
// Folds `size` bytes starting at `data` into `seed` and returns the result.
//
// The input is consumed in 4-byte words; each word is scrambled with a
// 32-bit multiply/xor-shift mixer and combined into the 64-bit seed. A
// trailing group of 1-3 bytes is zero-padded to one word and processed the
// same way. With size == 0 the seed is returned unchanged.
//
// Fix over the previous version: words were loaded through a
// `const uint32_t*` cast of `data`, which is an unaligned (and aliasing-
// violating) access for arbitrary byte buffers. Words are now loaded with
// memcpy, which yields the same values wherever the old code worked.
uint64_t computeFastHash(const void* data, size_t size, uint64_t seed)
{
    const char* const bytes = static_cast<const char*>(data);
    const size_t wordCount = size / 4;
    const size_t tailBytes = size % 4;

    // Mixes one 32-bit word into the running seed.
    const auto absorb = [&seed](uint32_t x)
    {
        x = ((x >> 16) ^ x) * 0x45d9f3b;
        x = ((x >> 16) ^ x) * 0x45d9f3b;
        x = (x >> 16) ^ x;
        seed ^= x + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    };

    for (size_t i = 0; i < wordCount; ++i)
    {
        uint32_t x;
        memcpy(&x, bytes + i * 4, sizeof(x));
        absorb(x);
    }

    if (tailBytes)
    {
        uint32_t x = 0;
        memcpy(&x, bytes + wordCount * 4, tailBytes);
        absorb(x);
    }
    return seed;
}
165
+ }
166
+ }