tomoto 0.4.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +1 -1
  4. data/ext/tomoto/extconf.rb +4 -2
  5. data/lib/tomoto/version.rb +1 -1
  6. data/vendor/tomotopy/README.kr.rst +10 -1
  7. data/vendor/tomotopy/README.rst +10 -1
  8. data/vendor/tomotopy/src/TopicModel/CT.h +2 -2
  9. data/vendor/tomotopy/src/TopicModel/CTModel.cpp +5 -0
  10. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +1 -0
  11. data/vendor/tomotopy/src/TopicModel/DMR.h +2 -2
  12. data/vendor/tomotopy/src/TopicModel/DMRModel.cpp +5 -0
  13. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +1 -0
  14. data/vendor/tomotopy/src/TopicModel/DT.h +2 -2
  15. data/vendor/tomotopy/src/TopicModel/DTModel.cpp +5 -0
  16. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +3 -0
  17. data/vendor/tomotopy/src/TopicModel/GDMR.h +2 -2
  18. data/vendor/tomotopy/src/TopicModel/GDMRModel.cpp +5 -0
  19. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +1 -0
  20. data/vendor/tomotopy/src/TopicModel/HDP.h +2 -2
  21. data/vendor/tomotopy/src/TopicModel/HDPModel.cpp +5 -0
  22. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +2 -0
  23. data/vendor/tomotopy/src/TopicModel/HLDA.h +2 -2
  24. data/vendor/tomotopy/src/TopicModel/HLDAModel.cpp +5 -0
  25. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +9 -0
  26. data/vendor/tomotopy/src/TopicModel/HPA.h +2 -2
  27. data/vendor/tomotopy/src/TopicModel/HPAModel.cpp +5 -0
  28. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +2 -0
  29. data/vendor/tomotopy/src/TopicModel/LDA.h +8 -2
  30. data/vendor/tomotopy/src/TopicModel/LDAModel.cpp +5 -0
  31. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +8 -0
  32. data/vendor/tomotopy/src/TopicModel/LLDA.h +2 -2
  33. data/vendor/tomotopy/src/TopicModel/LLDAModel.cpp +5 -0
  34. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +1 -0
  35. data/vendor/tomotopy/src/TopicModel/MGLDA.h +2 -2
  36. data/vendor/tomotopy/src/TopicModel/MGLDAModel.cpp +5 -0
  37. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +1 -0
  38. data/vendor/tomotopy/src/TopicModel/PA.h +2 -2
  39. data/vendor/tomotopy/src/TopicModel/PAModel.cpp +5 -0
  40. data/vendor/tomotopy/src/TopicModel/PAModel.hpp +2 -0
  41. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +1 -0
  42. data/vendor/tomotopy/src/TopicModel/PT.h +3 -3
  43. data/vendor/tomotopy/src/TopicModel/PTModel.cpp +5 -0
  44. data/vendor/tomotopy/src/TopicModel/PTModel.hpp +1 -0
  45. data/vendor/tomotopy/src/TopicModel/SLDA.h +3 -2
  46. data/vendor/tomotopy/src/TopicModel/SLDAModel.cpp +5 -0
  47. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +1 -0
  48. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +77 -3
  49. data/vendor/tomotopy/src/Utils/Dictionary.cpp +102 -0
  50. data/vendor/tomotopy/src/Utils/Dictionary.h +26 -75
  51. data/vendor/tomotopy/src/Utils/Mmap.cpp +146 -0
  52. data/vendor/tomotopy/src/Utils/Mmap.h +139 -0
  53. data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -0
  54. data/vendor/tomotopy/src/Utils/SharedString.cpp +134 -0
  55. data/vendor/tomotopy/src/Utils/SharedString.h +104 -0
  56. data/vendor/tomotopy/src/Utils/serializer.cpp +166 -0
  57. data/vendor/tomotopy/src/Utils/serializer.hpp +261 -85
  58. metadata +9 -4
  59. data/vendor/tomotopy/src/Utils/SharedString.hpp +0 -206
@@ -58,6 +58,7 @@ namespace tomoto
58
58
  }
59
59
 
60
60
  DEFINE_SERIALIZER_CALLBACK(onRead, mean, cov);
61
+ DEFINE_HASHER(mean, cov);
61
62
  private:
62
63
  void onRead()
63
64
  {
@@ -0,0 +1,134 @@
1
+ #include "SharedString.h"
2
+
3
+ namespace tomoto
4
+ {
5
+ void SharedString::incref()
6
+ {
7
+ if (ptr)
8
+ {
9
+ ++*(size_t*)ptr;
10
+ }
11
+ }
12
+
13
+ void SharedString::decref()
14
+ {
15
+ if (ptr)
16
+ {
17
+ if (--*(size_t*)ptr == 0)
18
+ {
19
+ delete[] ptr;
20
+ ptr = nullptr;
21
+ }
22
+ }
23
+ }
24
+
25
+ void SharedString::init(const char* _begin, const char* _end)
26
+ {
27
+ ptr = new char[_end - _begin + 9];
28
+ *(size_t*)ptr = 1;
29
+ len = _end - _begin;
30
+ std::memcpy((void*)(ptr + 8), _begin, _end - _begin);
31
+ ((char*)ptr)[_end - _begin + 8] = 0;
32
+ }
33
+
34
+ SharedString::SharedString()
35
+ {
36
+ }
37
+
38
+ SharedString::SharedString(const char* _begin, const char* _end)
39
+ {
40
+ init(_begin, _end);
41
+ }
42
+
43
+ SharedString::SharedString(const char* _ptr)
44
+ {
45
+ if (_ptr)
46
+ {
47
+ init(_ptr, _ptr + std::strlen(_ptr));
48
+ }
49
+ }
50
+
51
+ SharedString::SharedString(const std::string& str)
52
+ {
53
+ if (!str.empty())
54
+ {
55
+ init(str.data(), str.data() + str.size());
56
+ }
57
+ }
58
+
59
+ SharedString::SharedString(const SharedString& o) noexcept
60
+ : ptr{ o.ptr }, len{ o.len }
61
+ {
62
+ incref();
63
+ }
64
+
65
+ SharedString::SharedString(SharedString&& o) noexcept
66
+ {
67
+ std::swap(ptr, o.ptr);
68
+ std::swap(len, o.len);
69
+ }
70
+
71
+ SharedString::~SharedString()
72
+ {
73
+ decref();
74
+ }
75
+
76
+ SharedString& SharedString::operator=(const SharedString& o)
77
+ {
78
+ if (this != &o)
79
+ {
80
+ decref();
81
+ ptr = o.ptr;
82
+ len = o.len;
83
+ incref();
84
+ }
85
+ return *this;
86
+ }
87
+
88
+ SharedString& SharedString::operator=(SharedString&& o) noexcept
89
+ {
90
+ std::swap(ptr, o.ptr);
91
+ std::swap(len, o.len);
92
+ return *this;
93
+ }
94
+
95
+ SharedString::operator std::string() const
96
+ {
97
+ if (!ptr) return {};
98
+ return { ptr + 8, ptr + 8 + len };
99
+ }
100
+
101
+ const char* SharedString::c_str() const
102
+ {
103
+ if (!ptr) return "";
104
+ return ptr + 8;
105
+ }
106
+
107
+ std::string SharedString::substr(size_t start, size_t len) const
108
+ {
109
+ return { c_str() + start, c_str() + start + len };
110
+ }
111
+
112
+ bool SharedString::operator==(const SharedString& o) const
113
+ {
114
+ if (ptr == o.ptr) return true;
115
+ if (size() != o.size()) return false;
116
+ return std::equal(begin(), end(), o.begin());
117
+ }
118
+
119
+ bool SharedString::operator==(const std::string& o) const
120
+ {
121
+ if (size() != o.size()) return false;
122
+ return std::equal(begin(), end(), o.begin());
123
+ }
124
+
125
+ bool SharedString::operator!=(const SharedString& o) const
126
+ {
127
+ return !operator==(o);
128
+ }
129
+
130
+ bool SharedString::operator!=(const std::string& o) const
131
+ {
132
+ return !operator==(o);
133
+ }
134
+ }
@@ -0,0 +1,104 @@
1
+ #pragma once
2
+
3
+ #include <string>
4
+ #include "serializer.hpp"
5
+
6
+ namespace tomoto
7
+ {
8
// Immutable, reference-counted string. Copies share one heap buffer; the
// buffer starts with the reference counter and the characters follow it
// (see the out-of-line definitions). A default-constructed instance owns no
// buffer and behaves like an empty string.
class SharedString
{
	const char* ptr = nullptr;	// shared buffer, or nullptr for a null string
	size_t len = 0;				// number of characters, excluding the terminator

	// Reference-count bookkeeping on the shared buffer.
	void incref();
	void decref();

	// Allocates a new buffer holding a copy of [_begin, _end).
	void init(const char* _begin, const char* _end);

public:

	SharedString();
	explicit SharedString(const char* _begin, const char* _end);
	explicit SharedString(const char* _ptr);
	explicit SharedString(const std::string& str);
	SharedString(const SharedString& o) noexcept;
	SharedString(SharedString&& o) noexcept;
	~SharedString();
	SharedString& operator=(const SharedString& o);
	SharedString& operator=(SharedString&& o) noexcept;

	// Number of characters; 0 when no buffer is held.
	size_t size() const
	{
		return ptr ? len : 0;
	}

	bool empty() const
	{
		return !size();
	}

	// Copies the characters into a std::string.
	operator std::string() const;

	// NUL-terminated character data.
	const char* c_str() const;

	const char* data() const
	{
		return c_str();
	}

	// Iteration over the characters as a plain pointer range.
	const char* begin() const
	{
		return c_str();
	}

	const char* end() const
	{
		return c_str() + size();
	}

	std::string substr(size_t start, size_t len) const;

	bool operator==(const SharedString& o) const;
	bool operator==(const std::string& o) const;

	bool operator!=(const SharedString& o) const;
	bool operator!=(const std::string& o) const;
};
69
+
70
+ namespace serializer
71
+ {
72
+ template<>
73
+ struct Serializer<SharedString>
74
+ {
75
+ using VTy = SharedString;
76
+ void write(std::ostream& ostr, const VTy& v)
77
+ {
78
+ writeToStream(ostr, (uint32_t)v.size());
79
+ if (!ostr.write((const char*)v.data(), v.size()))
80
+ throw std::ios_base::failure(std::string("writing type 'SharedString' is failed"));
81
+ }
82
+
83
+ void read(std::istream& istr, VTy& v)
84
+ {
85
+ auto size = readFromStream<uint32_t>(istr);
86
+ std::vector<char> t(size);
87
+ if (!istr.read((char*)t.data(), t.size()))
88
+ throw std::ios_base::failure(std::string("reading type 'SharedString' is failed"));
89
+ v = SharedString{ t.data(), t.data() + t.size() };
90
+ }
91
+ };
92
+ }
93
+ }
94
+
95
+ namespace std
96
+ {
97
+ template <> struct hash<tomoto::SharedString>
98
+ {
99
+ size_t operator()(const tomoto::SharedString& x) const
100
+ {
101
+ return hash<string>{}(x);
102
+ }
103
+ };
104
+ }
@@ -0,0 +1,166 @@
1
+ #include "serializer.hpp"
2
+
3
+ namespace tomoto
4
+ {
5
+ namespace serializer
6
+ {
7
+ membuf::membuf(bool read, bool write, char* base, std::ptrdiff_t n)
8
+ {
9
+ if (read)
10
+ {
11
+ this->setg(base, base, base + n);
12
+ }
13
+
14
+ if (write)
15
+ {
16
+ this->setp(base, base + n);
17
+ }
18
+ }
19
+
20
+ membuf::~membuf() = default;
21
+
22
+ std::streampos membuf::seekpos(pos_type sp, std::ios_base::openmode which)
23
+ {
24
+ return seekoff(sp - pos_type(off_type(0)), std::ios_base::beg, which);
25
+ }
26
+
27
+ std::streampos membuf::seekoff(off_type off,
28
+ std::ios_base::seekdir dir,
29
+ std::ios_base::openmode which
30
+ )
31
+ {
32
+ if (which & std::ios_base::in)
33
+ {
34
+ if (dir == std::ios_base::cur)
35
+ gbump(off);
36
+ else if (dir == std::ios_base::end)
37
+ setg(eback(), egptr() + off, egptr());
38
+ else if (dir == std::ios_base::beg)
39
+ setg(eback(), eback() + off, egptr());
40
+ }
41
+ if (which & std::ios_base::out)
42
+ {
43
+ if (dir == std::ios_base::cur)
44
+ pbump(off);
45
+ else if (dir == std::ios_base::end)
46
+ setp(epptr() + off, epptr());
47
+ else if (dir == std::ios_base::beg)
48
+ setp(pbase() + off, epptr());
49
+
50
+ if (!(which & std::ios_base::in))
51
+ {
52
+ return pptr() - pbase();
53
+ }
54
+ }
55
+ return gptr() - eback();
56
+ }
57
+
58
+ imstream::imstream(const char* base, std::ptrdiff_t n)
59
+ : std::istream(&buf), buf(true, false, (char*)base, n)
60
+ {
61
+ }
62
+
63
+ imstream::~imstream() = default;
64
+
65
+ omstream::omstream(char* base, std::ptrdiff_t n)
66
+ : std::ostream(&buf), buf(false, true, (char*)base, n)
67
+ {
68
+ }
69
+
70
+ omstream::~omstream() = default;
71
+
72
+
73
+ BlockStreamBuffer::BlockStreamBuffer(size_t _block_size) : block_size{ _block_size }
74
+ {
75
+ buffers.emplace_back(std::make_unique<uint8_t[]>(block_size));
76
+ this->setp((char*)buffers.back().get(), (char*)buffers.back().get() + block_size);
77
+ }
78
+
79
+ BlockStreamBuffer::~BlockStreamBuffer() = default;
80
+
81
+ int BlockStreamBuffer::overflow(int c)
82
+ {
83
+ if (this->pptr() == this->epptr())
84
+ {
85
+ buffers.emplace_back(std::make_unique<uint8_t[]>(block_size));
86
+ this->setp((char*)buffers.back().get(), (char*)buffers.back().get() + block_size);
87
+ }
88
+ else
89
+ {
90
+ *(this->pptr()) = c;
91
+ this->pbump(1);
92
+ }
93
+ return c;
94
+ }
95
+
96
+ std::streamsize BlockStreamBuffer::xsputn(const char* s, std::streamsize n)
97
+ {
98
+ auto rest = n;
99
+ auto buf_remain = this->epptr() - this->pptr();
100
+ while (rest > buf_remain)
101
+ {
102
+ std::copy(s, s + buf_remain, this->pptr());
103
+ this->pbump(buf_remain);
104
+ buffers.emplace_back(std::make_unique<uint8_t[]>(block_size));
105
+ this->setp((char*)buffers.back().get(), (char*)buffers.back().get() + block_size);
106
+ rest -= buf_remain;
107
+ s += buf_remain;
108
+ buf_remain = block_size;
109
+ }
110
+ std::copy(s, s + rest, this->pptr());
111
+ this->pbump(rest);
112
+ return n;
113
+ }
114
+
115
+ size_t BlockStreamBuffer::totalSize() const
116
+ {
117
+ return (buffers.size() - 1) * block_size + (this->pptr() - this->pbase());
118
+ }
119
+
120
+ TaggedDataMap readTaggedDataMap(std::istream& istr, uint32_t version)
121
+ {
122
+ std::unordered_map<std::string, std::pair<std::streampos, std::streampos>> ret;
123
+ TaggedDataHeader h;
124
+ do
125
+ {
126
+ istr.read((char*)&h, sizeof(h));
127
+ if (h.key != taggedDataKeyUint)
128
+ {
129
+ throw UnfitException("tagged data key is not found");
130
+ }
131
+ const std::streampos totsize_pos = istr.tellg() - (std::streamoff)16;
132
+ std::array<char, 256> key;
133
+ istr.read(key.data(), h.keysize);
134
+ const std::streampos start_pos = istr.tellg();
135
+ const std::streampos end_pos = totsize_pos + (std::streamoff)h.totsize;
136
+ ret.emplace(std::string{ key.data(), h.keysize }, std::make_pair(start_pos, end_pos));
137
+ ret[""] = std::make_pair(start_pos, end_pos);
138
+ istr.seekg(end_pos);
139
+ } while (h.trailing_cnt);
140
+ return ret;
141
+ }
142
+
143
// Folds `size` bytes starting at `data` into `seed` and returns the result.
//
// The input is consumed as 32-bit words in native byte order; a trailing
// group of 1-3 bytes is zero-padded to a full word. Every word is scrambled
// and then combined into the running 64-bit value. An empty input returns
// `seed` unchanged.
//
// Words are loaded with memcpy, so `data` may have any alignment.
uint64_t computeFastHash(const void* data, size_t size, uint64_t seed)
{
	// Scrambles one word and merges it into the running hash.
	const auto absorb = [&seed](uint32_t x)
	{
		x = ((x >> 16) ^ x) * 0x45d9f3b;
		x = ((x >> 16) ^ x) * 0x45d9f3b;
		x = (x >> 16) ^ x;
		seed ^= x + 0x9e3779b9 + (seed << 6) + (seed >> 2);
	};

	const unsigned char* bytes = static_cast<const unsigned char*>(data);
	const size_t words = size / 4;
	const size_t tail = size % 4;

	for (size_t i = 0; i < words; ++i)
	{
		uint32_t x;
		memcpy(&x, bytes + i * 4, sizeof(x));
		absorb(x);
	}

	if (tail)
	{
		uint32_t x = 0;
		memcpy(&x, bytes + words * 4, tail);
		absorb(x);
	}
	return seed;
}
165
+ }
166
+ }