RubyGems - tomoto - Versions diffs - 0.4.0 → 0.4.1 - Mend

tomoto 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +5 -0
data/README.md +1 -1
data/ext/tomoto/extconf.rb +4 -2
data/lib/tomoto/version.rb +1 -1
data/vendor/tomotopy/README.kr.rst +10 -1
data/vendor/tomotopy/README.rst +10 -1
data/vendor/tomotopy/src/TopicModel/CT.h +2 -2
data/vendor/tomotopy/src/TopicModel/CTModel.cpp +5 -0
data/vendor/tomotopy/src/TopicModel/CTModel.hpp +1 -0
data/vendor/tomotopy/src/TopicModel/DMR.h +2 -2
data/vendor/tomotopy/src/TopicModel/DMRModel.cpp +5 -0
data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +1 -0
data/vendor/tomotopy/src/TopicModel/DT.h +2 -2
data/vendor/tomotopy/src/TopicModel/DTModel.cpp +5 -0
data/vendor/tomotopy/src/TopicModel/DTModel.hpp +3 -0
data/vendor/tomotopy/src/TopicModel/GDMR.h +2 -2
data/vendor/tomotopy/src/TopicModel/GDMRModel.cpp +5 -0
data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +1 -0
data/vendor/tomotopy/src/TopicModel/HDP.h +2 -2
data/vendor/tomotopy/src/TopicModel/HDPModel.cpp +5 -0
data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +2 -0
data/vendor/tomotopy/src/TopicModel/HLDA.h +2 -2
data/vendor/tomotopy/src/TopicModel/HLDAModel.cpp +5 -0
data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +9 -0
data/vendor/tomotopy/src/TopicModel/HPA.h +2 -2
data/vendor/tomotopy/src/TopicModel/HPAModel.cpp +5 -0
data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +2 -0
data/vendor/tomotopy/src/TopicModel/LDA.h +8 -2
data/vendor/tomotopy/src/TopicModel/LDAModel.cpp +5 -0
data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +8 -0
data/vendor/tomotopy/src/TopicModel/LLDA.h +2 -2
data/vendor/tomotopy/src/TopicModel/LLDAModel.cpp +5 -0
data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +1 -0
data/vendor/tomotopy/src/TopicModel/MGLDA.h +2 -2
data/vendor/tomotopy/src/TopicModel/MGLDAModel.cpp +5 -0
data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +1 -0
data/vendor/tomotopy/src/TopicModel/PA.h +2 -2
data/vendor/tomotopy/src/TopicModel/PAModel.cpp +5 -0
data/vendor/tomotopy/src/TopicModel/PAModel.hpp +2 -0
data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +1 -0
data/vendor/tomotopy/src/TopicModel/PT.h +3 -3
data/vendor/tomotopy/src/TopicModel/PTModel.cpp +5 -0
data/vendor/tomotopy/src/TopicModel/PTModel.hpp +1 -0
data/vendor/tomotopy/src/TopicModel/SLDA.h +3 -2
data/vendor/tomotopy/src/TopicModel/SLDAModel.cpp +5 -0
data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +1 -0
data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +77 -3
data/vendor/tomotopy/src/Utils/Dictionary.cpp +102 -0
data/vendor/tomotopy/src/Utils/Dictionary.h +26 -75
data/vendor/tomotopy/src/Utils/Mmap.cpp +146 -0
data/vendor/tomotopy/src/Utils/Mmap.h +139 -0
data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -0
data/vendor/tomotopy/src/Utils/SharedString.cpp +134 -0
data/vendor/tomotopy/src/Utils/SharedString.h +104 -0
data/vendor/tomotopy/src/Utils/serializer.cpp +166 -0
data/vendor/tomotopy/src/Utils/serializer.hpp +261 -85
metadata +9 -4
data/vendor/tomotopy/src/Utils/SharedString.hpp +0 -206

data/vendor/tomotopy/src/TopicModel/SLDA.h CHANGED Viewed

@@ -16,8 +16,9 @@ namespace tomoto
 			ret["y"] = y;
 			return ret;
 		}
-		DEFINE_SERIALIZER_AFTER_BASE_WITH_VERSION(BaseDocument, 0, y);
-		DEFINE_TAGGED_SERIALIZER_AFTER_BASE_WITH_VERSION(BaseDocument, 1, 0x00010001, y);
+		DECLARE_SERIALIZER_WITH_VERSION(0);
+		DECLARE_SERIALIZER_WITH_VERSION(1);
 	};
 	struct SLDAArgs;

data/vendor/tomotopy/src/TopicModel/SLDAModel.cpp CHANGED Viewed

@@ -2,6 +2,11 @@
 namespace tomoto
 {
+	DEFINE_OUT_SERIALIZER_AFTER_BASE_WITH_VERSION(DocumentSLDA, BaseDocument, 0, y);
+	DEFINE_OUT_TAGGED_SERIALIZER_AFTER_BASE_WITH_VERSION(DocumentSLDA, BaseDocument, 1, 0x00010001, y);
+	TMT_INSTANTIATE_DOC(DocumentSLDA);
     ISLDAModel* ISLDAModel::create(TermWeight _weight, const SLDAArgs& args, bool scalarRng)
 	{
 		TMT_SWITCH_TW(_weight, scalarRng, SLDAModel, args);

data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp CHANGED Viewed

@@ -348,6 +348,7 @@ namespace tomoto
 	public:
 		DEFINE_SERIALIZER_AFTER_BASE_WITH_VERSION(BaseClass, 0, F, responseVars, mu, nuSq);
 		DEFINE_TAGGED_SERIALIZER_AFTER_BASE_WITH_VERSION(BaseClass, 1, 0x00010001, F, responseVars, mu, nuSq);
+		DEFINE_HASHER_AFTER_BASE(BaseClass, F, mu, nuSq);
 		SLDAModel(const SLDAArgs& args)
 			: BaseClass(args), F(args.vars.size()), varTypes(args.vars),

data/vendor/tomotopy/src/TopicModel/TopicModel.hpp CHANGED Viewed

@@ -1,4 +1,4 @@
-#pragma once
+#pragma once
 #include <numeric>
 #include <unordered_set>
 #include "../Utils/Utils.hpp"
@@ -7,7 +7,7 @@
 #include "../Utils/ThreadPool.hpp"
 #include "../Utils/serializer.hpp"
 #include "../Utils/exception.h"
-#include "../Utils/SharedString.hpp"
+#include "../Utils/SharedString.h"
 #include <EigenRand/EigenRand>
 #include <mapbox/variant.hpp>
@@ -107,7 +107,7 @@ namespace tomoto
 		virtual operator RawDoc() const
 		{
-			RawDoc raw{ *this };
+			RawDoc raw{ *static_cast<const RawDocKernel*>(this) };
 			if (wOrder.empty())
 			{
 				raw.words.insert(raw.words.begin(), words.begin(), words.end());
@@ -224,6 +224,8 @@ namespace tomoto
 		virtual void loadModel(std::istream& reader,
 			std::vector<uint8_t>* extra_data = nullptr) = 0;
+		virtual std::array<uint64_t, 2> getHash() const = 0;
 		virtual std::unique_ptr<ITopicModel> copy() const = 0;
 		virtual const DocumentBase* getDoc(size_t docId) const = 0;
@@ -251,6 +253,7 @@ namespace tomoto
 		virtual const std::vector<uint64_t>& getVocabCf() const = 0;
 		virtual std::vector<double> getVocabWeightedCf() const = 0;
 		virtual const std::vector<uint64_t>& getVocabDf() const = 0;
+		virtual const std::vector<std::vector<std::pair<std::string, size_t>>>& getWordFormCnts() const = 0;
 		virtual int train(size_t iteration, size_t numWorkers, ParallelScheme ps = ParallelScheme::default_, bool freeze_topics = false) = 0;
 		virtual size_t getGlobalStep() const = 0;
@@ -260,6 +263,7 @@ namespace tomoto
 		virtual size_t getNumTopicsForPrior() const = 0;
 		virtual std::vector<Float> getWidsByTopic(size_t tid, bool normalize = true) const = 0;
 		virtual std::vector<std::pair<std::string, Float>> getWordsByTopicSorted(size_t tid, size_t topN) const = 0;
+		virtual std::vector<std::tuple<std::string, Vid, Float>> getWordIdsByTopicSorted(size_t tid, size_t topN) const = 0;
 		virtual std::vector<std::pair<std::string, Float>> getWordsByDocSorted(const DocumentBase* doc, size_t topN) const = 0;
@@ -319,6 +323,7 @@ namespace tomoto
 		size_t globalStep = 0;
 		_ModelState globalState, tState;
 		Dictionary dict;
+		std::vector<std::vector<std::pair<std::string, size_t>>> wordFormCnts;
 		uint64_t realV = 0; // vocab size after removing stopwords
 		uint64_t realN = 0; // total word size after removing stopwords
 		double weightedN = 0;
@@ -565,6 +570,44 @@ namespace tomoto
 			}
 		}
+		/**
+		 * Rebuilds `wordFormCnts`: for every vocabulary id below `realV`, the list of
+		 * surface forms seen in `docs` together with how often each occurred,
+		 * sorted by descending count (ties broken by the form itself).
+		 *
+		 * The surface form of a token is sliced out of the document's raw text when the
+		 * raw text and both offset tables cover that token; otherwise the dictionary
+		 * entry for the id is used.
+		 */
+		void updateWordFormCnts()
+		{
+			wordFormCnts.clear();
+			wordFormCnts.resize(realV);
+			std::vector<std::unordered_map<std::string, size_t>> cnts(realV);
+			for (auto& doc : docs)
+			{
+				for (size_t i = 0; i < doc.words.size(); ++i)
+				{
+					auto w = doc.words[doc.wOrder.empty() ? i : doc.wOrder[i]];
+					if (w >= realV) continue;
+					auto& cnt = cnts[w];
+					std::string word;
+					// Both offset tables are indexed by i, so both must cover it before the raw text is sliced.
+					if (!doc.rawStr.empty() && i < doc.origWordPos.size() && i < doc.origWordLen.size())
+					{
+						word = doc.rawStr.substr(doc.origWordPos[i], doc.origWordLen[i]);
+					}
+					else
+					{
+						word = dict.toWord(w);
+					}
+					++cnt[word];
+				}
+			}
+			for (size_t i = 0; i < realV; ++i)
+			{
+				auto& cnt = cnts[i];
+				std::vector<std::pair<std::string, size_t>> v{ std::make_move_iterator(cnt.begin()), std::make_move_iterator(cnt.end()) };
+				// Hash-map iteration order is unspecified, so equal counts need an explicit tie-break to keep the result reproducible.
+				std::sort(v.begin(), v.end(), [](const std::pair<std::string, size_t>& a, const std::pair<std::string, size_t>& b)
+				{
+					if (a.second != b.second) return a.second > b.second;
+					return a.first < b.first;
+				});
+				wordFormCnts[i] = std::move(v);
+				cnt.clear();
+			}
+		}
 		int restoreFromTrainingError(const exc::TrainingError& e, ThreadPool& pool, _ModelState* localData, _RandGen* rgs)
 		{
 			throw e;
@@ -751,11 +794,26 @@ namespace tomoto
 			return ret;
 		}
+		/// Expands each (vocabulary id, score) pair into a (word, vocabulary id, score) tuple,
+		/// looking the word up in `dict`. The output keeps the input order.
+		std::vector<std::tuple<std::string, Vid, Float>> vid2StringVid(const std::vector<std::pair<Vid, Float>>& vids) const
+		{
+			std::vector<std::tuple<std::string, Vid, Float>> ret;
+			ret.reserve(vids.size());
+			for (const auto& entry : vids)
+			{
+				ret.emplace_back(dict.toWord(entry.first), entry.first, entry.second);
+			}
+			return ret;
+		}
 		std::vector<std::pair<std::string, Float>> getWordsByTopicSorted(size_t tid, size_t topN) const override
 		{
 			return vid2String(getWidsByTopicSorted(tid, topN));
 		}
+		/// Same query as getWidsByTopicSorted(tid, topN), with each entry additionally
+		/// carrying the dictionary word for its vocabulary id.
+		std::vector<std::tuple<std::string, Vid, Float>> getWordIdsByTopicSorted(size_t tid, size_t topN) const override
+		{
+			const auto topWids = getWidsByTopicSorted(tid, topN);
+			return vid2StringVid(topWids);
+		}
 		std::vector<std::pair<Vid, Float>> getWidsByDocSorted(const DocumentBase* doc, size_t topN) const
 		{
 			std::vector<Float> cnt(dict.size());
@@ -872,6 +930,11 @@ namespace tomoto
 			return vocabDf;
 		}
+		/// Returns the `wordFormCnts` member by const reference; the outer vector is
+		/// indexed by vocabulary id and each entry is a (surface form, count) pair.
+		const std::vector<std::vector<std::pair<std::string, size_t>>>& getWordFormCnts() const override
+		{
+			return wordFormCnts;
+		}
 		void saveModel(std::ostream& writer, bool fullModel, const std::vector<uint8_t>* extra_data) const override
 		{
 			static_cast<const _Derived*>(this)->_saveModel(writer, fullModel, extra_data);
@@ -882,6 +945,17 @@ namespace tomoto
 			static_cast<_Derived*>(this)->_loadModel(reader, extra_data);
 			static_cast<_Derived*>(this)->prepare(false);
 		}
+		/// Computes a two-part hash of the model.
+		/// Element 0 combines the dictionary hash with the model/term-weight ids, `realV`,
+		/// `globalStep` and the number of documents; element 1 chains the global state
+		/// hash through the derived model's own computeHash().
+		std::array<uint64_t, 2> getHash() const override
+		{
+			const auto* self = static_cast<const _Derived*>(this);
+			uint64_t metaHash = dict.computeHash(0);
+			const std::string modelTag = self->tmid().str() + self->twid().str();
+			metaHash = serializer::computeHashMany(metaHash, modelTag, realV, globalStep, docs.size());
+			uint64_t stateHash = globalState.computeHash(0);
+			stateHash = self->computeHash(stateHash);
+			return std::array<uint64_t, 2>{ { metaHash, stateHash } };
+		}
 	};
 }

data/vendor/tomotopy/src/Utils/Dictionary.cpp ADDED Viewed

@@ -0,0 +1,102 @@
+#include "Dictionary.h"
+namespace tomoto
+{
+    // Special member functions: defaulted here, out of line, rather than in the class body.
+    Dictionary::Dictionary() = default;
+    Dictionary::~Dictionary() = default;
+    // Copying duplicates both the word->id map and the id->word table.
+    Dictionary::Dictionary(const Dictionary&) = default;
+    Dictionary& Dictionary::operator=(const Dictionary&) = default;
+    // Moves are declared noexcept.
+    Dictionary::Dictionary(Dictionary&&) noexcept = default;
+    Dictionary& Dictionary::operator=(Dictionary&&) noexcept = default;
+    /// Returns the id of `word`, registering it under the next free id
+    /// (the current dictionary size) when it is not known yet.
+    Vid Dictionary::add(const std::string& word)
+    {
+        const auto found = dict.find(word);
+        if (found != dict.end()) return found->second;
+        const Vid newId = (Vid)dict.size();
+        dict.emplace(word, newId);
+        id2word.emplace_back(word);
+        return newId;
+    }
+    /// Returns the word stored for `vid`. The id must be in range; this is only
+    /// checked by an assertion.
+    const std::string& Dictionary::toWord(Vid vid) const
+    {
+        assert(vid < id2word.size());
+        const std::string& word = id2word[vid];
+        return word;
+    }
+    /// Returns the id registered for `word`, or `non_vocab_id` when the word is unknown.
+    Vid Dictionary::toWid(const std::string& word) const
+    {
+        const auto found = dict.find(word);
+        return found == dict.end() ? non_vocab_id : found->second;
+    }
+    /// Serializes the dictionary to `writer`: passes the key built from "Dict" and the
+    /// id->word table to serializer::writeMany. The word->id map is not written.
+    void Dictionary::serializerWrite(std::ostream& writer) const
+    {
+        serializer::writeMany(writer, serializer::to_key("Dict"), id2word);
+    }
+    /// Loads the id->word table from `reader` (stored under the key "Dict") and
+    /// rebuilds the word->id map from it.
+    void Dictionary::serializerRead(std::istream& reader)
+    {
+        serializer::readMany(reader, serializer::to_key("Dict"), id2word);
+        // Drop entries from any previous content: emplace() never overwrites an
+        // existing key, so stale words would otherwise survive and skew size().
+        dict.clear();
+        dict.reserve(id2word.size());
+        for (size_t i = 0; i < id2word.size(); ++i)
+        {
+            dict.emplace(id2word[i], (Vid)i);
+        }
+    }
+    /// Returns the result of serializer::computeHashMany applied to `seed` and the
+    /// id->word table.
+    uint64_t Dictionary::computeHash(uint64_t seed) const
+	{
+        return serializer::computeHashMany(seed, id2word);
+	}
+    /// Exchanges the complete contents (both lookup tables) with `rhs`.
+    void Dictionary::swap(Dictionary& rhs)
+    {
+        dict.swap(rhs.dict);
+        id2word.swap(rhs.id2word);
+    }
+    /// Renumbers every word: a word currently holding id `i` receives id `order[i]`,
+    /// and the id->word table is rewritten to match.
+    /// NOTE(review): presumably `order` is a permutation of the current ids - confirm with callers.
+    void Dictionary::reorder(const std::vector<Vid>& order)
+    {
+        for (auto& entry : dict)
+        {
+            const Vid newId = order[entry.second];
+            entry.second = newId;
+            id2word[newId] = entry.first;
+        }
+    }
+    /// Returns the id->word table by const reference (index = vocabulary id).
+    const std::vector<std::string>& Dictionary::getRaw() const
+    {
+        return id2word;
+    }
+    /// Translates id `v` of this dictionary into the id the same word has in `newDict`
+    /// (whatever newDict.toWid() yields for that word).
+    Vid Dictionary::mapToNewDict(Vid v, const Dictionary& newDict) const
+    {
+        const std::string& word = toWord(v);
+        return newDict.toWid(word);
+    }
+    /// Element-wise version of mapToNewDict(Vid, const Dictionary&): the result has
+    /// the same length and order as `v`.
+    std::vector<Vid> Dictionary::mapToNewDict(const std::vector<Vid>& v, const Dictionary& newDict) const
+    {
+        std::vector<Vid> mapped;
+        mapped.reserve(v.size());
+        for (const Vid id : v)
+        {
+            mapped.push_back(mapToNewDict(id, newDict));
+        }
+        return mapped;
+    }
+    /// Translates every id in `v` into the matching id of `newDict`, registering words
+    /// that `newDict` does not contain yet. The result has the same length and order as `v`.
+    ///
+    /// The previous body only looked words up (identical to mapToNewDict) and never
+    /// touched `newDict`, so unknown words came back as `non_vocab_id`.
+    std::vector<Vid> Dictionary::mapToNewDictAdd(const std::vector<Vid>& v, Dictionary& newDict) const
+    {
+        std::vector<Vid> r(v.size());
+        for (size_t i = 0; i < v.size(); ++i)
+        {
+            // add() returns the existing id when the word is already present.
+            r[i] = newDict.add(toWord(v[i]));
+        }
+        return r;
+    }
+}

data/vendor/tomotopy/src/Utils/Dictionary.h CHANGED Viewed

@@ -12,8 +12,9 @@ namespace tomoto
 {
 	using Vid = uint32_t;
 	static constexpr Vid non_vocab_id = (Vid)-1;
+	static constexpr Vid rm_vocab_id = (Vid)-2;
 	using Tid = uint16_t;
-	static constexpr Vid non_topic_id = (Tid)-1;
+	static constexpr Tid non_topic_id = (Tid)-1;
 	using Float = float;
 	struct VidPair : public std::pair<Vid, Vid>
@@ -27,91 +28,41 @@ namespace tomoto
 		std::unordered_map<std::string, Vid> dict;
 		std::vector<std::string> id2word;
 	public:
-		Vid add(const std::string& word)
-		{
-			auto it = dict.find(word);
-			if (it == dict.end())
-			{
-				dict.emplace(word, (Vid)dict.size());
-				id2word.emplace_back(word);
-				return (Vid)(dict.size() - 1);
-			}
-			return it->second;
-		}
+		Dictionary();
+		~Dictionary();
+		Dictionary(const Dictionary&);
+		Dictionary& operator=(const Dictionary&);
+		Dictionary(Dictionary&&) noexcept;
+		Dictionary& operator=(Dictionary&&) noexcept;
+		Vid add(const std::string& word);
 		size_t size() const { return dict.size(); }
-		const std::string& toWord(Vid vid) const
-		{
-			assert(vid < id2word.size());
-			return id2word[vid];
-		}
+		const std::string& toWord(Vid vid) const;
-		Vid toWid(const std::string& word) const
-		{
-			auto it = dict.find(word);
-			if (it == dict.end()) return non_vocab_id;
-			return it->second;
-		}
+		Vid toWid(const std::string& word) const;
-		void serializerWrite(std::ostream& writer) const
-		{
-			serializer::writeMany(writer, serializer::to_key("Dict"), id2word);
-		}
+		void serializerWrite(std::ostream& writer) const;
-		void serializerRead(std::istream& reader)
-		{
-			serializer::readMany(reader, serializer::to_key("Dict"), id2word);
-			for (size_t i = 0; i < id2word.size(); ++i)
-			{
-				dict.emplace(id2word[i], (Vid)i);
-			}
-		}
+		void serializerRead(std::istream& reader);
-		void swap(Dictionary& rhs)
-		{
-			std::swap(dict, rhs.dict);
-			std::swap(id2word, rhs.id2word);
-		}
+		uint64_t computeHash(uint64_t seed) const;
-		void reorder(const std::vector<Vid>& order)
-		{
-			for (auto& p : dict)
-			{
-				p.second = order[p.second];
-				id2word[p.second] = p.first;
-			}
-		}
+		void swap(Dictionary& rhs);
-		const std::vector<std::string>& getRaw() const
-		{
-			return id2word;
-		}
+		void reorder(const std::vector<Vid>& order);
-		Vid mapToNewDict(Vid v, const Dictionary& newDict) const
-		{
-			return newDict.toWid(toWord(v));
-		}
+		const std::vector<std::string>& getRaw() const;
-		std::vector<Vid> mapToNewDict(const std::vector<Vid>& v, const Dictionary& newDict) const
-		{
-			std::vector<Vid> r(v.size());
-			for (size_t i = 0; i < v.size(); ++i)
-			{
-				r[i] = mapToNewDict(v[i], newDict);
-			}
-			return r;
-		}
+		Vid mapToNewDict(Vid v, const Dictionary& newDict) const;
-		std::vector<Vid> mapToNewDictAdd(const std::vector<Vid>& v, Dictionary& newDict) const
-		{
-			std::vector<Vid> r(v.size());
-			for (size_t i = 0; i < v.size(); ++i)
-			{
-				r[i] = mapToNewDict(v[i], newDict);
-			}
-			return r;
-		}
+		std::vector<Vid> mapToNewDict(const std::vector<Vid>& v, const Dictionary& newDict) const;
+		std::vector<Vid> mapToNewDictAdd(const std::vector<Vid>& v, Dictionary& newDict) const;
 	};
 }
@@ -126,4 +77,4 @@ namespace std
 			return hash<size_t>{}(p.first) ^ hash<size_t>{}(p.second);
 		}
 	};
-}
+}

data/vendor/tomotopy/src/Utils/Mmap.cpp ADDED Viewed

@@ -0,0 +1,146 @@
+#include <cstdint>
+#include "Mmap.h"
+namespace tomoto
+{
+	namespace utils
+	{
+		/**
+		 * Converts a UTF-8 encoded string to UTF-16.
+		 *
+		 * Code points above U+FFFF are emitted as a surrogate pair. Overlong encodings and
+		 * encoded surrogate code points are not rejected.
+		 *
+		 * @param str UTF-8 input.
+		 * @return the UTF-16 code units.
+		 * @throws std::invalid_argument on a truncated sequence, a bad continuation byte,
+		 *         an invalid lead byte, or a code point above U+10FFFF.
+		 */
+		static std::u16string utf8To16(const std::string& str)
+		{
+			std::u16string ret;
+			for (auto it = str.begin(); it != str.end(); ++it)
+			{
+				// Steps to the next byte and returns its 6 payload bits; throws when the
+				// input ends or the byte is not of the form 10xxxxxx.
+				const auto nextTrail = [&]() -> uint32_t
+				{
+					if (++it == str.end()) throw std::invalid_argument{ "unexpected ending" };
+					const uint32_t trail = (uint8_t)*it;
+					if ((trail & 0xC0) != 0x80) throw std::invalid_argument{ "unexpected trailing byte" };
+					return trail & 0x3F;
+				};
+				uint32_t code = 0;
+				const uint32_t byte = (uint8_t)*it;
+				if ((byte & 0xF8) == 0xF0)
+				{
+					code = (byte & 0x07) << 18;
+					code |= nextTrail() << 12;
+					code |= nextTrail() << 6;
+					code |= nextTrail();
+				}
+				else if ((byte & 0xF0) == 0xE0)
+				{
+					code = (byte & 0x0F) << 12;
+					code |= nextTrail() << 6;
+					code |= nextTrail();
+				}
+				else if ((byte & 0xE0) == 0xC0)
+				{
+					code = (byte & 0x1F) << 6;
+					code |= nextTrail();
+				}
+				else if ((byte & 0x80) == 0x00)
+				{
+					code = byte;
+				}
+				else
+				{
+					throw std::invalid_argument{ "unicode error" };
+				}
+				if (code < 0x10000)
+				{
+					ret.push_back((char16_t)code);
+				}
+				else if (code <= 0x10FFFF) // U+10FFFF itself is the last valid code point
+				{
+					code -= 0x10000;
+					ret.push_back((char16_t)(0xD800 | (code >> 10)));
+					ret.push_back((char16_t)(0xDC00 | (code & 0x3FF)));
+				}
+				else
+				{
+					throw std::invalid_argument{ "unicode error" };
+				}
+			}
+			return ret;
+		}
+	}
+}
+namespace tomoto
+{
+	namespace utils
+	{
+		/// Opens `filepath` for reading and maps the whole file into memory.
+		/// Throws std::ios_base::failure when opening, sizing or mapping the file fails.
+		MMap::MMap(const std::string& filepath)
+		{
+#ifdef _WIN32
+			// The path is converted from UTF-8 to UTF-16 so the wide-character API can be used.
+			hFile = CreateFileW((const wchar_t*)utf8To16(filepath).c_str(), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, nullptr);
+			if (hFile == INVALID_HANDLE_VALUE) throw std::ios_base::failure("Cannot open '" + filepath + "'");
+			hFileMap = CreateFileMapping(hFile, nullptr, PAGE_READONLY, 0, 0, nullptr);
+			if (hFileMap == nullptr) throw std::ios_base::failure("Cannot open '" + filepath + "' Code:" + std::to_string(GetLastError()));
+			view = (const char*)MapViewOfFile(hFileMap, FILE_MAP_READ, 0, 0, 0);
+			if (!view) throw std::ios_base::failure("Cannot MapViewOfFile() Code:" + std::to_string(GetLastError()));
+			// GetFileSize returns the low 32 bits and stores the high 32 bits in `high`.
+			DWORD high;
+			len = GetFileSize(hFile, &high);
+			len |= (uint64_t)high << 32;
+#else
+			fd = open(filepath.c_str(), O_RDONLY);
+			if (fd == -1) throw std::ios_base::failure("Cannot open '" + filepath + "'");
+			// The mapping length is the file size reported by fstat.
+			struct stat sb;
+			if (fstat(fd, &sb) < 0) throw std::ios_base::failure("Cannot open '" + filepath + "'");
+			len = sb.st_size;
+			view = (const char*)mmap(nullptr, len, PROT_READ, MAP_PRIVATE, fd, 0);
+			if (view == MAP_FAILED) throw std::ios_base::failure("Mapping failed");
+#endif
+		}
+#ifdef _WIN32
+		/// Move constructor: takes over the view, its length and both handles;
+		/// `o` is left without a view or handles.
+		MMap::MMap(MMap&& o) noexcept
+			: view{ o.view }, len{ o.len }, hFile{ std::move(o.hFile) }, hFileMap{ std::move(o.hFileMap) }
+		{
+			o.view = nullptr;
+		}
+#else
+		/// Move constructor: takes over the view, its length and the descriptor;
+		/// `o` is left without a view.
+		MMap::MMap(MMap&& o) noexcept
+			: view{ o.view }, len{ o.len }, fd{ std::move(o.fd) }
+		{
+			o.view = nullptr;
+		}
+#endif
+		/// Move assignment implemented as a full exchange of state: the previous mapping
+		/// of `*this` ends up in `o` and is released when `o` is destroyed.
+		MMap& MMap::operator=(MMap&& o) noexcept
+		{
+#ifdef _WIN32
+			std::swap(hFileMap, o.hFileMap);
+			std::swap(hFile, o.hFile);
+#else
+			std::swap(fd, o.fd);
+#endif
+			std::swap(len, o.len);
+			std::swap(view, o.view);
+			return *this;
+		}
+		/// Releases the mapped view, if any. The file handle(s) / descriptor are closed
+		/// afterwards by the guard members' own destructors.
+		MMap::~MMap()
+		{
+#ifdef _WIN32
+			if (!hFileMap) return;
+			UnmapViewOfFile(view);
+			view = nullptr;
+#else
+			if (!view) return;
+			munmap((void*)view, len);
+#endif
+		}
+	}
+}

data/vendor/tomotopy/src/Utils/Mmap.h ADDED Viewed

@@ -0,0 +1,139 @@
+#pragma once
+#include <string>
+#include <iostream>
+#ifdef _WIN32
+#define NOMINMAX
+#include <Windows.h>
+namespace tomoto
+{
+	namespace utils
+	{
+		namespace detail
+		{
+			/// Move-only owner of a Win32 HANDLE. The handle is closed on destruction
+			/// unless it is null or INVALID_HANDLE_VALUE.
+			class HandleGuard
+			{
+				HANDLE handle = nullptr;
+			public:
+				/// Takes ownership of `_handle`; implicit so a raw HANDLE can be assigned directly.
+				HandleGuard(HANDLE _handle = nullptr) : handle(_handle)
+				{
+				}
+				HandleGuard(const HandleGuard&) = delete;
+				HandleGuard& operator =(const HandleGuard&) = delete;
+				/// Steals the handle of `o`, leaving `o` empty.
+				HandleGuard(HandleGuard&& o) noexcept : handle(o.handle)
+				{
+					o.handle = nullptr;
+				}
+				/// Exchanges handles with `o`; the old handle is closed when `o` dies.
+				HandleGuard& operator=(HandleGuard&& o) noexcept
+				{
+					HANDLE previous = handle;
+					handle = o.handle;
+					o.handle = previous;
+					return *this;
+				}
+				~HandleGuard()
+				{
+					if (!handle || handle == INVALID_HANDLE_VALUE) return;
+					CloseHandle(handle);
+					handle = nullptr;
+				}
+				/// Gives access to the raw handle without releasing ownership.
+				operator HANDLE() const
+				{
+					return handle;
+				}
+			};
+		}
+		/// Move-only object exposing the contents of a file as a `const char*` view
+		/// of `size()` bytes (Windows variant, built on file and file-mapping handles).
+		class MMap
+		{
+			const char* view = nullptr; // start of the mapped view, or nullptr
+			uint64_t len = 0; // length of the view in bytes
+			detail::HandleGuard hFile, hFileMap; // file handle and file-mapping handle
+		public:
+			/// Opens and maps `filepath`.
+			MMap(const std::string& filepath);
+			MMap(const MMap&) = delete;
+			MMap& operator=(const MMap&) = delete;
+			MMap(MMap&& o) noexcept;
+			MMap& operator=(MMap&& o) noexcept;
+			~MMap();
+			/// Pointer to the first mapped byte.
+			const char* get() const { return view; }
+			/// Number of mapped bytes.
+			size_t size() const { return len; }
+		};
+	}
+}
+#else
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+namespace tomoto
+{
+	namespace utils
+	{
+		namespace detail
+		{
+			/// Move-only owner of a POSIX file descriptor. The descriptor is closed on
+			/// destruction unless it is 0 or -1.
+			/// NOTE(review): 0 doubles as the "empty" value here although it is a valid
+			/// descriptor number - confirm no caller needs to own descriptor 0.
+			class FDGuard
+			{
+				int fd = 0;
+			public:
+				/// Takes ownership of `_fd`; implicit so the result of open() can be assigned directly.
+				FDGuard(int _fd = 0) : fd(_fd)
+				{
+				}
+				FDGuard(const FDGuard&) = delete;
+				FDGuard& operator =(const FDGuard&) = delete;
+				/// Takes the descriptor of `o`, leaving `o` empty. Swapping ints cannot throw,
+				/// so this is noexcept, matching the noexcept moves of MMap that call it.
+				FDGuard(FDGuard&& o) noexcept
+				{
+					std::swap(fd, o.fd);
+				}
+				/// Exchanges descriptors with `o`; the old one is closed when `o` dies.
+				FDGuard& operator=(FDGuard&& o) noexcept
+				{
+					std::swap(fd, o.fd);
+					return *this;
+				}
+				~FDGuard()
+				{
+					if (fd && fd != -1)
+					{
+						close(fd);
+						fd = 0;
+					}
+				}
+				/// Gives access to the raw descriptor without releasing ownership.
+				operator int() const
+				{
+					return fd;
+				}
+			};
+		}
+		/// Move-only object exposing the contents of a file as a `const char*` view
+		/// of `size()` bytes (POSIX variant, built on a file descriptor).
+		class MMap
+		{
+			const char* view = nullptr; // start of the mapped view, or nullptr
+			size_t len = 0; // length of the view in bytes
+			detail::FDGuard fd; // descriptor of the mapped file
+		public:
+			/// Opens and maps `filepath`.
+			MMap(const std::string& filepath);
+			MMap(const MMap&) = delete;
+			MMap& operator=(const MMap&) = delete;
+			MMap(MMap&& o) noexcept;
+			MMap& operator=(MMap&& o) noexcept;
+			~MMap();
+			/// Pointer to the first mapped byte.
+			const char* get() const { return view; }
+			/// Number of mapped bytes.
+			size_t size() const { return len; }
+		};
+	}
+}
+#endif