ooxml_crypt 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +58 -0
- data/Rakefile +12 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/ext/ooxml_crypt/extconf.rb +18 -0
- data/ext/ooxml_crypt/ooxml_crypt.c +27 -0
- data/ext/ooxml_crypt/ooxml_crypt.h +7 -0
- data/lib/ooxml_crypt/version.rb +5 -0
- data/lib/ooxml_crypt.rb +75 -0
- data/vendor/cybozulib/.github/workflows/main.yml +12 -0
- data/vendor/cybozulib/.gitignore +5 -0
- data/vendor/cybozulib/CMakeLists.txt +6 -0
- data/vendor/cybozulib/COPYRIGHT +27 -0
- data/vendor/cybozulib/Makefile +26 -0
- data/vendor/cybozulib/bin/libeay32.dll +0 -0
- data/vendor/cybozulib/bin/libmecab.dll +0 -0
- data/vendor/cybozulib/bin/ssleay32.dll +0 -0
- data/vendor/cybozulib/common.mk +116 -0
- data/vendor/cybozulib/common.props +25 -0
- data/vendor/cybozulib/cybozulib.sln +286 -0
- data/vendor/cybozulib/debug.props +14 -0
- data/vendor/cybozulib/include/cybozu/array.hpp +197 -0
- data/vendor/cybozulib/include/cybozu/atoi.hpp +238 -0
- data/vendor/cybozulib/include/cybozu/atomic.hpp +146 -0
- data/vendor/cybozulib/include/cybozu/base64.hpp +210 -0
- data/vendor/cybozulib/include/cybozu/benchmark.hpp +212 -0
- data/vendor/cybozulib/include/cybozu/bfd.hpp +105 -0
- data/vendor/cybozulib/include/cybozu/bit_operation.hpp +139 -0
- data/vendor/cybozulib/include/cybozu/bitvector.hpp +358 -0
- data/vendor/cybozulib/include/cybozu/condition_variable.hpp +113 -0
- data/vendor/cybozulib/include/cybozu/condition_variable_cs.hpp +74 -0
- data/vendor/cybozulib/include/cybozu/config.hpp +392 -0
- data/vendor/cybozulib/include/cybozu/critical_section.hpp +60 -0
- data/vendor/cybozulib/include/cybozu/crypto.hpp +321 -0
- data/vendor/cybozulib/include/cybozu/csucvector.hpp +624 -0
- data/vendor/cybozulib/include/cybozu/csv.hpp +294 -0
- data/vendor/cybozulib/include/cybozu/data_type.hpp +27 -0
- data/vendor/cybozulib/include/cybozu/endian.hpp +224 -0
- data/vendor/cybozulib/include/cybozu/env.hpp +63 -0
- data/vendor/cybozulib/include/cybozu/event.hpp +122 -0
- data/vendor/cybozulib/include/cybozu/exception.hpp +253 -0
- data/vendor/cybozulib/include/cybozu/file.hpp +626 -0
- data/vendor/cybozulib/include/cybozu/fmindex.hpp +291 -0
- data/vendor/cybozulib/include/cybozu/format.hpp +93 -0
- data/vendor/cybozulib/include/cybozu/frequency.hpp +264 -0
- data/vendor/cybozulib/include/cybozu/hash.hpp +67 -0
- data/vendor/cybozulib/include/cybozu/inttype.hpp +174 -0
- data/vendor/cybozulib/include/cybozu/itoa.hpp +336 -0
- data/vendor/cybozulib/include/cybozu/json.hpp +120 -0
- data/vendor/cybozulib/include/cybozu/line_stream.hpp +149 -0
- data/vendor/cybozulib/include/cybozu/link_libeay32.hpp +21 -0
- data/vendor/cybozulib/include/cybozu/link_mpir.hpp +18 -0
- data/vendor/cybozulib/include/cybozu/link_ssleay32.hpp +19 -0
- data/vendor/cybozulib/include/cybozu/log.hpp +237 -0
- data/vendor/cybozulib/include/cybozu/minixml.hpp +452 -0
- data/vendor/cybozulib/include/cybozu/mmap.hpp +143 -0
- data/vendor/cybozulib/include/cybozu/mutex.hpp +144 -0
- data/vendor/cybozulib/include/cybozu/nlp/mecab.hpp +96 -0
- data/vendor/cybozulib/include/cybozu/nlp/plsi.hpp +315 -0
- data/vendor/cybozulib/include/cybozu/nlp/random.hpp +74 -0
- data/vendor/cybozulib/include/cybozu/nlp/sparse.hpp +529 -0
- data/vendor/cybozulib/include/cybozu/nlp/svd.hpp +486 -0
- data/vendor/cybozulib/include/cybozu/nlp/tfidf.hpp +226 -0
- data/vendor/cybozulib/include/cybozu/nlp/top_score.hpp +75 -0
- data/vendor/cybozulib/include/cybozu/option.hpp +743 -0
- data/vendor/cybozulib/include/cybozu/parallel.hpp +88 -0
- data/vendor/cybozulib/include/cybozu/pcg.hpp +72 -0
- data/vendor/cybozulib/include/cybozu/process.hpp +324 -0
- data/vendor/cybozulib/include/cybozu/quit_signal_handler.hpp +66 -0
- data/vendor/cybozulib/include/cybozu/random_generator.hpp +144 -0
- data/vendor/cybozulib/include/cybozu/regex.hpp +463 -0
- data/vendor/cybozulib/include/cybozu/select8.hpp +279 -0
- data/vendor/cybozulib/include/cybozu/serializer.hpp +363 -0
- data/vendor/cybozulib/include/cybozu/sha1.hpp +209 -0
- data/vendor/cybozulib/include/cybozu/sha2.hpp +506 -0
- data/vendor/cybozulib/include/cybozu/siphash.hpp +105 -0
- data/vendor/cybozulib/include/cybozu/socket.hpp +785 -0
- data/vendor/cybozulib/include/cybozu/ssl.hpp +203 -0
- data/vendor/cybozulib/include/cybozu/stacktrace.hpp +291 -0
- data/vendor/cybozulib/include/cybozu/stream.hpp +269 -0
- data/vendor/cybozulib/include/cybozu/string.hpp +1746 -0
- data/vendor/cybozulib/include/cybozu/string_operation.hpp +365 -0
- data/vendor/cybozulib/include/cybozu/sucvector.hpp +378 -0
- data/vendor/cybozulib/include/cybozu/test.hpp +373 -0
- data/vendor/cybozulib/include/cybozu/thread.hpp +229 -0
- data/vendor/cybozulib/include/cybozu/time.hpp +281 -0
- data/vendor/cybozulib/include/cybozu/tls.hpp +115 -0
- data/vendor/cybozulib/include/cybozu/unordered_map.hpp +13 -0
- data/vendor/cybozulib/include/cybozu/unordered_set.hpp +13 -0
- data/vendor/cybozulib/include/cybozu/v128.hpp +376 -0
- data/vendor/cybozulib/include/cybozu/wavelet_matrix.hpp +345 -0
- data/vendor/cybozulib/include/cybozu/xorshift.hpp +189 -0
- data/vendor/cybozulib/include/cybozu/zlib.hpp +325 -0
- data/vendor/cybozulib/include/sais.hxx +364 -0
- data/vendor/cybozulib/misc/make_select8tbl.cpp +26 -0
- data/vendor/cybozulib/mk.bat +37 -0
- data/vendor/cybozulib/readme.md +29 -0
- data/vendor/cybozulib/release.props +12 -0
- data/vendor/cybozulib/sample/Makefile +30 -0
- data/vendor/cybozulib/sample/csucvector_smpl.cpp +42 -0
- data/vendor/cybozulib/sample/data/svd/org/test1.S +4 -0
- data/vendor/cybozulib/sample/data/svd/org/test1.U +4 -0
- data/vendor/cybozulib/sample/data/svd/org/test1.V +6 -0
- data/vendor/cybozulib/sample/data/svd/test1 +4 -0
- data/vendor/cybozulib/sample/data/svd/test2 +4 -0
- data/vendor/cybozulib/sample/desymbol.cpp +127 -0
- data/vendor/cybozulib/sample/exception_smpl.cpp +46 -0
- data/vendor/cybozulib/sample/fmindex_smpl.cpp +231 -0
- data/vendor/cybozulib/sample/log_smpl.cpp +19 -0
- data/vendor/cybozulib/sample/mecab_smpl.cpp +37 -0
- data/vendor/cybozulib/sample/option2_smpl.cpp +68 -0
- data/vendor/cybozulib/sample/option_smpl.cpp +42 -0
- data/vendor/cybozulib/sample/plsi_smpl.cpp +207 -0
- data/vendor/cybozulib/sample/proj/exception_smpl.vcproj +184 -0
- data/vendor/cybozulib/sample/proj/mecab_smpl.vcproj +184 -0
- data/vendor/cybozulib/sample/proj/ssl_smpl/ssl_smpl.vcxproj +85 -0
- data/vendor/cybozulib/sample/proj/ssl_smpl.vcproj +347 -0
- data/vendor/cybozulib/sample/proj/stacktrace_smpl/stacktrace_smpl.vcxproj +85 -0
- data/vendor/cybozulib/sample/proj/svd_smpl.vcproj +184 -0
- data/vendor/cybozulib/sample/quit_signal_handler.cpp +30 -0
- data/vendor/cybozulib/sample/serializer_smpl.cpp +196 -0
- data/vendor/cybozulib/sample/socket_smpl.cpp +82 -0
- data/vendor/cybozulib/sample/ssl_smpl.cpp +39 -0
- data/vendor/cybozulib/sample/stacktrace_smpl.cpp +52 -0
- data/vendor/cybozulib/sample/svd_bench_smpl.cpp +143 -0
- data/vendor/cybozulib/sample/svd_smpl.cpp +94 -0
- data/vendor/cybozulib/sample/wm_bench_smpl.cpp +182 -0
- data/vendor/cybozulib/sample/zlib_smpl.cpp +41 -0
- data/vendor/cybozulib/src/Makefile +8 -0
- data/vendor/cybozulib/src/base/Makefile +19 -0
- data/vendor/cybozulib/test/Makefile +12 -0
- data/vendor/cybozulib/test/base/Makefile +37 -0
- data/vendor/cybozulib/test/base/array_test.cpp +173 -0
- data/vendor/cybozulib/test/base/atoi_test.cpp +774 -0
- data/vendor/cybozulib/test/base/atomic_test.cpp +49 -0
- data/vendor/cybozulib/test/base/base64_test.cpp +113 -0
- data/vendor/cybozulib/test/base/bit_operation_test.cpp +134 -0
- data/vendor/cybozulib/test/base/bitvector_test.cpp +204 -0
- data/vendor/cybozulib/test/base/condition_variable_cs_test.cpp +92 -0
- data/vendor/cybozulib/test/base/condition_variable_test.cpp +88 -0
- data/vendor/cybozulib/test/base/config_test.cpp +236 -0
- data/vendor/cybozulib/test/base/crypto_test.cpp +122 -0
- data/vendor/cybozulib/test/base/csucvector_test.cpp +63 -0
- data/vendor/cybozulib/test/base/csv_test.cpp +182 -0
- data/vendor/cybozulib/test/base/data/a.xml +26 -0
- data/vendor/cybozulib/test/base/endian_test.cpp +56 -0
- data/vendor/cybozulib/test/base/env_test.cpp +22 -0
- data/vendor/cybozulib/test/base/event_test.cpp +41 -0
- data/vendor/cybozulib/test/base/file_test.cpp +233 -0
- data/vendor/cybozulib/test/base/fmindex_test.cpp +118 -0
- data/vendor/cybozulib/test/base/format_test.cpp +12 -0
- data/vendor/cybozulib/test/base/frequency_test.cpp +104 -0
- data/vendor/cybozulib/test/base/itoa_test.cpp +522 -0
- data/vendor/cybozulib/test/base/line_stream_test.cpp +208 -0
- data/vendor/cybozulib/test/base/mecab_test.cpp +41 -0
- data/vendor/cybozulib/test/base/minixml_test.cpp +103 -0
- data/vendor/cybozulib/test/base/mmap_test.cpp +15 -0
- data/vendor/cybozulib/test/base/option_test.cpp +487 -0
- data/vendor/cybozulib/test/base/parallel_test.cpp +48 -0
- data/vendor/cybozulib/test/base/proj/array_test/array_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/atoi_test/atoi_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/atomic_test/atomic_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/base64_test/base64_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/condition_variable_cs_test/condition_variable_cs_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/condition_variable_test/condition_variable_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/config_test/config_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/csv_test/csv_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/endian_test/endian_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/env_test/env_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/event_test/event_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/file_test/file_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/itoa_test/itoa_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/mecab_test/mecab_test.vcxproj +88 -0
- data/vendor/cybozulib/test/base/proj/minixml_test/minixml_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/mmap_test/mmap_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/serializer_test/serializer_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/sha1_test/sha1_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/stream_test/stream_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/string_operation_test/string_operation_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/string_test/string_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/thread_test/thread_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/time_test/time_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/tls_test/tls_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/zlib_test/zlib_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/random_generator_test.cpp +28 -0
- data/vendor/cybozulib/test/base/regex_test.cpp +74 -0
- data/vendor/cybozulib/test/base/serializer_test.cpp +483 -0
- data/vendor/cybozulib/test/base/sha1_test.cpp +61 -0
- data/vendor/cybozulib/test/base/sha2_test.cpp +191 -0
- data/vendor/cybozulib/test/base/siphash_test.cpp +33 -0
- data/vendor/cybozulib/test/base/socket_test.cpp +76 -0
- data/vendor/cybozulib/test/base/stream_test.cpp +101 -0
- data/vendor/cybozulib/test/base/string_operation_test.cpp +340 -0
- data/vendor/cybozulib/test/base/string_test.cpp +1705 -0
- data/vendor/cybozulib/test/base/sucvector_test.cpp +312 -0
- data/vendor/cybozulib/test/base/thread_test.cpp +62 -0
- data/vendor/cybozulib/test/base/time_test.cpp +164 -0
- data/vendor/cybozulib/test/base/tls_test.cpp +50 -0
- data/vendor/cybozulib/test/base/wavelet_matrix_test.cpp +145 -0
- data/vendor/cybozulib/test/base/zlib_test.cpp +371 -0
- data/vendor/cybozulib/test/nlp/Makefile +27 -0
- data/vendor/cybozulib/test/nlp/proj/random_test.vcproj +184 -0
- data/vendor/cybozulib/test/nlp/proj/sparse_test.vcproj +184 -0
- data/vendor/cybozulib/test/nlp/proj/svd_test.vcproj +184 -0
- data/vendor/cybozulib/test/nlp/random_test.cpp +62 -0
- data/vendor/cybozulib/test/nlp/sparse_test.cpp +347 -0
- data/vendor/cybozulib/test/nlp/svd_test.cpp +234 -0
- data/vendor/cybozulib/test/nlp/top_score_test.cpp +40 -0
- data/vendor/cybozulib/tool/create_vcproj.py +186 -0
- data/vendor/cybozulib/tool/vcproj_tmpl.py +185 -0
- data/vendor/msoffice/COPYRIGHT +27 -0
- data/vendor/msoffice/Makefile +29 -0
- data/vendor/msoffice/bin/64/msoc.dll +0 -0
- data/vendor/msoffice/bin/64/msocsample.exe +0 -0
- data/vendor/msoffice/bin/64/msoffice-crypt.exe +0 -0
- data/vendor/msoffice/bin/msoc.dll +0 -0
- data/vendor/msoffice/bin/msocsample.exe +0 -0
- data/vendor/msoffice/bin/msoffice-crypt.exe +0 -0
- data/vendor/msoffice/common.mk +71 -0
- data/vendor/msoffice/common.props +26 -0
- data/vendor/msoffice/debug.props +14 -0
- data/vendor/msoffice/include/attack.hpp +211 -0
- data/vendor/msoffice/include/cfb.hpp +777 -0
- data/vendor/msoffice/include/crypto_util.hpp +450 -0
- data/vendor/msoffice/include/custom_sha1.hpp +342 -0
- data/vendor/msoffice/include/decode.hpp +240 -0
- data/vendor/msoffice/include/encode.hpp +221 -0
- data/vendor/msoffice/include/make_dataspace.hpp +316 -0
- data/vendor/msoffice/include/msoc.h +129 -0
- data/vendor/msoffice/include/resource.hpp +7 -0
- data/vendor/msoffice/include/standard_encryption.hpp +145 -0
- data/vendor/msoffice/include/uint32vec.hpp +179 -0
- data/vendor/msoffice/include/util.hpp +212 -0
- data/vendor/msoffice/lib/.emptydir +0 -0
- data/vendor/msoffice/misc/decrypt-xls.vbs +46 -0
- data/vendor/msoffice/mk.bat +1 -0
- data/vendor/msoffice/mkdll.bat +3 -0
- data/vendor/msoffice/msoc.def +13 -0
- data/vendor/msoffice/msocsample.py +178 -0
- data/vendor/msoffice/msoffice12.sln +31 -0
- data/vendor/msoffice/readme.md +110 -0
- data/vendor/msoffice/release.props +28 -0
- data/vendor/msoffice/src/Makefile +19 -0
- data/vendor/msoffice/src/attack.cpp +124 -0
- data/vendor/msoffice/src/cfb_test.cpp +77 -0
- data/vendor/msoffice/src/minisample.c +54 -0
- data/vendor/msoffice/src/msocdll.cpp +276 -0
- data/vendor/msoffice/src/msocsample.c +136 -0
- data/vendor/msoffice/src/msoffice-crypt.cpp +219 -0
- data/vendor/msoffice/src/proj/attack/attack.vcxproj +88 -0
- data/vendor/msoffice/src/proj/main/msoffice-crypt.vcxproj +88 -0
- data/vendor/msoffice/src/sha1.cpp +234 -0
- data/vendor/msoffice/test/Makefile +20 -0
- data/vendor/msoffice/test/cfb_test.cpp +74 -0
- data/vendor/msoffice/test/hash_test.cpp +59 -0
- data/vendor/msoffice/test/proj/cfb/cfb_test.vcxproj +90 -0
- data/vendor/msoffice/test/proj/hash/hash_test.vcxproj +90 -0
- data/vendor/msoffice/test/sampl.bat +8 -0
- data/vendor/msoffice/test_all.py +46 -0
- data/vendor/update +4 -0
- metadata +351 -0
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
/**
|
|
3
|
+
@file
|
|
4
|
+
@brief mutex
|
|
5
|
+
|
|
6
|
+
@author MITSUNARI Shigeo(@herumi)
|
|
7
|
+
@author MITSUNARI Shigeo
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
#ifdef _WIN32
|
|
11
|
+
#ifndef WIN32_LEAN_AND_MEAN
|
|
12
|
+
#define WIN32_LEAN_AND_MEAN
|
|
13
|
+
#endif
|
|
14
|
+
#include <windows.h>
|
|
15
|
+
#else
|
|
16
|
+
#include <pthread.h>
|
|
17
|
+
#include <time.h>
|
|
18
|
+
#endif
|
|
19
|
+
#include <assert.h>
|
|
20
|
+
#include <stdlib.h>
|
|
21
|
+
|
|
22
|
+
namespace cybozu {
|
|
23
|
+
|
|
24
|
+
class ConditionVariable;
|
|
25
|
+
|
|
26
|
+
namespace thread {
|
|
27
|
+
|
|
28
|
+
#ifdef _WIN32
|
|
29
|
+
typedef HANDLE MutexHandle;
|
|
30
|
+
/*
	create an unnamed Win32 mutex that is not owned by the caller
	and store its handle in mutex
	@param mutex [out] receives the value returned by CreateMutex
	@note the result of CreateMutex is stored as is; it is not checked here
*/
inline void MutexInit(MutexHandle& mutex)
{
	// semaphore based variant kept for reference (not used):
	// mutex = CreateSemaphore(NULL /* no security */, 1 /* init */, 0x7FFFFFFF /* max */, NULL /* no name */);
	const BOOL initiallyOwned = FALSE;
	mutex = CreateMutex(NULL /* no security */, initiallyOwned, NULL /* no name */);
}
|
|
35
|
+
/*
	wait without time limit (INFINITE) on mutex
	the result of WaitForSingleObject is discarded
*/
inline void MutexLock(MutexHandle& mutex)
{
	WaitForSingleObject(mutex, INFINITE);
}
|
|
36
|
+
/*
|
|
37
|
+
return false if timeout
|
|
38
|
+
@param msec [in] msec
|
|
39
|
+
*/
|
|
40
|
+
inline bool MutexLockTimeout(MutexHandle& mutex, int msec)
|
|
41
|
+
{
|
|
42
|
+
DWORD ret = WaitForSingleObject(mutex, msec);
|
|
43
|
+
if (ret == WAIT_OBJECT_0) {
|
|
44
|
+
return true;
|
|
45
|
+
}
|
|
46
|
+
if (ret == WAIT_TIMEOUT) {
|
|
47
|
+
return false;
|
|
48
|
+
}
|
|
49
|
+
/* ret == WAIT_ABANDONED */
|
|
50
|
+
assert(0);
|
|
51
|
+
return false;
|
|
52
|
+
}
|
|
53
|
+
/*
	release mutex by ReleaseMutex
	the result of ReleaseMutex is discarded
*/
inline void MutexUnlock(MutexHandle& mutex)
{
	// counterpart of the semaphore based variant (not used):
	// ReleaseSemaphore(mutex, 1, NULL);
	ReleaseMutex(mutex);
}
|
|
58
|
+
/*
	close the handle stored in mutex
	the result of CloseHandle is discarded
*/
inline void MutexTerm(MutexHandle& mutex)
{
	CloseHandle(mutex);
}
|
|
59
|
+
#else
|
|
60
|
+
typedef pthread_mutex_t MutexHandle;
|
|
61
|
+
/*
	initialize mutex with the default attributes (NULL attribute object)
	the result of pthread_mutex_init is discarded
*/
inline void MutexInit(MutexHandle& mutex)
{
#if 0
	/* disabled variant : request PTHREAD_MUTEX_TIMED_NP explicitly */
	pthread_mutexattr_t attr;
	pthread_mutexattr_init(&attr);
	if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_TIMED_NP)) {
		perror("pthread_mutexattr_settype");
		exit(1);
	}
	pthread_mutex_init(&mutex, &attr);
	pthread_mutexattr_destroy(&attr);
#else
	pthread_mutex_init(&mutex, NULL);
#endif
}
|
|
76
|
+
/*
	lock mutex by pthread_mutex_lock
	the result of pthread_mutex_lock is discarded
*/
inline void MutexLock(MutexHandle& mutex)
{
	pthread_mutex_lock(&mutex);
}
|
|
77
|
+
#if 0
/*
	try to lock mutex, waiting at most msec milliseconds
	(this function is compiled out)
	@param mutex [in] mutex initialized by MutexInit
	@param msec [in] timeout in milliseconds measured from now
	@return true if pthread_mutex_timedlock returned 0, false otherwise
*/
inline bool MutexLockTimeout(MutexHandle& mutex, int msec)
{
	const long nsecPerMsec = 1000000L;
	const long nsecPerSec = 1000000000L;
	timespec absTime;
	clock_gettime(CLOCK_REALTIME, &absTime);
	absTime.tv_sec += msec / 1000;
	/* the remainder is in milliseconds; tv_nsec is in nanoseconds */
	absTime.tv_nsec += (msec % 1000) * nsecPerMsec;
	/* keep tv_nsec in [0, 1e9) as required for a valid timespec */
	if (absTime.tv_nsec >= nsecPerSec) {
		absTime.tv_sec++;
		absTime.tv_nsec -= nsecPerSec;
	} else if (absTime.tv_nsec < 0) {
		absTime.tv_sec--;
		absTime.tv_nsec += nsecPerSec;
	}
	return pthread_mutex_timedlock(&mutex, &absTime) == 0;
}
#endif
|
|
88
|
+
/*
	unlock mutex by pthread_mutex_unlock
	the result of pthread_mutex_unlock is discarded
*/
inline void MutexUnlock(MutexHandle& mutex)
{
	pthread_mutex_unlock(&mutex);
}
|
|
89
|
+
/*
	destroy mutex by pthread_mutex_destroy
	the result of pthread_mutex_destroy is discarded
*/
inline void MutexTerm(MutexHandle& mutex)
{
	pthread_mutex_destroy(&mutex);
}
|
|
90
|
+
#endif
|
|
91
|
+
|
|
92
|
+
/**
	scoped lock holder
	calls t.lock() in the constructor and t.unlock() in the destructor
	@tparam T type which has lock() and unlock()
	@note t is held by reference, so it must outlive this object
*/
template<class T>
class AutoLockT {
public:
	explicit AutoLockT(T &t)
		: t_(t)
	{
		t_.lock();
	}
	~AutoLockT()
	{
		t_.unlock();
	}
private:
	T& t_;
	/*
		not copyable : a copy would not call lock() but its destructor
		would call unlock(), so one lock() would be paired with two unlock()
	*/
	AutoLockT(const AutoLockT&);
	AutoLockT& operator=(const AutoLockT&);
};
|
|
108
|
+
|
|
109
|
+
} // cybozu::thread
|
|
110
|
+
|
|
111
|
+
class Mutex {
|
|
112
|
+
friend class cybozu::ConditionVariable;
|
|
113
|
+
public:
|
|
114
|
+
Mutex()
|
|
115
|
+
{
|
|
116
|
+
thread::MutexInit(hdl_);
|
|
117
|
+
}
|
|
118
|
+
~Mutex()
|
|
119
|
+
{
|
|
120
|
+
thread::MutexTerm(hdl_);
|
|
121
|
+
}
|
|
122
|
+
void lock()
|
|
123
|
+
{
|
|
124
|
+
thread::MutexLock(hdl_);
|
|
125
|
+
}
|
|
126
|
+
#if 0
|
|
127
|
+
bool lockTimeout(int msec)
|
|
128
|
+
{
|
|
129
|
+
return thread::MutexLockTimeout(hdl_, msec);
|
|
130
|
+
}
|
|
131
|
+
#endif
|
|
132
|
+
void unlock()
|
|
133
|
+
{
|
|
134
|
+
thread::MutexUnlock(hdl_);
|
|
135
|
+
}
|
|
136
|
+
private:
|
|
137
|
+
Mutex(const Mutex&);
|
|
138
|
+
Mutex& operator=(const Mutex&);
|
|
139
|
+
thread::MutexHandle hdl_;
|
|
140
|
+
};
|
|
141
|
+
|
|
142
|
+
typedef cybozu::thread::AutoLockT<cybozu::Mutex> AutoLock;
|
|
143
|
+
|
|
144
|
+
} // cybozu
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
/**
|
|
3
|
+
@file
|
|
4
|
+
@brief wrapper of MeCab
|
|
5
|
+
|
|
6
|
+
@author MITSUNARI Shigeo(@herumi)
|
|
7
|
+
*/
|
|
8
|
+
#include <string>
|
|
9
|
+
#include <assert.h>
|
|
10
|
+
#ifdef _WIN32
|
|
11
|
+
#include <winsock2.h>
|
|
12
|
+
#endif
|
|
13
|
+
#include "mecab.h"
|
|
14
|
+
#include <cybozu/exception.hpp>
|
|
15
|
+
#ifdef _WIN32
|
|
16
|
+
#pragma comment(lib, "libmecab.lib")
|
|
17
|
+
#endif
|
|
18
|
+
|
|
19
|
+
namespace cybozu { namespace nlp {
|
|
20
|
+
|
|
21
|
+
struct Mecab {
|
|
22
|
+
Mecab(const char *option = "-O wakati")
|
|
23
|
+
: tagger_(MeCab::createTagger(option))
|
|
24
|
+
, node_(0)
|
|
25
|
+
{
|
|
26
|
+
if (tagger_ == 0) {
|
|
27
|
+
throw cybozu::Exception("nlp:mecab:createTagger");
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
/**
|
|
31
|
+
T must have push_back(std::string)
|
|
32
|
+
*/
|
|
33
|
+
template<class T>
|
|
34
|
+
bool parse(T& out, const char *str, size_t strLen = 0)
|
|
35
|
+
{
|
|
36
|
+
if (strLen == 0) {
|
|
37
|
+
strLen = strlen(str);
|
|
38
|
+
}
|
|
39
|
+
const char *p = tagger_->parse(str, strLen);
|
|
40
|
+
if (p == 0) return false;
|
|
41
|
+
while (*p) {
|
|
42
|
+
if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') {
|
|
43
|
+
p++;
|
|
44
|
+
continue;
|
|
45
|
+
}
|
|
46
|
+
const char *q = strchr(p, ' ');
|
|
47
|
+
if (q == 0) {
|
|
48
|
+
out.push_back(p);
|
|
49
|
+
break;
|
|
50
|
+
}
|
|
51
|
+
out.push_back(std::string(p, q));
|
|
52
|
+
p = q + 1;
|
|
53
|
+
}
|
|
54
|
+
return true;
|
|
55
|
+
}
|
|
56
|
+
void set(const char *str, size_t strLen = 0)
|
|
57
|
+
{
|
|
58
|
+
if (strLen == 0) {
|
|
59
|
+
strLen = strlen(str);
|
|
60
|
+
}
|
|
61
|
+
node_ = tagger_->parseToNode(str, strLen);
|
|
62
|
+
}
|
|
63
|
+
void set(const std::string& str)
|
|
64
|
+
{
|
|
65
|
+
set(&str[0], str.size());
|
|
66
|
+
}
|
|
67
|
+
bool isEnd() const
|
|
68
|
+
{
|
|
69
|
+
if (node_ == 0) return true;
|
|
70
|
+
return node_->stat == MECAB_EOS_NODE;
|
|
71
|
+
}
|
|
72
|
+
const char *getPos() const { return node_->surface; }
|
|
73
|
+
size_t getSize() const { return node_->length; }
|
|
74
|
+
/* adhoc */
|
|
75
|
+
bool isNoun() const
|
|
76
|
+
{
|
|
77
|
+
assert(node_);
|
|
78
|
+
const char *p = node_->feature;
|
|
79
|
+
if (node_->length < 2) return false;
|
|
80
|
+
return p[0] == '\xE5' && p[1] == '\x90' && p[2] == '\x8D';
|
|
81
|
+
}
|
|
82
|
+
void next()
|
|
83
|
+
{
|
|
84
|
+
assert(node_);
|
|
85
|
+
node_ = node_->next;
|
|
86
|
+
}
|
|
87
|
+
~Mecab()
|
|
88
|
+
{
|
|
89
|
+
delete tagger_;
|
|
90
|
+
}
|
|
91
|
+
private:
|
|
92
|
+
MeCab::Tagger *tagger_;
|
|
93
|
+
const MeCab::Node *node_;
|
|
94
|
+
};
|
|
95
|
+
|
|
96
|
+
} } // cybozu::nlp
|
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
/**
|
|
3
|
+
@file
|
|
4
|
+
@brief pLSI
|
|
5
|
+
@author MITSUNARI Shigeo(@herumi)
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#include <fstream>
|
|
9
|
+
#include <map>
|
|
10
|
+
#include <limits>
|
|
11
|
+
#include <math.h>
|
|
12
|
+
#include <cybozu/string_operation.hpp>
|
|
13
|
+
#include <cybozu/time.hpp>
|
|
14
|
+
#include <cybozu/nlp/random.hpp>
|
|
15
|
+
#include <cybozu/nlp/sparse.hpp>
|
|
16
|
+
#include <cybozu/nlp/top_score.hpp>
|
|
17
|
+
|
|
18
|
+
namespace cybozu { namespace nlp {
|
|
19
|
+
|
|
20
|
+
namespace local {
|
|
21
|
+
|
|
22
|
+
/*
	write the elements of list to out in the form "{ e0 e1 ... }"
	(each element is followed by one space; an empty list gives "{ }")
	@return out
*/
template<class os, typename T>
os& dump(os& out, const std::vector<T>& list) {
	out << "{ ";
	const size_t n = list.size();
	for (size_t idx = 0; idx < n; idx++) {
		out << list[idx] << " ";
	}
	out << "}";
	return out;
}
|
|
31
|
+
|
|
32
|
+
} // local
|
|
33
|
+
|
|
34
|
+
//const double NaN = std::numeric_limits<double>::quiet_NaN();
|
|
35
|
+
|
|
36
|
+
typedef cybozu::nlp::SparseVector<bool> BoolSVec;
|
|
37
|
+
typedef cybozu::nlp::SparseVector<double> DoubleSVec;
|
|
38
|
+
typedef std::vector<BoolSVec> SMatrix;
|
|
39
|
+
|
|
40
|
+
/*
	@return true if key is registered in map
	(map is not modified)
*/
template<typename T>
bool hasKey(const std::map<T, size_t>& map, T key)
{
	return map.count(key) != 0;
}
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class Plsi {
|
|
45
|
+
public:
|
|
46
|
+
typedef int ITEM_TYPE;
|
|
47
|
+
typedef int USER_TYPE;
|
|
48
|
+
|
|
49
|
+
enum SEARCH_TYPE {
|
|
50
|
+
JOINT,
|
|
51
|
+
CONDITIONAL,
|
|
52
|
+
POSTERIOR
|
|
53
|
+
};
|
|
54
|
+
private:
|
|
55
|
+
typedef std::vector<double> DoubleVec;
|
|
56
|
+
typedef std::vector<DoubleVec> DoubleVecVec;
|
|
57
|
+
std::map<USER_TYPE, size_t> users_;
|
|
58
|
+
std::vector<USER_TYPE> userlist_;
|
|
59
|
+
|
|
60
|
+
std::map<ITEM_TYPE, size_t> items_;
|
|
61
|
+
std::vector<ITEM_TYPE> itemlist_;
|
|
62
|
+
|
|
63
|
+
SMatrix matrix_; // item => users
|
|
64
|
+
|
|
65
|
+
// probability of p(z), p(x|z), p(y|z)
|
|
66
|
+
DoubleVec z_;
|
|
67
|
+
DoubleVecVec user_z_, item_z_;
|
|
68
|
+
|
|
69
|
+
template<class os>
|
|
70
|
+
friend os& dump(os& out, const Plsi& x) {
|
|
71
|
+
out << x.matrix_.size() << std::endl;
|
|
72
|
+
local::dump(out, x.z_) << std::endl;
|
|
73
|
+
return out;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
public:
|
|
77
|
+
size_t get_item_id(ITEM_TYPE item) {
|
|
78
|
+
if (hasKey(items_, item)) return items_[item];
|
|
79
|
+
|
|
80
|
+
size_t id = items_[item] = itemlist_.size();
|
|
81
|
+
itemlist_.push_back(item);
|
|
82
|
+
matrix_.push_back(BoolSVec());
|
|
83
|
+
return id;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
BoolSVec& getItem(ITEM_TYPE item) {
|
|
87
|
+
return matrix_[get_item_id(item)];
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
size_t get_user_id(USER_TYPE user) {
|
|
91
|
+
if (hasKey(users_, user)) return users_[user];
|
|
92
|
+
|
|
93
|
+
size_t id = users_[user] = userlist_.size();
|
|
94
|
+
userlist_.push_back(user);
|
|
95
|
+
return id;
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
ITEM_TYPE get_item_key(size_t item_id) {
|
|
99
|
+
return itemlist_[item_id];
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
/**
|
|
103
|
+
@brief retrieve relevant items for query user
|
|
104
|
+
*/
|
|
105
|
+
cybozu::nlp::TopScore<size_t>::Table search_items(USER_TYPE user, int top = 10) {
|
|
106
|
+
int K = (int)z_.size();
|
|
107
|
+
size_t user_id = get_user_id(user);
|
|
108
|
+
|
|
109
|
+
double p_x = 0; // p(x) = sum p(z)p(x|z)
|
|
110
|
+
DoubleVec p_z_x; // p(z|x) = p(z)p(x|z) / p(x)
|
|
111
|
+
for (int k = 0; k < K; k++) {
|
|
112
|
+
double p = z_[k] * user_z_[k][user_id];
|
|
113
|
+
p_x += p;
|
|
114
|
+
p_z_x.push_back(p);
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
cybozu::nlp::TopScore<size_t> ranking(top);
|
|
118
|
+
for (size_t item_id = 0; item_id < items_.size(); item_id++) {
|
|
119
|
+
double score = 0; // p(y|x) = sum _z p(y|z) * p(z|x)
|
|
120
|
+
for (int k = 0; k < K; k++) {
|
|
121
|
+
score += item_z_[k][item_id] * p_z_x[k];
|
|
122
|
+
}
|
|
123
|
+
ranking.add(score / p_x, item_id);
|
|
124
|
+
}
|
|
125
|
+
return ranking.getTable();
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
/**
|
|
129
|
+
@brief retrieve similar items for query item
|
|
130
|
+
*/
|
|
131
|
+
cybozu::nlp::TopScore<size_t>::Table similar_items(ITEM_TYPE item, SEARCH_TYPE search_type, int top=10) {
|
|
132
|
+
int K = (int)z_.size();
|
|
133
|
+
size_t target_item_id = get_item_id(item);
|
|
134
|
+
|
|
135
|
+
cybozu::nlp::TopScore<size_t> ranking(top);
|
|
136
|
+
if (search_type == POSTERIOR) {
|
|
137
|
+
for (size_t item_id = 0; item_id < items_.size(); item_id++) {
|
|
138
|
+
// p(y1=target|y2=item_id) = sum _z p(target|z) * p(item_id|z) * p(z) / p(item_id)
|
|
139
|
+
double score = 0, p_y = 0;
|
|
140
|
+
for(int k=0;k<K;++k) {
|
|
141
|
+
double p = item_z_[k][item_id] * z_[k];
|
|
142
|
+
p_y += p;
|
|
143
|
+
score += item_z_[k][target_item_id] * p;
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
ranking.add(score / p_y, item_id);
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
} else if (search_type == CONDITIONAL) {
|
|
150
|
+
double p_y = 0; // p(y=target) = sum p(z)p(y=target|z)
|
|
151
|
+
DoubleVec p_z_y; // p(z)p(y=target|z)
|
|
152
|
+
for (int k = 0; k < K; k++) {
|
|
153
|
+
double p = z_[k] * item_z_[k][target_item_id];
|
|
154
|
+
p_y += p;
|
|
155
|
+
p_z_y.push_back(p);
|
|
156
|
+
}
|
|
157
|
+
for (size_t item_id = 0; item_id < items_.size(); item_id++) {
|
|
158
|
+
// p(y1=item_id|y2=target) = sum _z p(y1|z) * p(z|y2) = sum _z p(y1|z) * p(y2|z) * p(z) / p(y2)
|
|
159
|
+
double score = 0;
|
|
160
|
+
for (int k = 0; k < K; k++) {
|
|
161
|
+
score += item_z_[k][item_id] * p_z_y[k];
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
ranking.add(score / p_y, item_id);
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
} else if (search_type == JOINT) {
|
|
168
|
+
for (size_t item_id = 0; item_id < items_.size(); item_id++) {
|
|
169
|
+
// p(y1=item_id, y2=i) = sum _z p(y1|z) * p(y2|z) * p(z)
|
|
170
|
+
double score = 0;
|
|
171
|
+
for (int k = 0; k < K; k++) {
|
|
172
|
+
score += item_z_[k][item_id] * item_z_[k][target_item_id] * z_[k];
|
|
173
|
+
}
|
|
174
|
+
ranking.add(score, item_id);
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
return ranking.getTable();
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
/**
|
|
181
|
+
@brief calcurate perplexity
|
|
182
|
+
*/
|
|
183
|
+
double perplexity()
|
|
184
|
+
{
|
|
185
|
+
int K = (int)z_.size();
|
|
186
|
+
|
|
187
|
+
// p(x) = sum p(z)p(x|z)
|
|
188
|
+
DoubleVec p_x;
|
|
189
|
+
for (size_t user_id = 0; user_id < users_.size(); user_id++) {
|
|
190
|
+
double p = 0;
|
|
191
|
+
for (int k = 0; k < K; k++) {
|
|
192
|
+
p += z_[k] * user_z_[k][user_id];
|
|
193
|
+
}
|
|
194
|
+
p_x.push_back(p);
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
int denom = 0;
|
|
198
|
+
double sum = 0;
|
|
199
|
+
for (size_t item_id = 0; item_id < matrix_.size(); item_id++) {
|
|
200
|
+
BoolSVec& item_users = matrix_[item_id];
|
|
201
|
+
for (BoolSVec::const_iterator i = item_users.begin(), ie = item_users.end(); i != ie; ++i) {
|
|
202
|
+
++denom;
|
|
203
|
+
size_t user_id = i.pos();
|
|
204
|
+
|
|
205
|
+
// p(y|x) = sum p(y|z)p(z|x) = sum p(y|z)p(x|z)p(z)/p(x)
|
|
206
|
+
double p = 0;
|
|
207
|
+
for (int k = 0; k < K; k++) {
|
|
208
|
+
p += z_[k] * user_z_[k][user_id] * item_z_[k][item_id];
|
|
209
|
+
}
|
|
210
|
+
sum += log(p / p_x[user_id]);
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
return exp(-sum/denom);
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
/**
|
|
217
|
+
@brief start learning (initialize learning)
|
|
218
|
+
*/
|
|
219
|
+
void startLearning(int K)
|
|
220
|
+
{
|
|
221
|
+
size_t M = users_.size();
|
|
222
|
+
size_t N = items_.size();
|
|
223
|
+
user_z_.resize(K);
|
|
224
|
+
item_z_.resize(K);
|
|
225
|
+
cybozu::nlp::UniformRandomGenerator rand(0.25, 0.75);
|
|
226
|
+
for (int k = 0; k < K; k++) {
|
|
227
|
+
// initialize p(z=k)
|
|
228
|
+
z_.push_back(1.0/K);
|
|
229
|
+
|
|
230
|
+
// initialize p(x=user|z=k)
|
|
231
|
+
DoubleVec& uvec = user_z_[k];
|
|
232
|
+
for (size_t j = 0; j < M; j++) uvec.push_back(1.0/M);
|
|
233
|
+
|
|
234
|
+
// initialize p(y=item|z=k)
|
|
235
|
+
DoubleVec& ivec = item_z_[k];
|
|
236
|
+
double s = 0;
|
|
237
|
+
for (size_t j = 0; j < N; j++) {
|
|
238
|
+
double r = rand.getDouble();
|
|
239
|
+
ivec.push_back(r);
|
|
240
|
+
s += r;
|
|
241
|
+
}
|
|
242
|
+
for(size_t j = 0; j < N; j++) ivec[j] /= s;
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
/**
|
|
248
|
+
@brief step learning (called repeatedly after initialization learning)
|
|
249
|
+
@param[in] beta temperature for tempered EM
|
|
250
|
+
@return likelyhood for previous iteration
|
|
251
|
+
*/
|
|
252
|
+
double step(double beta = 1)
|
|
253
|
+
{
|
|
254
|
+
int K = (int)z_.size();
|
|
255
|
+
|
|
256
|
+
DoubleVec z_numer;
|
|
257
|
+
DoubleVecVec user_numer, item_numer;
|
|
258
|
+
z_numer.resize(K);
|
|
259
|
+
user_numer.resize(K);
|
|
260
|
+
item_numer.resize(K);
|
|
261
|
+
for (int k = 0; k < K; k++) {
|
|
262
|
+
user_numer[k].resize(users_.size());
|
|
263
|
+
item_numer[k].resize(items_.size());
|
|
264
|
+
}
|
|
265
|
+
int denom = 0;
|
|
266
|
+
double likelihood = 0;
|
|
267
|
+
DoubleVec p_z_xy;
|
|
268
|
+
p_z_xy.resize(K);
|
|
269
|
+
|
|
270
|
+
for (size_t item_id = 0; item_id < matrix_.size(); ++item_id) {
|
|
271
|
+
BoolSVec& item_users = matrix_[item_id];
|
|
272
|
+
for (BoolSVec::const_iterator i = item_users.begin(), ie = item_users.end(); i != ie; ++i) {
|
|
273
|
+
// when n(x, y) = 1(true)
|
|
274
|
+
++denom;
|
|
275
|
+
size_t user_id = i.pos();
|
|
276
|
+
|
|
277
|
+
// E-step: p(z|x,y)
|
|
278
|
+
double sum = 0;
|
|
279
|
+
for (int k = 0; k < K; k++) {
|
|
280
|
+
// p(z=k)p(x=user_id|z=k)p(y=item_id|z=k)
|
|
281
|
+
double p = pow(z_[k] * user_z_[k][user_id] * item_z_[k][item_id], beta);
|
|
282
|
+
p_z_xy[k] = p;
|
|
283
|
+
sum += p;
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
// normalize & M-step
|
|
287
|
+
for (int k = 0; k < K; k++) {
|
|
288
|
+
double p = p_z_xy[k] / sum;
|
|
289
|
+
|
|
290
|
+
user_numer[k][user_id] += p;
|
|
291
|
+
item_numer[k][item_id] += p;
|
|
292
|
+
z_numer[k] += p;
|
|
293
|
+
}
|
|
294
|
+
likelihood += log(sum);
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
// M-step: update
|
|
299
|
+
for (int k = 0; k < K; k++) {
|
|
300
|
+
double z_num = z_numer[k];
|
|
301
|
+
z_[k] = z_num / denom;
|
|
302
|
+
for (size_t item_id = 0; item_id < items_.size(); ++item_id) {
|
|
303
|
+
item_z_[k][item_id] = item_numer[k][item_id] / z_num;
|
|
304
|
+
}
|
|
305
|
+
for (size_t user_id = 0; user_id < users_.size(); ++user_id) {
|
|
306
|
+
user_z_[k][user_id] = user_numer[k][user_id] / z_num;
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
// log-likelihood of previous iteration
|
|
311
|
+
return likelihood;
|
|
312
|
+
}
|
|
313
|
+
};
|
|
314
|
+
|
|
315
|
+
} } // cybozu::nlp
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
/**
|
|
3
|
+
@file
|
|
4
|
+
@brief normal random generator
|
|
5
|
+
|
|
6
|
+
@author MITSUNARI Shigeo(@herumi)
|
|
7
|
+
@author MITSUNARI Shigeo
|
|
8
|
+
*/
|
|
9
|
+
#include <cybozu/xorshift.hpp>
|
|
10
|
+
|
|
11
|
+
namespace cybozu { namespace nlp {
|
|
12
|
+
|
|
13
|
+
/*
|
|
14
|
+
use xor shift
|
|
15
|
+
*/
|
|
16
|
+
class UniformRandomGenerator {
|
|
17
|
+
double a_;
|
|
18
|
+
double b_;
|
|
19
|
+
cybozu::XorShift rg;
|
|
20
|
+
public:
|
|
21
|
+
/* generate uniform random value in [a, b) */
|
|
22
|
+
explicit UniformRandomGenerator(double a = 0, double b = 1, int seed = 0)
|
|
23
|
+
: a_(a)
|
|
24
|
+
, b_(b)
|
|
25
|
+
, rg(seed)
|
|
26
|
+
{
|
|
27
|
+
}
|
|
28
|
+
void init(int seed = 0)
|
|
29
|
+
{
|
|
30
|
+
rg.init(seed);
|
|
31
|
+
}
|
|
32
|
+
/* [0, 2^32) random number */
|
|
33
|
+
uint32_t operator()() { return rg.get32(); }
|
|
34
|
+
uint32_t get32() { return rg.get32(); }
|
|
35
|
+
uint64_t get64() { return rg.get64(); }
|
|
36
|
+
/* [a, b) random number */
|
|
37
|
+
double getDouble()
|
|
38
|
+
{
|
|
39
|
+
uint32_t x = get32() >> 5;
|
|
40
|
+
uint32_t y = get32() >> 6;
|
|
41
|
+
double z = (x * double(1U << 26) + y) * (1.0 / double(1LL << 53));
|
|
42
|
+
return (b_ - a_) * z + a_;
|
|
43
|
+
}
|
|
44
|
+
};
|
|
45
|
+
|
|
46
|
+
/*
|
|
47
|
+
normal random generator
|
|
48
|
+
*/
|
|
49
|
+
class NormalRandomGenerator {
|
|
50
|
+
UniformRandomGenerator gen_;
|
|
51
|
+
double u_;
|
|
52
|
+
double s_;
|
|
53
|
+
public:
|
|
54
|
+
explicit NormalRandomGenerator(double u = 0, double s = 1, int seed = 0)
|
|
55
|
+
: gen_(seed)
|
|
56
|
+
, u_(u)
|
|
57
|
+
, s_(s)
|
|
58
|
+
{
|
|
59
|
+
}
|
|
60
|
+
void init(int seed = 0)
|
|
61
|
+
{
|
|
62
|
+
gen_.init(seed);
|
|
63
|
+
}
|
|
64
|
+
double get()
|
|
65
|
+
{
|
|
66
|
+
double sum = -6;
|
|
67
|
+
for (int i = 0; i < 12; i++) {
|
|
68
|
+
sum += gen_.getDouble();
|
|
69
|
+
}
|
|
70
|
+
return sum * s_ + u_;
|
|
71
|
+
}
|
|
72
|
+
};
|
|
73
|
+
|
|
74
|
+
} } // cybozu::nlp
|