ooxml_crypt 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +58 -0
- data/Rakefile +12 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/ext/ooxml_crypt/extconf.rb +18 -0
- data/ext/ooxml_crypt/ooxml_crypt.c +27 -0
- data/ext/ooxml_crypt/ooxml_crypt.h +7 -0
- data/lib/ooxml_crypt/version.rb +5 -0
- data/lib/ooxml_crypt.rb +75 -0
- data/vendor/cybozulib/.github/workflows/main.yml +12 -0
- data/vendor/cybozulib/.gitignore +5 -0
- data/vendor/cybozulib/CMakeLists.txt +6 -0
- data/vendor/cybozulib/COPYRIGHT +27 -0
- data/vendor/cybozulib/Makefile +26 -0
- data/vendor/cybozulib/bin/libeay32.dll +0 -0
- data/vendor/cybozulib/bin/libmecab.dll +0 -0
- data/vendor/cybozulib/bin/ssleay32.dll +0 -0
- data/vendor/cybozulib/common.mk +116 -0
- data/vendor/cybozulib/common.props +25 -0
- data/vendor/cybozulib/cybozulib.sln +286 -0
- data/vendor/cybozulib/debug.props +14 -0
- data/vendor/cybozulib/include/cybozu/array.hpp +197 -0
- data/vendor/cybozulib/include/cybozu/atoi.hpp +238 -0
- data/vendor/cybozulib/include/cybozu/atomic.hpp +146 -0
- data/vendor/cybozulib/include/cybozu/base64.hpp +210 -0
- data/vendor/cybozulib/include/cybozu/benchmark.hpp +212 -0
- data/vendor/cybozulib/include/cybozu/bfd.hpp +105 -0
- data/vendor/cybozulib/include/cybozu/bit_operation.hpp +139 -0
- data/vendor/cybozulib/include/cybozu/bitvector.hpp +358 -0
- data/vendor/cybozulib/include/cybozu/condition_variable.hpp +113 -0
- data/vendor/cybozulib/include/cybozu/condition_variable_cs.hpp +74 -0
- data/vendor/cybozulib/include/cybozu/config.hpp +392 -0
- data/vendor/cybozulib/include/cybozu/critical_section.hpp +60 -0
- data/vendor/cybozulib/include/cybozu/crypto.hpp +321 -0
- data/vendor/cybozulib/include/cybozu/csucvector.hpp +624 -0
- data/vendor/cybozulib/include/cybozu/csv.hpp +294 -0
- data/vendor/cybozulib/include/cybozu/data_type.hpp +27 -0
- data/vendor/cybozulib/include/cybozu/endian.hpp +224 -0
- data/vendor/cybozulib/include/cybozu/env.hpp +63 -0
- data/vendor/cybozulib/include/cybozu/event.hpp +122 -0
- data/vendor/cybozulib/include/cybozu/exception.hpp +253 -0
- data/vendor/cybozulib/include/cybozu/file.hpp +626 -0
- data/vendor/cybozulib/include/cybozu/fmindex.hpp +291 -0
- data/vendor/cybozulib/include/cybozu/format.hpp +93 -0
- data/vendor/cybozulib/include/cybozu/frequency.hpp +264 -0
- data/vendor/cybozulib/include/cybozu/hash.hpp +67 -0
- data/vendor/cybozulib/include/cybozu/inttype.hpp +174 -0
- data/vendor/cybozulib/include/cybozu/itoa.hpp +336 -0
- data/vendor/cybozulib/include/cybozu/json.hpp +120 -0
- data/vendor/cybozulib/include/cybozu/line_stream.hpp +149 -0
- data/vendor/cybozulib/include/cybozu/link_libeay32.hpp +21 -0
- data/vendor/cybozulib/include/cybozu/link_mpir.hpp +18 -0
- data/vendor/cybozulib/include/cybozu/link_ssleay32.hpp +19 -0
- data/vendor/cybozulib/include/cybozu/log.hpp +237 -0
- data/vendor/cybozulib/include/cybozu/minixml.hpp +452 -0
- data/vendor/cybozulib/include/cybozu/mmap.hpp +143 -0
- data/vendor/cybozulib/include/cybozu/mutex.hpp +144 -0
- data/vendor/cybozulib/include/cybozu/nlp/mecab.hpp +96 -0
- data/vendor/cybozulib/include/cybozu/nlp/plsi.hpp +315 -0
- data/vendor/cybozulib/include/cybozu/nlp/random.hpp +74 -0
- data/vendor/cybozulib/include/cybozu/nlp/sparse.hpp +529 -0
- data/vendor/cybozulib/include/cybozu/nlp/svd.hpp +486 -0
- data/vendor/cybozulib/include/cybozu/nlp/tfidf.hpp +226 -0
- data/vendor/cybozulib/include/cybozu/nlp/top_score.hpp +75 -0
- data/vendor/cybozulib/include/cybozu/option.hpp +743 -0
- data/vendor/cybozulib/include/cybozu/parallel.hpp +88 -0
- data/vendor/cybozulib/include/cybozu/pcg.hpp +72 -0
- data/vendor/cybozulib/include/cybozu/process.hpp +324 -0
- data/vendor/cybozulib/include/cybozu/quit_signal_handler.hpp +66 -0
- data/vendor/cybozulib/include/cybozu/random_generator.hpp +144 -0
- data/vendor/cybozulib/include/cybozu/regex.hpp +463 -0
- data/vendor/cybozulib/include/cybozu/select8.hpp +279 -0
- data/vendor/cybozulib/include/cybozu/serializer.hpp +363 -0
- data/vendor/cybozulib/include/cybozu/sha1.hpp +209 -0
- data/vendor/cybozulib/include/cybozu/sha2.hpp +506 -0
- data/vendor/cybozulib/include/cybozu/siphash.hpp +105 -0
- data/vendor/cybozulib/include/cybozu/socket.hpp +785 -0
- data/vendor/cybozulib/include/cybozu/ssl.hpp +203 -0
- data/vendor/cybozulib/include/cybozu/stacktrace.hpp +291 -0
- data/vendor/cybozulib/include/cybozu/stream.hpp +269 -0
- data/vendor/cybozulib/include/cybozu/string.hpp +1746 -0
- data/vendor/cybozulib/include/cybozu/string_operation.hpp +365 -0
- data/vendor/cybozulib/include/cybozu/sucvector.hpp +378 -0
- data/vendor/cybozulib/include/cybozu/test.hpp +373 -0
- data/vendor/cybozulib/include/cybozu/thread.hpp +229 -0
- data/vendor/cybozulib/include/cybozu/time.hpp +281 -0
- data/vendor/cybozulib/include/cybozu/tls.hpp +115 -0
- data/vendor/cybozulib/include/cybozu/unordered_map.hpp +13 -0
- data/vendor/cybozulib/include/cybozu/unordered_set.hpp +13 -0
- data/vendor/cybozulib/include/cybozu/v128.hpp +376 -0
- data/vendor/cybozulib/include/cybozu/wavelet_matrix.hpp +345 -0
- data/vendor/cybozulib/include/cybozu/xorshift.hpp +189 -0
- data/vendor/cybozulib/include/cybozu/zlib.hpp +325 -0
- data/vendor/cybozulib/include/sais.hxx +364 -0
- data/vendor/cybozulib/misc/make_select8tbl.cpp +26 -0
- data/vendor/cybozulib/mk.bat +37 -0
- data/vendor/cybozulib/readme.md +29 -0
- data/vendor/cybozulib/release.props +12 -0
- data/vendor/cybozulib/sample/Makefile +30 -0
- data/vendor/cybozulib/sample/csucvector_smpl.cpp +42 -0
- data/vendor/cybozulib/sample/data/svd/org/test1.S +4 -0
- data/vendor/cybozulib/sample/data/svd/org/test1.U +4 -0
- data/vendor/cybozulib/sample/data/svd/org/test1.V +6 -0
- data/vendor/cybozulib/sample/data/svd/test1 +4 -0
- data/vendor/cybozulib/sample/data/svd/test2 +4 -0
- data/vendor/cybozulib/sample/desymbol.cpp +127 -0
- data/vendor/cybozulib/sample/exception_smpl.cpp +46 -0
- data/vendor/cybozulib/sample/fmindex_smpl.cpp +231 -0
- data/vendor/cybozulib/sample/log_smpl.cpp +19 -0
- data/vendor/cybozulib/sample/mecab_smpl.cpp +37 -0
- data/vendor/cybozulib/sample/option2_smpl.cpp +68 -0
- data/vendor/cybozulib/sample/option_smpl.cpp +42 -0
- data/vendor/cybozulib/sample/plsi_smpl.cpp +207 -0
- data/vendor/cybozulib/sample/proj/exception_smpl.vcproj +184 -0
- data/vendor/cybozulib/sample/proj/mecab_smpl.vcproj +184 -0
- data/vendor/cybozulib/sample/proj/ssl_smpl/ssl_smpl.vcxproj +85 -0
- data/vendor/cybozulib/sample/proj/ssl_smpl.vcproj +347 -0
- data/vendor/cybozulib/sample/proj/stacktrace_smpl/stacktrace_smpl.vcxproj +85 -0
- data/vendor/cybozulib/sample/proj/svd_smpl.vcproj +184 -0
- data/vendor/cybozulib/sample/quit_signal_handler.cpp +30 -0
- data/vendor/cybozulib/sample/serializer_smpl.cpp +196 -0
- data/vendor/cybozulib/sample/socket_smpl.cpp +82 -0
- data/vendor/cybozulib/sample/ssl_smpl.cpp +39 -0
- data/vendor/cybozulib/sample/stacktrace_smpl.cpp +52 -0
- data/vendor/cybozulib/sample/svd_bench_smpl.cpp +143 -0
- data/vendor/cybozulib/sample/svd_smpl.cpp +94 -0
- data/vendor/cybozulib/sample/wm_bench_smpl.cpp +182 -0
- data/vendor/cybozulib/sample/zlib_smpl.cpp +41 -0
- data/vendor/cybozulib/src/Makefile +8 -0
- data/vendor/cybozulib/src/base/Makefile +19 -0
- data/vendor/cybozulib/test/Makefile +12 -0
- data/vendor/cybozulib/test/base/Makefile +37 -0
- data/vendor/cybozulib/test/base/array_test.cpp +173 -0
- data/vendor/cybozulib/test/base/atoi_test.cpp +774 -0
- data/vendor/cybozulib/test/base/atomic_test.cpp +49 -0
- data/vendor/cybozulib/test/base/base64_test.cpp +113 -0
- data/vendor/cybozulib/test/base/bit_operation_test.cpp +134 -0
- data/vendor/cybozulib/test/base/bitvector_test.cpp +204 -0
- data/vendor/cybozulib/test/base/condition_variable_cs_test.cpp +92 -0
- data/vendor/cybozulib/test/base/condition_variable_test.cpp +88 -0
- data/vendor/cybozulib/test/base/config_test.cpp +236 -0
- data/vendor/cybozulib/test/base/crypto_test.cpp +122 -0
- data/vendor/cybozulib/test/base/csucvector_test.cpp +63 -0
- data/vendor/cybozulib/test/base/csv_test.cpp +182 -0
- data/vendor/cybozulib/test/base/data/a.xml +26 -0
- data/vendor/cybozulib/test/base/endian_test.cpp +56 -0
- data/vendor/cybozulib/test/base/env_test.cpp +22 -0
- data/vendor/cybozulib/test/base/event_test.cpp +41 -0
- data/vendor/cybozulib/test/base/file_test.cpp +233 -0
- data/vendor/cybozulib/test/base/fmindex_test.cpp +118 -0
- data/vendor/cybozulib/test/base/format_test.cpp +12 -0
- data/vendor/cybozulib/test/base/frequency_test.cpp +104 -0
- data/vendor/cybozulib/test/base/itoa_test.cpp +522 -0
- data/vendor/cybozulib/test/base/line_stream_test.cpp +208 -0
- data/vendor/cybozulib/test/base/mecab_test.cpp +41 -0
- data/vendor/cybozulib/test/base/minixml_test.cpp +103 -0
- data/vendor/cybozulib/test/base/mmap_test.cpp +15 -0
- data/vendor/cybozulib/test/base/option_test.cpp +487 -0
- data/vendor/cybozulib/test/base/parallel_test.cpp +48 -0
- data/vendor/cybozulib/test/base/proj/array_test/array_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/atoi_test/atoi_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/atomic_test/atomic_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/base64_test/base64_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/condition_variable_cs_test/condition_variable_cs_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/condition_variable_test/condition_variable_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/config_test/config_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/csv_test/csv_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/endian_test/endian_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/env_test/env_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/event_test/event_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/file_test/file_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/itoa_test/itoa_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/mecab_test/mecab_test.vcxproj +88 -0
- data/vendor/cybozulib/test/base/proj/minixml_test/minixml_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/mmap_test/mmap_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/serializer_test/serializer_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/sha1_test/sha1_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/stream_test/stream_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/string_operation_test/string_operation_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/string_test/string_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/thread_test/thread_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/time_test/time_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/tls_test/tls_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/zlib_test/zlib_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/random_generator_test.cpp +28 -0
- data/vendor/cybozulib/test/base/regex_test.cpp +74 -0
- data/vendor/cybozulib/test/base/serializer_test.cpp +483 -0
- data/vendor/cybozulib/test/base/sha1_test.cpp +61 -0
- data/vendor/cybozulib/test/base/sha2_test.cpp +191 -0
- data/vendor/cybozulib/test/base/siphash_test.cpp +33 -0
- data/vendor/cybozulib/test/base/socket_test.cpp +76 -0
- data/vendor/cybozulib/test/base/stream_test.cpp +101 -0
- data/vendor/cybozulib/test/base/string_operation_test.cpp +340 -0
- data/vendor/cybozulib/test/base/string_test.cpp +1705 -0
- data/vendor/cybozulib/test/base/sucvector_test.cpp +312 -0
- data/vendor/cybozulib/test/base/thread_test.cpp +62 -0
- data/vendor/cybozulib/test/base/time_test.cpp +164 -0
- data/vendor/cybozulib/test/base/tls_test.cpp +50 -0
- data/vendor/cybozulib/test/base/wavelet_matrix_test.cpp +145 -0
- data/vendor/cybozulib/test/base/zlib_test.cpp +371 -0
- data/vendor/cybozulib/test/nlp/Makefile +27 -0
- data/vendor/cybozulib/test/nlp/proj/random_test.vcproj +184 -0
- data/vendor/cybozulib/test/nlp/proj/sparse_test.vcproj +184 -0
- data/vendor/cybozulib/test/nlp/proj/svd_test.vcproj +184 -0
- data/vendor/cybozulib/test/nlp/random_test.cpp +62 -0
- data/vendor/cybozulib/test/nlp/sparse_test.cpp +347 -0
- data/vendor/cybozulib/test/nlp/svd_test.cpp +234 -0
- data/vendor/cybozulib/test/nlp/top_score_test.cpp +40 -0
- data/vendor/cybozulib/tool/create_vcproj.py +186 -0
- data/vendor/cybozulib/tool/vcproj_tmpl.py +185 -0
- data/vendor/msoffice/COPYRIGHT +27 -0
- data/vendor/msoffice/Makefile +29 -0
- data/vendor/msoffice/bin/64/msoc.dll +0 -0
- data/vendor/msoffice/bin/64/msocsample.exe +0 -0
- data/vendor/msoffice/bin/64/msoffice-crypt.exe +0 -0
- data/vendor/msoffice/bin/msoc.dll +0 -0
- data/vendor/msoffice/bin/msocsample.exe +0 -0
- data/vendor/msoffice/bin/msoffice-crypt.exe +0 -0
- data/vendor/msoffice/common.mk +71 -0
- data/vendor/msoffice/common.props +26 -0
- data/vendor/msoffice/debug.props +14 -0
- data/vendor/msoffice/include/attack.hpp +211 -0
- data/vendor/msoffice/include/cfb.hpp +777 -0
- data/vendor/msoffice/include/crypto_util.hpp +450 -0
- data/vendor/msoffice/include/custom_sha1.hpp +342 -0
- data/vendor/msoffice/include/decode.hpp +240 -0
- data/vendor/msoffice/include/encode.hpp +221 -0
- data/vendor/msoffice/include/make_dataspace.hpp +316 -0
- data/vendor/msoffice/include/msoc.h +129 -0
- data/vendor/msoffice/include/resource.hpp +7 -0
- data/vendor/msoffice/include/standard_encryption.hpp +145 -0
- data/vendor/msoffice/include/uint32vec.hpp +179 -0
- data/vendor/msoffice/include/util.hpp +212 -0
- data/vendor/msoffice/lib/.emptydir +0 -0
- data/vendor/msoffice/misc/decrypt-xls.vbs +46 -0
- data/vendor/msoffice/mk.bat +1 -0
- data/vendor/msoffice/mkdll.bat +3 -0
- data/vendor/msoffice/msoc.def +13 -0
- data/vendor/msoffice/msocsample.py +178 -0
- data/vendor/msoffice/msoffice12.sln +31 -0
- data/vendor/msoffice/readme.md +110 -0
- data/vendor/msoffice/release.props +28 -0
- data/vendor/msoffice/src/Makefile +19 -0
- data/vendor/msoffice/src/attack.cpp +124 -0
- data/vendor/msoffice/src/cfb_test.cpp +77 -0
- data/vendor/msoffice/src/minisample.c +54 -0
- data/vendor/msoffice/src/msocdll.cpp +276 -0
- data/vendor/msoffice/src/msocsample.c +136 -0
- data/vendor/msoffice/src/msoffice-crypt.cpp +219 -0
- data/vendor/msoffice/src/proj/attack/attack.vcxproj +88 -0
- data/vendor/msoffice/src/proj/main/msoffice-crypt.vcxproj +88 -0
- data/vendor/msoffice/src/sha1.cpp +234 -0
- data/vendor/msoffice/test/Makefile +20 -0
- data/vendor/msoffice/test/cfb_test.cpp +74 -0
- data/vendor/msoffice/test/hash_test.cpp +59 -0
- data/vendor/msoffice/test/proj/cfb/cfb_test.vcxproj +90 -0
- data/vendor/msoffice/test/proj/hash/hash_test.vcxproj +90 -0
- data/vendor/msoffice/test/sampl.bat +8 -0
- data/vendor/msoffice/test_all.py +46 -0
- data/vendor/update +4 -0
- metadata +351 -0
@@ -0,0 +1,144 @@
|
|
1
|
+
#pragma once
|
2
|
+
/**
|
3
|
+
@file
|
4
|
+
@brief mutex
|
5
|
+
|
6
|
+
@author MITSUNARI Shigeo(@herumi)
|
7
|
+
@author MITSUNARI Shigeo
|
8
|
+
*/
|
9
|
+
|
10
|
+
#ifdef _WIN32
|
11
|
+
#ifndef WIN32_LEAN_AND_MEAN
|
12
|
+
#define WIN32_LEAN_AND_MEAN
|
13
|
+
#endif
|
14
|
+
#include <windows.h>
|
15
|
+
#else
|
16
|
+
#include <pthread.h>
|
17
|
+
#include <time.h>
|
18
|
+
#endif
|
19
|
+
#include <assert.h>
|
20
|
+
#include <stdlib.h>
|
21
|
+
|
22
|
+
namespace cybozu {
|
23
|
+
|
24
|
+
class ConditionVariable;
|
25
|
+
|
26
|
+
namespace thread {
|
27
|
+
|
28
|
+
#ifdef _WIN32
|
29
|
+
typedef HANDLE MutexHandle;
|
30
|
+
inline void MutexInit(MutexHandle& mutex)
|
31
|
+
{
|
32
|
+
// mutex = CreateSemaphore(NULL /* no security */, 1 /* init */, 0x7FFFFFFF /* max */, NULL /* no name */);
|
33
|
+
mutex = CreateMutex(NULL /* no security */, FALSE /* no owner */, NULL /* no name */);
|
34
|
+
}
|
35
|
+
inline void MutexLock(MutexHandle& mutex) { WaitForSingleObject(mutex, INFINITE); }
|
36
|
+
/*
|
37
|
+
return false if timeout
|
38
|
+
@param msec [in] msec
|
39
|
+
*/
|
40
|
+
inline bool MutexLockTimeout(MutexHandle& mutex, int msec)
|
41
|
+
{
|
42
|
+
DWORD ret = WaitForSingleObject(mutex, msec);
|
43
|
+
if (ret == WAIT_OBJECT_0) {
|
44
|
+
return true;
|
45
|
+
}
|
46
|
+
if (ret == WAIT_TIMEOUT) {
|
47
|
+
return false;
|
48
|
+
}
|
49
|
+
/* ret == WAIT_ABANDONED */
|
50
|
+
assert(0);
|
51
|
+
return false;
|
52
|
+
}
|
53
|
+
inline void MutexUnlock(MutexHandle& mutex)
|
54
|
+
{
|
55
|
+
// ReleaseSemaphore(mutex, 1, NULL);
|
56
|
+
ReleaseMutex(mutex);
|
57
|
+
}
|
58
|
+
inline void MutexTerm(MutexHandle& mutex) { CloseHandle(mutex); }
|
59
|
+
#else
|
60
|
+
typedef pthread_mutex_t MutexHandle;

/**
	initialize mutex with the default attributes
	@note the result of pthread_mutex_init is ignored
*/
inline void MutexInit(MutexHandle& mutex)
{
#if 1
	pthread_mutex_init(&mutex, NULL);
#else
	pthread_mutexattr_t attr;
	pthread_mutexattr_init(&attr);
	if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_TIMED_NP)) {
		perror("pthread_mutexattr_settype");
		exit(1);
	}
	pthread_mutex_init(&mutex, &attr);
	pthread_mutexattr_destroy(&attr);
#endif
}

/** block until the mutex is acquired */
inline void MutexLock(MutexHandle& mutex) { pthread_mutex_lock(&mutex); }

#if 0
/**
	try to acquire the mutex, waiting at most msec milliseconds
	@param msec [in] timeout in milliseconds (negative values are treated as 0)
	@retval true the mutex was acquired
	@retval false pthread_mutex_timedlock failed (e.g. timeout)
*/
inline bool MutexLockTimeout(MutexHandle& mutex, int msec)
{
	if (msec < 0) msec = 0;
	timespec absTime;
	clock_gettime(CLOCK_REALTIME, &absTime);
	absTime.tv_sec += msec / 1000;
	/* the remainder is in msec; tv_nsec is in nsec */
	absTime.tv_nsec += (msec % 1000) * 1000000L;
	/* keep tv_nsec in [0, 1e9) as required for a valid timespec */
	if (absTime.tv_nsec >= 1000000000L) {
		absTime.tv_sec++;
		absTime.tv_nsec -= 1000000000L;
	}
	return pthread_mutex_timedlock(&mutex, &absTime) == 0;
}
#endif

/** release the mutex */
inline void MutexUnlock(MutexHandle& mutex) { pthread_mutex_unlock(&mutex); }

/** destroy a mutex initialized by MutexInit */
inline void MutexTerm(MutexHandle& mutex) { pthread_mutex_destroy(&mutex); }
|
90
|
+
#endif
|
91
|
+
|
92
|
+
/**
	scoped lock guard
	calls t.lock() in the constructor and t.unlock() in the destructor
	@tparam T any type that provides lock() and unlock()
	@note not copyable; a copied guard would call unlock() a second time
*/
template<class T>
class AutoLockT {
public:
	explicit AutoLockT(T &t)
		: t_(t)
	{
		t_.lock();
	}
	~AutoLockT()
	{
		t_.unlock();
	}
private:
	T& t_;
	/* declared but not defined : copying is forbidden */
	AutoLockT(const AutoLockT&);
	AutoLockT& operator=(const AutoLockT&);
};
|
108
|
+
|
109
|
+
} // cybozu::thread
|
110
|
+
|
111
|
+
class Mutex {
|
112
|
+
friend class cybozu::ConditionVariable;
|
113
|
+
public:
|
114
|
+
Mutex()
|
115
|
+
{
|
116
|
+
thread::MutexInit(hdl_);
|
117
|
+
}
|
118
|
+
~Mutex()
|
119
|
+
{
|
120
|
+
thread::MutexTerm(hdl_);
|
121
|
+
}
|
122
|
+
void lock()
|
123
|
+
{
|
124
|
+
thread::MutexLock(hdl_);
|
125
|
+
}
|
126
|
+
#if 0
|
127
|
+
bool lockTimeout(int msec)
|
128
|
+
{
|
129
|
+
return thread::MutexLockTimeout(hdl_, msec);
|
130
|
+
}
|
131
|
+
#endif
|
132
|
+
void unlock()
|
133
|
+
{
|
134
|
+
thread::MutexUnlock(hdl_);
|
135
|
+
}
|
136
|
+
private:
|
137
|
+
Mutex(const Mutex&);
|
138
|
+
Mutex& operator=(const Mutex&);
|
139
|
+
thread::MutexHandle hdl_;
|
140
|
+
};
|
141
|
+
|
142
|
+
typedef cybozu::thread::AutoLockT<cybozu::Mutex> AutoLock;
|
143
|
+
|
144
|
+
} // cybozu
|
@@ -0,0 +1,96 @@
|
|
1
|
+
#pragma once
|
2
|
+
/**
|
3
|
+
@file
|
4
|
+
@brief wrapper of MeCab
|
5
|
+
|
6
|
+
@author MITSUNARI Shigeo(@herumi)
|
7
|
+
*/
|
8
|
+
#include <string>
|
9
|
+
#include <assert.h>
|
10
|
+
#ifdef _WIN32
|
11
|
+
#include <winsock2.h>
|
12
|
+
#endif
|
13
|
+
#include "mecab.h"
|
14
|
+
#include <cybozu/exception.hpp>
|
15
|
+
#ifdef _WIN32
|
16
|
+
#pragma comment(lib, "libmecab.lib")
|
17
|
+
#endif
|
18
|
+
|
19
|
+
namespace cybozu { namespace nlp {
|
20
|
+
|
21
|
+
struct Mecab {
|
22
|
+
Mecab(const char *option = "-O wakati")
|
23
|
+
: tagger_(MeCab::createTagger(option))
|
24
|
+
, node_(0)
|
25
|
+
{
|
26
|
+
if (tagger_ == 0) {
|
27
|
+
throw cybozu::Exception("nlp:mecab:createTagger");
|
28
|
+
}
|
29
|
+
}
|
30
|
+
/**
|
31
|
+
T must have push_back(std::string)
|
32
|
+
*/
|
33
|
+
template<class T>
|
34
|
+
bool parse(T& out, const char *str, size_t strLen = 0)
|
35
|
+
{
|
36
|
+
if (strLen == 0) {
|
37
|
+
strLen = strlen(str);
|
38
|
+
}
|
39
|
+
const char *p = tagger_->parse(str, strLen);
|
40
|
+
if (p == 0) return false;
|
41
|
+
while (*p) {
|
42
|
+
if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') {
|
43
|
+
p++;
|
44
|
+
continue;
|
45
|
+
}
|
46
|
+
const char *q = strchr(p, ' ');
|
47
|
+
if (q == 0) {
|
48
|
+
out.push_back(p);
|
49
|
+
break;
|
50
|
+
}
|
51
|
+
out.push_back(std::string(p, q));
|
52
|
+
p = q + 1;
|
53
|
+
}
|
54
|
+
return true;
|
55
|
+
}
|
56
|
+
void set(const char *str, size_t strLen = 0)
|
57
|
+
{
|
58
|
+
if (strLen == 0) {
|
59
|
+
strLen = strlen(str);
|
60
|
+
}
|
61
|
+
node_ = tagger_->parseToNode(str, strLen);
|
62
|
+
}
|
63
|
+
void set(const std::string& str)
|
64
|
+
{
|
65
|
+
set(&str[0], str.size());
|
66
|
+
}
|
67
|
+
bool isEnd() const
|
68
|
+
{
|
69
|
+
if (node_ == 0) return true;
|
70
|
+
return node_->stat == MECAB_EOS_NODE;
|
71
|
+
}
|
72
|
+
const char *getPos() const { return node_->surface; }
|
73
|
+
size_t getSize() const { return node_->length; }
|
74
|
+
/* adhoc */
|
75
|
+
bool isNoun() const
|
76
|
+
{
|
77
|
+
assert(node_);
|
78
|
+
const char *p = node_->feature;
|
79
|
+
if (node_->length < 2) return false;
|
80
|
+
return p[0] == '\xE5' && p[1] == '\x90' && p[2] == '\x8D';
|
81
|
+
}
|
82
|
+
void next()
|
83
|
+
{
|
84
|
+
assert(node_);
|
85
|
+
node_ = node_->next;
|
86
|
+
}
|
87
|
+
~Mecab()
|
88
|
+
{
|
89
|
+
delete tagger_;
|
90
|
+
}
|
91
|
+
private:
|
92
|
+
MeCab::Tagger *tagger_;
|
93
|
+
const MeCab::Node *node_;
|
94
|
+
};
|
95
|
+
|
96
|
+
} } // cybozu::nlp
|
@@ -0,0 +1,315 @@
|
|
1
|
+
#pragma once
|
2
|
+
/**
|
3
|
+
@file
|
4
|
+
@brief pLSI
|
5
|
+
@author MITSUNARI Shigeo(@herumi)
|
6
|
+
*/
|
7
|
+
|
8
|
+
#include <fstream>
|
9
|
+
#include <map>
|
10
|
+
#include <limits>
|
11
|
+
#include <math.h>
|
12
|
+
#include <cybozu/string_operation.hpp>
|
13
|
+
#include <cybozu/time.hpp>
|
14
|
+
#include <cybozu/nlp/random.hpp>
|
15
|
+
#include <cybozu/nlp/sparse.hpp>
|
16
|
+
#include <cybozu/nlp/top_score.hpp>
|
17
|
+
|
18
|
+
namespace cybozu { namespace nlp {
|
19
|
+
|
20
|
+
namespace local {
|
21
|
+
|
22
|
+
/**
	write the elements of list to out as "{ e0 e1 ... }"
	Every element is followed by one space; an empty list gives "{ }".
	@return out
*/
template<class os, typename T>
os& dump(os& out, const std::vector<T>& list) {
	out << "{ ";
	const size_t n = list.size();
	for (size_t idx = 0; idx < n; ++idx) {
		out << list[idx];
		out << " ";
	}
	out << "}";
	return out;
}
|
31
|
+
|
32
|
+
} // local
|
33
|
+
|
34
|
+
//const double NaN = std::numeric_limits<double>::quiet_NaN();
|
35
|
+
|
36
|
+
typedef cybozu::nlp::SparseVector<bool> BoolSVec;
|
37
|
+
typedef cybozu::nlp::SparseVector<double> DoubleSVec;
|
38
|
+
typedef std::vector<BoolSVec> SMatrix;
|
39
|
+
|
40
|
+
/**
	@retval true map contains key
*/
template<typename T>
bool hasKey(const std::map<T, size_t>& map, T key)
{
	return map.count(key) != 0;
}
|
42
|
+
|
43
|
+
|
44
|
+
class Plsi {
|
45
|
+
public:
|
46
|
+
typedef int ITEM_TYPE;
|
47
|
+
typedef int USER_TYPE;
|
48
|
+
|
49
|
+
enum SEARCH_TYPE {
|
50
|
+
JOINT,
|
51
|
+
CONDITIONAL,
|
52
|
+
POSTERIOR
|
53
|
+
};
|
54
|
+
private:
|
55
|
+
typedef std::vector<double> DoubleVec;
|
56
|
+
typedef std::vector<DoubleVec> DoubleVecVec;
|
57
|
+
std::map<USER_TYPE, size_t> users_;
|
58
|
+
std::vector<USER_TYPE> userlist_;
|
59
|
+
|
60
|
+
std::map<ITEM_TYPE, size_t> items_;
|
61
|
+
std::vector<ITEM_TYPE> itemlist_;
|
62
|
+
|
63
|
+
SMatrix matrix_; // item => users
|
64
|
+
|
65
|
+
// probability of p(z), p(x|z), p(y|z)
|
66
|
+
DoubleVec z_;
|
67
|
+
DoubleVecVec user_z_, item_z_;
|
68
|
+
|
69
|
+
template<class os>
|
70
|
+
friend os& dump(os& out, const Plsi& x) {
|
71
|
+
out << x.matrix_.size() << std::endl;
|
72
|
+
local::dump(out, x.z_) << std::endl;
|
73
|
+
return out;
|
74
|
+
}
|
75
|
+
|
76
|
+
public:
|
77
|
+
size_t get_item_id(ITEM_TYPE item) {
|
78
|
+
if (hasKey(items_, item)) return items_[item];
|
79
|
+
|
80
|
+
size_t id = items_[item] = itemlist_.size();
|
81
|
+
itemlist_.push_back(item);
|
82
|
+
matrix_.push_back(BoolSVec());
|
83
|
+
return id;
|
84
|
+
}
|
85
|
+
|
86
|
+
BoolSVec& getItem(ITEM_TYPE item) {
|
87
|
+
return matrix_[get_item_id(item)];
|
88
|
+
}
|
89
|
+
|
90
|
+
size_t get_user_id(USER_TYPE user) {
|
91
|
+
if (hasKey(users_, user)) return users_[user];
|
92
|
+
|
93
|
+
size_t id = users_[user] = userlist_.size();
|
94
|
+
userlist_.push_back(user);
|
95
|
+
return id;
|
96
|
+
}
|
97
|
+
|
98
|
+
ITEM_TYPE get_item_key(size_t item_id) {
|
99
|
+
return itemlist_[item_id];
|
100
|
+
}
|
101
|
+
|
102
|
+
/**
|
103
|
+
@brief retrieve relevant items for query user
|
104
|
+
*/
|
105
|
+
cybozu::nlp::TopScore<size_t>::Table search_items(USER_TYPE user, int top = 10) {
|
106
|
+
int K = (int)z_.size();
|
107
|
+
size_t user_id = get_user_id(user);
|
108
|
+
|
109
|
+
double p_x = 0; // p(x) = sum p(z)p(x|z)
|
110
|
+
DoubleVec p_z_x; // p(z|x) = p(z)p(x|z) / p(x)
|
111
|
+
for (int k = 0; k < K; k++) {
|
112
|
+
double p = z_[k] * user_z_[k][user_id];
|
113
|
+
p_x += p;
|
114
|
+
p_z_x.push_back(p);
|
115
|
+
}
|
116
|
+
|
117
|
+
cybozu::nlp::TopScore<size_t> ranking(top);
|
118
|
+
for (size_t item_id = 0; item_id < items_.size(); item_id++) {
|
119
|
+
double score = 0; // p(y|x) = sum _z p(y|z) * p(z|x)
|
120
|
+
for (int k = 0; k < K; k++) {
|
121
|
+
score += item_z_[k][item_id] * p_z_x[k];
|
122
|
+
}
|
123
|
+
ranking.add(score / p_x, item_id);
|
124
|
+
}
|
125
|
+
return ranking.getTable();
|
126
|
+
}
|
127
|
+
|
128
|
+
/**
|
129
|
+
@brief retrieve similar items for query item
|
130
|
+
*/
|
131
|
+
cybozu::nlp::TopScore<size_t>::Table similar_items(ITEM_TYPE item, SEARCH_TYPE search_type, int top=10) {
|
132
|
+
int K = (int)z_.size();
|
133
|
+
size_t target_item_id = get_item_id(item);
|
134
|
+
|
135
|
+
cybozu::nlp::TopScore<size_t> ranking(top);
|
136
|
+
if (search_type == POSTERIOR) {
|
137
|
+
for (size_t item_id = 0; item_id < items_.size(); item_id++) {
|
138
|
+
// p(y1=target|y2=item_id) = sum _z p(target|z) * p(item_id|z) * p(z) / p(item_id)
|
139
|
+
double score = 0, p_y = 0;
|
140
|
+
for(int k=0;k<K;++k) {
|
141
|
+
double p = item_z_[k][item_id] * z_[k];
|
142
|
+
p_y += p;
|
143
|
+
score += item_z_[k][target_item_id] * p;
|
144
|
+
}
|
145
|
+
|
146
|
+
ranking.add(score / p_y, item_id);
|
147
|
+
}
|
148
|
+
|
149
|
+
} else if (search_type == CONDITIONAL) {
|
150
|
+
double p_y = 0; // p(y=target) = sum p(z)p(y=target|z)
|
151
|
+
DoubleVec p_z_y; // p(z)p(y=target|z)
|
152
|
+
for (int k = 0; k < K; k++) {
|
153
|
+
double p = z_[k] * item_z_[k][target_item_id];
|
154
|
+
p_y += p;
|
155
|
+
p_z_y.push_back(p);
|
156
|
+
}
|
157
|
+
for (size_t item_id = 0; item_id < items_.size(); item_id++) {
|
158
|
+
// p(y1=item_id|y2=target) = sum _z p(y1|z) * p(z|y2) = sum _z p(y1|z) * p(y2|z) * p(z) / p(y2)
|
159
|
+
double score = 0;
|
160
|
+
for (int k = 0; k < K; k++) {
|
161
|
+
score += item_z_[k][item_id] * p_z_y[k];
|
162
|
+
}
|
163
|
+
|
164
|
+
ranking.add(score / p_y, item_id);
|
165
|
+
}
|
166
|
+
|
167
|
+
} else if (search_type == JOINT) {
|
168
|
+
for (size_t item_id = 0; item_id < items_.size(); item_id++) {
|
169
|
+
// p(y1=item_id, y2=i) = sum _z p(y1|z) * p(y2|z) * p(z)
|
170
|
+
double score = 0;
|
171
|
+
for (int k = 0; k < K; k++) {
|
172
|
+
score += item_z_[k][item_id] * item_z_[k][target_item_id] * z_[k];
|
173
|
+
}
|
174
|
+
ranking.add(score, item_id);
|
175
|
+
}
|
176
|
+
}
|
177
|
+
return ranking.getTable();
|
178
|
+
}
|
179
|
+
|
180
|
+
/**
|
181
|
+
@brief calcurate perplexity
|
182
|
+
*/
|
183
|
+
double perplexity()
|
184
|
+
{
|
185
|
+
int K = (int)z_.size();
|
186
|
+
|
187
|
+
// p(x) = sum p(z)p(x|z)
|
188
|
+
DoubleVec p_x;
|
189
|
+
for (size_t user_id = 0; user_id < users_.size(); user_id++) {
|
190
|
+
double p = 0;
|
191
|
+
for (int k = 0; k < K; k++) {
|
192
|
+
p += z_[k] * user_z_[k][user_id];
|
193
|
+
}
|
194
|
+
p_x.push_back(p);
|
195
|
+
}
|
196
|
+
|
197
|
+
int denom = 0;
|
198
|
+
double sum = 0;
|
199
|
+
for (size_t item_id = 0; item_id < matrix_.size(); item_id++) {
|
200
|
+
BoolSVec& item_users = matrix_[item_id];
|
201
|
+
for (BoolSVec::const_iterator i = item_users.begin(), ie = item_users.end(); i != ie; ++i) {
|
202
|
+
++denom;
|
203
|
+
size_t user_id = i.pos();
|
204
|
+
|
205
|
+
// p(y|x) = sum p(y|z)p(z|x) = sum p(y|z)p(x|z)p(z)/p(x)
|
206
|
+
double p = 0;
|
207
|
+
for (int k = 0; k < K; k++) {
|
208
|
+
p += z_[k] * user_z_[k][user_id] * item_z_[k][item_id];
|
209
|
+
}
|
210
|
+
sum += log(p / p_x[user_id]);
|
211
|
+
}
|
212
|
+
}
|
213
|
+
return exp(-sum/denom);
|
214
|
+
}
|
215
|
+
|
216
|
+
/**
|
217
|
+
@brief start learning (initialize learning)
|
218
|
+
*/
|
219
|
+
void startLearning(int K)
|
220
|
+
{
|
221
|
+
size_t M = users_.size();
|
222
|
+
size_t N = items_.size();
|
223
|
+
user_z_.resize(K);
|
224
|
+
item_z_.resize(K);
|
225
|
+
cybozu::nlp::UniformRandomGenerator rand(0.25, 0.75);
|
226
|
+
for (int k = 0; k < K; k++) {
|
227
|
+
// initialize p(z=k)
|
228
|
+
z_.push_back(1.0/K);
|
229
|
+
|
230
|
+
// initialize p(x=user|z=k)
|
231
|
+
DoubleVec& uvec = user_z_[k];
|
232
|
+
for (size_t j = 0; j < M; j++) uvec.push_back(1.0/M);
|
233
|
+
|
234
|
+
// initialize p(y=item|z=k)
|
235
|
+
DoubleVec& ivec = item_z_[k];
|
236
|
+
double s = 0;
|
237
|
+
for (size_t j = 0; j < N; j++) {
|
238
|
+
double r = rand.getDouble();
|
239
|
+
ivec.push_back(r);
|
240
|
+
s += r;
|
241
|
+
}
|
242
|
+
for(size_t j = 0; j < N; j++) ivec[j] /= s;
|
243
|
+
}
|
244
|
+
|
245
|
+
}
|
246
|
+
|
247
|
+
/**
|
248
|
+
@brief step learning (called repeatedly after initialization learning)
|
249
|
+
@param[in] beta temperature for tempered EM
|
250
|
+
@return likelyhood for previous iteration
|
251
|
+
*/
|
252
|
+
double step(double beta = 1)
|
253
|
+
{
|
254
|
+
int K = (int)z_.size();
|
255
|
+
|
256
|
+
DoubleVec z_numer;
|
257
|
+
DoubleVecVec user_numer, item_numer;
|
258
|
+
z_numer.resize(K);
|
259
|
+
user_numer.resize(K);
|
260
|
+
item_numer.resize(K);
|
261
|
+
for (int k = 0; k < K; k++) {
|
262
|
+
user_numer[k].resize(users_.size());
|
263
|
+
item_numer[k].resize(items_.size());
|
264
|
+
}
|
265
|
+
int denom = 0;
|
266
|
+
double likelihood = 0;
|
267
|
+
DoubleVec p_z_xy;
|
268
|
+
p_z_xy.resize(K);
|
269
|
+
|
270
|
+
for (size_t item_id = 0; item_id < matrix_.size(); ++item_id) {
|
271
|
+
BoolSVec& item_users = matrix_[item_id];
|
272
|
+
for (BoolSVec::const_iterator i = item_users.begin(), ie = item_users.end(); i != ie; ++i) {
|
273
|
+
// when n(x, y) = 1(true)
|
274
|
+
++denom;
|
275
|
+
size_t user_id = i.pos();
|
276
|
+
|
277
|
+
// E-step: p(z|x,y)
|
278
|
+
double sum = 0;
|
279
|
+
for (int k = 0; k < K; k++) {
|
280
|
+
// p(z=k)p(x=user_id|z=k)p(y=item_id|z=k)
|
281
|
+
double p = pow(z_[k] * user_z_[k][user_id] * item_z_[k][item_id], beta);
|
282
|
+
p_z_xy[k] = p;
|
283
|
+
sum += p;
|
284
|
+
}
|
285
|
+
|
286
|
+
// normalize & M-step
|
287
|
+
for (int k = 0; k < K; k++) {
|
288
|
+
double p = p_z_xy[k] / sum;
|
289
|
+
|
290
|
+
user_numer[k][user_id] += p;
|
291
|
+
item_numer[k][item_id] += p;
|
292
|
+
z_numer[k] += p;
|
293
|
+
}
|
294
|
+
likelihood += log(sum);
|
295
|
+
}
|
296
|
+
}
|
297
|
+
|
298
|
+
// M-step: update
|
299
|
+
for (int k = 0; k < K; k++) {
|
300
|
+
double z_num = z_numer[k];
|
301
|
+
z_[k] = z_num / denom;
|
302
|
+
for (size_t item_id = 0; item_id < items_.size(); ++item_id) {
|
303
|
+
item_z_[k][item_id] = item_numer[k][item_id] / z_num;
|
304
|
+
}
|
305
|
+
for (size_t user_id = 0; user_id < users_.size(); ++user_id) {
|
306
|
+
user_z_[k][user_id] = user_numer[k][user_id] / z_num;
|
307
|
+
}
|
308
|
+
}
|
309
|
+
|
310
|
+
// log-likelihood of previous iteration
|
311
|
+
return likelihood;
|
312
|
+
}
|
313
|
+
};
|
314
|
+
|
315
|
+
} } // cybozu::nlp
|
@@ -0,0 +1,74 @@
|
|
1
|
+
#pragma once
|
2
|
+
/**
|
3
|
+
@file
|
4
|
+
@brief normal random generator
|
5
|
+
|
6
|
+
@author MITSUNARI Shigeo(@herumi)
|
7
|
+
@author MITSUNARI Shigeo
|
8
|
+
*/
|
9
|
+
#include <cybozu/xorshift.hpp>
|
10
|
+
|
11
|
+
namespace cybozu { namespace nlp {
|
12
|
+
|
13
|
+
/*
|
14
|
+
use xor shift
|
15
|
+
*/
|
16
|
+
class UniformRandomGenerator {
|
17
|
+
double a_;
|
18
|
+
double b_;
|
19
|
+
cybozu::XorShift rg;
|
20
|
+
public:
|
21
|
+
/* generate uniform random value in [a, b) */
|
22
|
+
explicit UniformRandomGenerator(double a = 0, double b = 1, int seed = 0)
|
23
|
+
: a_(a)
|
24
|
+
, b_(b)
|
25
|
+
, rg(seed)
|
26
|
+
{
|
27
|
+
}
|
28
|
+
void init(int seed = 0)
|
29
|
+
{
|
30
|
+
rg.init(seed);
|
31
|
+
}
|
32
|
+
/* [0, 2^32) random number */
|
33
|
+
uint32_t operator()() { return rg.get32(); }
|
34
|
+
uint32_t get32() { return rg.get32(); }
|
35
|
+
uint64_t get64() { return rg.get64(); }
|
36
|
+
/* [a, b) random number */
|
37
|
+
double getDouble()
|
38
|
+
{
|
39
|
+
uint32_t x = get32() >> 5;
|
40
|
+
uint32_t y = get32() >> 6;
|
41
|
+
double z = (x * double(1U << 26) + y) * (1.0 / double(1LL << 53));
|
42
|
+
return (b_ - a_) * z + a_;
|
43
|
+
}
|
44
|
+
};
|
45
|
+
|
46
|
+
/*
|
47
|
+
normal random generator
|
48
|
+
*/
|
49
|
+
class NormalRandomGenerator {
|
50
|
+
UniformRandomGenerator gen_;
|
51
|
+
double u_;
|
52
|
+
double s_;
|
53
|
+
public:
|
54
|
+
explicit NormalRandomGenerator(double u = 0, double s = 1, int seed = 0)
|
55
|
+
: gen_(seed)
|
56
|
+
, u_(u)
|
57
|
+
, s_(s)
|
58
|
+
{
|
59
|
+
}
|
60
|
+
void init(int seed = 0)
|
61
|
+
{
|
62
|
+
gen_.init(seed);
|
63
|
+
}
|
64
|
+
double get()
|
65
|
+
{
|
66
|
+
double sum = -6;
|
67
|
+
for (int i = 0; i < 12; i++) {
|
68
|
+
sum += gen_.getDouble();
|
69
|
+
}
|
70
|
+
return sum * s_ + u_;
|
71
|
+
}
|
72
|
+
};
|
73
|
+
|
74
|
+
} } // cybozu::nlp
|