ooxml_crypt 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (264) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +58 -0
  5. data/Rakefile +12 -0
  6. data/bin/console +15 -0
  7. data/bin/setup +8 -0
  8. data/ext/ooxml_crypt/extconf.rb +18 -0
  9. data/ext/ooxml_crypt/ooxml_crypt.c +27 -0
  10. data/ext/ooxml_crypt/ooxml_crypt.h +7 -0
  11. data/lib/ooxml_crypt/version.rb +5 -0
  12. data/lib/ooxml_crypt.rb +75 -0
  13. data/vendor/cybozulib/.github/workflows/main.yml +12 -0
  14. data/vendor/cybozulib/.gitignore +5 -0
  15. data/vendor/cybozulib/CMakeLists.txt +6 -0
  16. data/vendor/cybozulib/COPYRIGHT +27 -0
  17. data/vendor/cybozulib/Makefile +26 -0
  18. data/vendor/cybozulib/bin/libeay32.dll +0 -0
  19. data/vendor/cybozulib/bin/libmecab.dll +0 -0
  20. data/vendor/cybozulib/bin/ssleay32.dll +0 -0
  21. data/vendor/cybozulib/common.mk +116 -0
  22. data/vendor/cybozulib/common.props +25 -0
  23. data/vendor/cybozulib/cybozulib.sln +286 -0
  24. data/vendor/cybozulib/debug.props +14 -0
  25. data/vendor/cybozulib/include/cybozu/array.hpp +197 -0
  26. data/vendor/cybozulib/include/cybozu/atoi.hpp +238 -0
  27. data/vendor/cybozulib/include/cybozu/atomic.hpp +146 -0
  28. data/vendor/cybozulib/include/cybozu/base64.hpp +210 -0
  29. data/vendor/cybozulib/include/cybozu/benchmark.hpp +212 -0
  30. data/vendor/cybozulib/include/cybozu/bfd.hpp +105 -0
  31. data/vendor/cybozulib/include/cybozu/bit_operation.hpp +139 -0
  32. data/vendor/cybozulib/include/cybozu/bitvector.hpp +358 -0
  33. data/vendor/cybozulib/include/cybozu/condition_variable.hpp +113 -0
  34. data/vendor/cybozulib/include/cybozu/condition_variable_cs.hpp +74 -0
  35. data/vendor/cybozulib/include/cybozu/config.hpp +392 -0
  36. data/vendor/cybozulib/include/cybozu/critical_section.hpp +60 -0
  37. data/vendor/cybozulib/include/cybozu/crypto.hpp +321 -0
  38. data/vendor/cybozulib/include/cybozu/csucvector.hpp +624 -0
  39. data/vendor/cybozulib/include/cybozu/csv.hpp +294 -0
  40. data/vendor/cybozulib/include/cybozu/data_type.hpp +27 -0
  41. data/vendor/cybozulib/include/cybozu/endian.hpp +224 -0
  42. data/vendor/cybozulib/include/cybozu/env.hpp +63 -0
  43. data/vendor/cybozulib/include/cybozu/event.hpp +122 -0
  44. data/vendor/cybozulib/include/cybozu/exception.hpp +253 -0
  45. data/vendor/cybozulib/include/cybozu/file.hpp +626 -0
  46. data/vendor/cybozulib/include/cybozu/fmindex.hpp +291 -0
  47. data/vendor/cybozulib/include/cybozu/format.hpp +93 -0
  48. data/vendor/cybozulib/include/cybozu/frequency.hpp +264 -0
  49. data/vendor/cybozulib/include/cybozu/hash.hpp +67 -0
  50. data/vendor/cybozulib/include/cybozu/inttype.hpp +174 -0
  51. data/vendor/cybozulib/include/cybozu/itoa.hpp +336 -0
  52. data/vendor/cybozulib/include/cybozu/json.hpp +120 -0
  53. data/vendor/cybozulib/include/cybozu/line_stream.hpp +149 -0
  54. data/vendor/cybozulib/include/cybozu/link_libeay32.hpp +21 -0
  55. data/vendor/cybozulib/include/cybozu/link_mpir.hpp +18 -0
  56. data/vendor/cybozulib/include/cybozu/link_ssleay32.hpp +19 -0
  57. data/vendor/cybozulib/include/cybozu/log.hpp +237 -0
  58. data/vendor/cybozulib/include/cybozu/minixml.hpp +452 -0
  59. data/vendor/cybozulib/include/cybozu/mmap.hpp +143 -0
  60. data/vendor/cybozulib/include/cybozu/mutex.hpp +144 -0
  61. data/vendor/cybozulib/include/cybozu/nlp/mecab.hpp +96 -0
  62. data/vendor/cybozulib/include/cybozu/nlp/plsi.hpp +315 -0
  63. data/vendor/cybozulib/include/cybozu/nlp/random.hpp +74 -0
  64. data/vendor/cybozulib/include/cybozu/nlp/sparse.hpp +529 -0
  65. data/vendor/cybozulib/include/cybozu/nlp/svd.hpp +486 -0
  66. data/vendor/cybozulib/include/cybozu/nlp/tfidf.hpp +226 -0
  67. data/vendor/cybozulib/include/cybozu/nlp/top_score.hpp +75 -0
  68. data/vendor/cybozulib/include/cybozu/option.hpp +743 -0
  69. data/vendor/cybozulib/include/cybozu/parallel.hpp +88 -0
  70. data/vendor/cybozulib/include/cybozu/pcg.hpp +72 -0
  71. data/vendor/cybozulib/include/cybozu/process.hpp +324 -0
  72. data/vendor/cybozulib/include/cybozu/quit_signal_handler.hpp +66 -0
  73. data/vendor/cybozulib/include/cybozu/random_generator.hpp +144 -0
  74. data/vendor/cybozulib/include/cybozu/regex.hpp +463 -0
  75. data/vendor/cybozulib/include/cybozu/select8.hpp +279 -0
  76. data/vendor/cybozulib/include/cybozu/serializer.hpp +363 -0
  77. data/vendor/cybozulib/include/cybozu/sha1.hpp +209 -0
  78. data/vendor/cybozulib/include/cybozu/sha2.hpp +506 -0
  79. data/vendor/cybozulib/include/cybozu/siphash.hpp +105 -0
  80. data/vendor/cybozulib/include/cybozu/socket.hpp +785 -0
  81. data/vendor/cybozulib/include/cybozu/ssl.hpp +203 -0
  82. data/vendor/cybozulib/include/cybozu/stacktrace.hpp +291 -0
  83. data/vendor/cybozulib/include/cybozu/stream.hpp +269 -0
  84. data/vendor/cybozulib/include/cybozu/string.hpp +1746 -0
  85. data/vendor/cybozulib/include/cybozu/string_operation.hpp +365 -0
  86. data/vendor/cybozulib/include/cybozu/sucvector.hpp +378 -0
  87. data/vendor/cybozulib/include/cybozu/test.hpp +373 -0
  88. data/vendor/cybozulib/include/cybozu/thread.hpp +229 -0
  89. data/vendor/cybozulib/include/cybozu/time.hpp +281 -0
  90. data/vendor/cybozulib/include/cybozu/tls.hpp +115 -0
  91. data/vendor/cybozulib/include/cybozu/unordered_map.hpp +13 -0
  92. data/vendor/cybozulib/include/cybozu/unordered_set.hpp +13 -0
  93. data/vendor/cybozulib/include/cybozu/v128.hpp +376 -0
  94. data/vendor/cybozulib/include/cybozu/wavelet_matrix.hpp +345 -0
  95. data/vendor/cybozulib/include/cybozu/xorshift.hpp +189 -0
  96. data/vendor/cybozulib/include/cybozu/zlib.hpp +325 -0
  97. data/vendor/cybozulib/include/sais.hxx +364 -0
  98. data/vendor/cybozulib/misc/make_select8tbl.cpp +26 -0
  99. data/vendor/cybozulib/mk.bat +37 -0
  100. data/vendor/cybozulib/readme.md +29 -0
  101. data/vendor/cybozulib/release.props +12 -0
  102. data/vendor/cybozulib/sample/Makefile +30 -0
  103. data/vendor/cybozulib/sample/csucvector_smpl.cpp +42 -0
  104. data/vendor/cybozulib/sample/data/svd/org/test1.S +4 -0
  105. data/vendor/cybozulib/sample/data/svd/org/test1.U +4 -0
  106. data/vendor/cybozulib/sample/data/svd/org/test1.V +6 -0
  107. data/vendor/cybozulib/sample/data/svd/test1 +4 -0
  108. data/vendor/cybozulib/sample/data/svd/test2 +4 -0
  109. data/vendor/cybozulib/sample/desymbol.cpp +127 -0
  110. data/vendor/cybozulib/sample/exception_smpl.cpp +46 -0
  111. data/vendor/cybozulib/sample/fmindex_smpl.cpp +231 -0
  112. data/vendor/cybozulib/sample/log_smpl.cpp +19 -0
  113. data/vendor/cybozulib/sample/mecab_smpl.cpp +37 -0
  114. data/vendor/cybozulib/sample/option2_smpl.cpp +68 -0
  115. data/vendor/cybozulib/sample/option_smpl.cpp +42 -0
  116. data/vendor/cybozulib/sample/plsi_smpl.cpp +207 -0
  117. data/vendor/cybozulib/sample/proj/exception_smpl.vcproj +184 -0
  118. data/vendor/cybozulib/sample/proj/mecab_smpl.vcproj +184 -0
  119. data/vendor/cybozulib/sample/proj/ssl_smpl/ssl_smpl.vcxproj +85 -0
  120. data/vendor/cybozulib/sample/proj/ssl_smpl.vcproj +347 -0
  121. data/vendor/cybozulib/sample/proj/stacktrace_smpl/stacktrace_smpl.vcxproj +85 -0
  122. data/vendor/cybozulib/sample/proj/svd_smpl.vcproj +184 -0
  123. data/vendor/cybozulib/sample/quit_signal_handler.cpp +30 -0
  124. data/vendor/cybozulib/sample/serializer_smpl.cpp +196 -0
  125. data/vendor/cybozulib/sample/socket_smpl.cpp +82 -0
  126. data/vendor/cybozulib/sample/ssl_smpl.cpp +39 -0
  127. data/vendor/cybozulib/sample/stacktrace_smpl.cpp +52 -0
  128. data/vendor/cybozulib/sample/svd_bench_smpl.cpp +143 -0
  129. data/vendor/cybozulib/sample/svd_smpl.cpp +94 -0
  130. data/vendor/cybozulib/sample/wm_bench_smpl.cpp +182 -0
  131. data/vendor/cybozulib/sample/zlib_smpl.cpp +41 -0
  132. data/vendor/cybozulib/src/Makefile +8 -0
  133. data/vendor/cybozulib/src/base/Makefile +19 -0
  134. data/vendor/cybozulib/test/Makefile +12 -0
  135. data/vendor/cybozulib/test/base/Makefile +37 -0
  136. data/vendor/cybozulib/test/base/array_test.cpp +173 -0
  137. data/vendor/cybozulib/test/base/atoi_test.cpp +774 -0
  138. data/vendor/cybozulib/test/base/atomic_test.cpp +49 -0
  139. data/vendor/cybozulib/test/base/base64_test.cpp +113 -0
  140. data/vendor/cybozulib/test/base/bit_operation_test.cpp +134 -0
  141. data/vendor/cybozulib/test/base/bitvector_test.cpp +204 -0
  142. data/vendor/cybozulib/test/base/condition_variable_cs_test.cpp +92 -0
  143. data/vendor/cybozulib/test/base/condition_variable_test.cpp +88 -0
  144. data/vendor/cybozulib/test/base/config_test.cpp +236 -0
  145. data/vendor/cybozulib/test/base/crypto_test.cpp +122 -0
  146. data/vendor/cybozulib/test/base/csucvector_test.cpp +63 -0
  147. data/vendor/cybozulib/test/base/csv_test.cpp +182 -0
  148. data/vendor/cybozulib/test/base/data/a.xml +26 -0
  149. data/vendor/cybozulib/test/base/endian_test.cpp +56 -0
  150. data/vendor/cybozulib/test/base/env_test.cpp +22 -0
  151. data/vendor/cybozulib/test/base/event_test.cpp +41 -0
  152. data/vendor/cybozulib/test/base/file_test.cpp +233 -0
  153. data/vendor/cybozulib/test/base/fmindex_test.cpp +118 -0
  154. data/vendor/cybozulib/test/base/format_test.cpp +12 -0
  155. data/vendor/cybozulib/test/base/frequency_test.cpp +104 -0
  156. data/vendor/cybozulib/test/base/itoa_test.cpp +522 -0
  157. data/vendor/cybozulib/test/base/line_stream_test.cpp +208 -0
  158. data/vendor/cybozulib/test/base/mecab_test.cpp +41 -0
  159. data/vendor/cybozulib/test/base/minixml_test.cpp +103 -0
  160. data/vendor/cybozulib/test/base/mmap_test.cpp +15 -0
  161. data/vendor/cybozulib/test/base/option_test.cpp +487 -0
  162. data/vendor/cybozulib/test/base/parallel_test.cpp +48 -0
  163. data/vendor/cybozulib/test/base/proj/array_test/array_test.vcxproj +86 -0
  164. data/vendor/cybozulib/test/base/proj/atoi_test/atoi_test.vcxproj +86 -0
  165. data/vendor/cybozulib/test/base/proj/atomic_test/atomic_test.vcxproj +86 -0
  166. data/vendor/cybozulib/test/base/proj/base64_test/base64_test.vcxproj +86 -0
  167. data/vendor/cybozulib/test/base/proj/condition_variable_cs_test/condition_variable_cs_test.vcxproj +86 -0
  168. data/vendor/cybozulib/test/base/proj/condition_variable_test/condition_variable_test.vcxproj +86 -0
  169. data/vendor/cybozulib/test/base/proj/config_test/config_test.vcxproj +86 -0
  170. data/vendor/cybozulib/test/base/proj/csv_test/csv_test.vcxproj +86 -0
  171. data/vendor/cybozulib/test/base/proj/endian_test/endian_test.vcxproj +86 -0
  172. data/vendor/cybozulib/test/base/proj/env_test/env_test.vcxproj +86 -0
  173. data/vendor/cybozulib/test/base/proj/event_test/event_test.vcxproj +86 -0
  174. data/vendor/cybozulib/test/base/proj/file_test/file_test.vcxproj +86 -0
  175. data/vendor/cybozulib/test/base/proj/itoa_test/itoa_test.vcxproj +86 -0
  176. data/vendor/cybozulib/test/base/proj/mecab_test/mecab_test.vcxproj +88 -0
  177. data/vendor/cybozulib/test/base/proj/minixml_test/minixml_test.vcxproj +86 -0
  178. data/vendor/cybozulib/test/base/proj/mmap_test/mmap_test.vcxproj +86 -0
  179. data/vendor/cybozulib/test/base/proj/serializer_test/serializer_test.vcxproj +86 -0
  180. data/vendor/cybozulib/test/base/proj/sha1_test/sha1_test.vcxproj +86 -0
  181. data/vendor/cybozulib/test/base/proj/stream_test/stream_test.vcxproj +86 -0
  182. data/vendor/cybozulib/test/base/proj/string_operation_test/string_operation_test.vcxproj +86 -0
  183. data/vendor/cybozulib/test/base/proj/string_test/string_test.vcxproj +86 -0
  184. data/vendor/cybozulib/test/base/proj/thread_test/thread_test.vcxproj +86 -0
  185. data/vendor/cybozulib/test/base/proj/time_test/time_test.vcxproj +86 -0
  186. data/vendor/cybozulib/test/base/proj/tls_test/tls_test.vcxproj +86 -0
  187. data/vendor/cybozulib/test/base/proj/zlib_test/zlib_test.vcxproj +86 -0
  188. data/vendor/cybozulib/test/base/random_generator_test.cpp +28 -0
  189. data/vendor/cybozulib/test/base/regex_test.cpp +74 -0
  190. data/vendor/cybozulib/test/base/serializer_test.cpp +483 -0
  191. data/vendor/cybozulib/test/base/sha1_test.cpp +61 -0
  192. data/vendor/cybozulib/test/base/sha2_test.cpp +191 -0
  193. data/vendor/cybozulib/test/base/siphash_test.cpp +33 -0
  194. data/vendor/cybozulib/test/base/socket_test.cpp +76 -0
  195. data/vendor/cybozulib/test/base/stream_test.cpp +101 -0
  196. data/vendor/cybozulib/test/base/string_operation_test.cpp +340 -0
  197. data/vendor/cybozulib/test/base/string_test.cpp +1705 -0
  198. data/vendor/cybozulib/test/base/sucvector_test.cpp +312 -0
  199. data/vendor/cybozulib/test/base/thread_test.cpp +62 -0
  200. data/vendor/cybozulib/test/base/time_test.cpp +164 -0
  201. data/vendor/cybozulib/test/base/tls_test.cpp +50 -0
  202. data/vendor/cybozulib/test/base/wavelet_matrix_test.cpp +145 -0
  203. data/vendor/cybozulib/test/base/zlib_test.cpp +371 -0
  204. data/vendor/cybozulib/test/nlp/Makefile +27 -0
  205. data/vendor/cybozulib/test/nlp/proj/random_test.vcproj +184 -0
  206. data/vendor/cybozulib/test/nlp/proj/sparse_test.vcproj +184 -0
  207. data/vendor/cybozulib/test/nlp/proj/svd_test.vcproj +184 -0
  208. data/vendor/cybozulib/test/nlp/random_test.cpp +62 -0
  209. data/vendor/cybozulib/test/nlp/sparse_test.cpp +347 -0
  210. data/vendor/cybozulib/test/nlp/svd_test.cpp +234 -0
  211. data/vendor/cybozulib/test/nlp/top_score_test.cpp +40 -0
  212. data/vendor/cybozulib/tool/create_vcproj.py +186 -0
  213. data/vendor/cybozulib/tool/vcproj_tmpl.py +185 -0
  214. data/vendor/msoffice/COPYRIGHT +27 -0
  215. data/vendor/msoffice/Makefile +29 -0
  216. data/vendor/msoffice/bin/64/msoc.dll +0 -0
  217. data/vendor/msoffice/bin/64/msocsample.exe +0 -0
  218. data/vendor/msoffice/bin/64/msoffice-crypt.exe +0 -0
  219. data/vendor/msoffice/bin/msoc.dll +0 -0
  220. data/vendor/msoffice/bin/msocsample.exe +0 -0
  221. data/vendor/msoffice/bin/msoffice-crypt.exe +0 -0
  222. data/vendor/msoffice/common.mk +71 -0
  223. data/vendor/msoffice/common.props +26 -0
  224. data/vendor/msoffice/debug.props +14 -0
  225. data/vendor/msoffice/include/attack.hpp +211 -0
  226. data/vendor/msoffice/include/cfb.hpp +777 -0
  227. data/vendor/msoffice/include/crypto_util.hpp +450 -0
  228. data/vendor/msoffice/include/custom_sha1.hpp +342 -0
  229. data/vendor/msoffice/include/decode.hpp +240 -0
  230. data/vendor/msoffice/include/encode.hpp +221 -0
  231. data/vendor/msoffice/include/make_dataspace.hpp +316 -0
  232. data/vendor/msoffice/include/msoc.h +129 -0
  233. data/vendor/msoffice/include/resource.hpp +7 -0
  234. data/vendor/msoffice/include/standard_encryption.hpp +145 -0
  235. data/vendor/msoffice/include/uint32vec.hpp +179 -0
  236. data/vendor/msoffice/include/util.hpp +212 -0
  237. data/vendor/msoffice/lib/.emptydir +0 -0
  238. data/vendor/msoffice/misc/decrypt-xls.vbs +46 -0
  239. data/vendor/msoffice/mk.bat +1 -0
  240. data/vendor/msoffice/mkdll.bat +3 -0
  241. data/vendor/msoffice/msoc.def +13 -0
  242. data/vendor/msoffice/msocsample.py +178 -0
  243. data/vendor/msoffice/msoffice12.sln +31 -0
  244. data/vendor/msoffice/readme.md +110 -0
  245. data/vendor/msoffice/release.props +28 -0
  246. data/vendor/msoffice/src/Makefile +19 -0
  247. data/vendor/msoffice/src/attack.cpp +124 -0
  248. data/vendor/msoffice/src/cfb_test.cpp +77 -0
  249. data/vendor/msoffice/src/minisample.c +54 -0
  250. data/vendor/msoffice/src/msocdll.cpp +276 -0
  251. data/vendor/msoffice/src/msocsample.c +136 -0
  252. data/vendor/msoffice/src/msoffice-crypt.cpp +219 -0
  253. data/vendor/msoffice/src/proj/attack/attack.vcxproj +88 -0
  254. data/vendor/msoffice/src/proj/main/msoffice-crypt.vcxproj +88 -0
  255. data/vendor/msoffice/src/sha1.cpp +234 -0
  256. data/vendor/msoffice/test/Makefile +20 -0
  257. data/vendor/msoffice/test/cfb_test.cpp +74 -0
  258. data/vendor/msoffice/test/hash_test.cpp +59 -0
  259. data/vendor/msoffice/test/proj/cfb/cfb_test.vcxproj +90 -0
  260. data/vendor/msoffice/test/proj/hash/hash_test.vcxproj +90 -0
  261. data/vendor/msoffice/test/sampl.bat +8 -0
  262. data/vendor/msoffice/test_all.py +46 -0
  263. data/vendor/update +4 -0
  264. metadata +351 -0
@@ -0,0 +1,144 @@
1
+ #pragma once
2
+ /**
3
+ @file
4
+ @brief mutex
5
+
6
+ @author MITSUNARI Shigeo(@herumi)
7
+ @author MITSUNARI Shigeo
8
+ */
9
+
10
+ #ifdef _WIN32
11
+ #ifndef WIN32_LEAN_AND_MEAN
12
+ #define WIN32_LEAN_AND_MEAN
13
+ #endif
14
+ #include <windows.h>
15
+ #else
16
+ #include <pthread.h>
17
+ #include <time.h>
18
+ #endif
19
+ #include <assert.h>
20
+ #include <stdlib.h>
21
+
22
+ namespace cybozu {
23
+
24
+ class ConditionVariable;
25
+
26
+ namespace thread {
27
+
28
+ #ifdef _WIN32
29
+ typedef HANDLE MutexHandle;
30
+ inline void MutexInit(MutexHandle& mutex)
31
+ {
32
+ // mutex = CreateSemaphore(NULL /* no security */, 1 /* init */, 0x7FFFFFFF /* max */, NULL /* no name */);
33
+ mutex = CreateMutex(NULL /* no security */, FALSE /* no owner */, NULL /* no name */);
34
+ }
35
+ inline void MutexLock(MutexHandle& mutex) { WaitForSingleObject(mutex, INFINITE); }
36
+ /*
37
+ return false if timeout
38
+ @param msec [in] msec
39
+ */
40
+ inline bool MutexLockTimeout(MutexHandle& mutex, int msec)
41
+ {
42
+ DWORD ret = WaitForSingleObject(mutex, msec);
43
+ if (ret == WAIT_OBJECT_0) {
44
+ return true;
45
+ }
46
+ if (ret == WAIT_TIMEOUT) {
47
+ return false;
48
+ }
49
+ /* ret == WAIT_ABANDONED */
50
+ assert(0);
51
+ return false;
52
+ }
53
+ inline void MutexUnlock(MutexHandle& mutex)
54
+ {
55
+ // ReleaseSemaphore(mutex, 1, NULL);
56
+ ReleaseMutex(mutex);
57
+ }
58
+ inline void MutexTerm(MutexHandle& mutex) { CloseHandle(mutex); }
59
+ #else
60
/* native mutex handle on POSIX */
typedef pthread_mutex_t MutexHandle;

/**
	initialize mutex with the default attributes
	@param mutex [out] mutex object to initialize
	@note the result of pthread_mutex_init is not checked
*/
inline void MutexInit(MutexHandle& mutex)
{
#if 1
	pthread_mutex_init(&mutex, NULL);
#else
	/* disabled variant : explicitly request a timed mutex */
	pthread_mutexattr_t attr;
	pthread_mutexattr_init(&attr);
	if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_TIMED_NP)) {
		perror("pthread_mutexattr_settype");
		exit(1);
	}
	pthread_mutex_init(&mutex, &attr);
	pthread_mutexattr_destroy(&attr);
#endif
}

/**
	block until the mutex is acquired
*/
inline void MutexLock(MutexHandle& mutex) { pthread_mutex_lock(&mutex); }

#if 0
/**
	try to acquire the mutex within msec milliseconds (currently disabled)
	@param msec [in] timeout in milliseconds; expected to be non-negative
	@return true if pthread_mutex_timedlock succeeded
*/
inline bool MutexLockTimeout(MutexHandle& mutex, int msec)
{
	timespec absTime;
	clock_gettime(CLOCK_REALTIME, &absTime);
	absTime.tv_sec += msec / 1000;
	/* the remainder is in milliseconds; tv_nsec is in nanoseconds */
	absTime.tv_nsec += (msec % 1000) * 1000000L;
	/* keep tv_nsec within [0, 1e9) by carrying into tv_sec */
	if (absTime.tv_nsec >= 1000000000L) {
		absTime.tv_sec++;
		absTime.tv_nsec -= 1000000000L;
	}
	return pthread_mutex_timedlock(&mutex, &absTime) == 0;
}
#endif

/**
	release the mutex
*/
inline void MutexUnlock(MutexHandle& mutex) { pthread_mutex_unlock(&mutex); }

/**
	destroy the mutex initialized by MutexInit
*/
inline void MutexTerm(MutexHandle& mutex) { pthread_mutex_destroy(&mutex); }
90
+ #endif
91
+
92
/**
	scoped lock guard

	calls t.lock() in the constructor and t.unlock() in the destructor.
	T must provide lock() and unlock().
	The referenced object must outlive the guard.
*/
template<class T>
class AutoLockT {
public:
	explicit AutoLockT(T &t)
		: t_(t)
	{
		t_.lock();
	}
	~AutoLockT()
	{
		t_.unlock();
	}
private:
	T& t_;
	/*
		non-copyable : a copied guard would call unlock() a second time
		on the same object without a matching lock()
	*/
	AutoLockT(const AutoLockT&);
	AutoLockT& operator=(const AutoLockT&);
};
108
+
109
+ } // cybozu::thread
110
+
111
+ class Mutex {
112
+ friend class cybozu::ConditionVariable;
113
+ public:
114
+ Mutex()
115
+ {
116
+ thread::MutexInit(hdl_);
117
+ }
118
+ ~Mutex()
119
+ {
120
+ thread::MutexTerm(hdl_);
121
+ }
122
+ void lock()
123
+ {
124
+ thread::MutexLock(hdl_);
125
+ }
126
+ #if 0
127
+ bool lockTimeout(int msec)
128
+ {
129
+ return thread::MutexLockTimeout(hdl_, msec);
130
+ }
131
+ #endif
132
+ void unlock()
133
+ {
134
+ thread::MutexUnlock(hdl_);
135
+ }
136
+ private:
137
+ Mutex(const Mutex&);
138
+ Mutex& operator=(const Mutex&);
139
+ thread::MutexHandle hdl_;
140
+ };
141
+
142
+ typedef cybozu::thread::AutoLockT<cybozu::Mutex> AutoLock;
143
+
144
+ } // cybozu
@@ -0,0 +1,96 @@
1
+ #pragma once
2
+ /**
3
+ @file
4
+ @brief wrapper of MeCab
5
+
6
+ @author MITSUNARI Shigeo(@herumi)
7
+ */
8
+ #include <string>
9
+ #include <assert.h>
10
+ #ifdef _WIN32
11
+ #include <winsock2.h>
12
+ #endif
13
+ #include "mecab.h"
14
+ #include <cybozu/exception.hpp>
15
+ #ifdef _WIN32
16
+ #pragma comment(lib, "libmecab.lib")
17
+ #endif
18
+
19
+ namespace cybozu { namespace nlp {
20
+
21
+ struct Mecab {
22
+ Mecab(const char *option = "-O wakati")
23
+ : tagger_(MeCab::createTagger(option))
24
+ , node_(0)
25
+ {
26
+ if (tagger_ == 0) {
27
+ throw cybozu::Exception("nlp:mecab:createTagger");
28
+ }
29
+ }
30
+ /**
31
+ T must have push_back(std::string)
32
+ */
33
+ template<class T>
34
+ bool parse(T& out, const char *str, size_t strLen = 0)
35
+ {
36
+ if (strLen == 0) {
37
+ strLen = strlen(str);
38
+ }
39
+ const char *p = tagger_->parse(str, strLen);
40
+ if (p == 0) return false;
41
+ while (*p) {
42
+ if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') {
43
+ p++;
44
+ continue;
45
+ }
46
+ const char *q = strchr(p, ' ');
47
+ if (q == 0) {
48
+ out.push_back(p);
49
+ break;
50
+ }
51
+ out.push_back(std::string(p, q));
52
+ p = q + 1;
53
+ }
54
+ return true;
55
+ }
56
+ void set(const char *str, size_t strLen = 0)
57
+ {
58
+ if (strLen == 0) {
59
+ strLen = strlen(str);
60
+ }
61
+ node_ = tagger_->parseToNode(str, strLen);
62
+ }
63
+ void set(const std::string& str)
64
+ {
65
+ set(&str[0], str.size());
66
+ }
67
+ bool isEnd() const
68
+ {
69
+ if (node_ == 0) return true;
70
+ return node_->stat == MECAB_EOS_NODE;
71
+ }
72
+ const char *getPos() const { return node_->surface; }
73
+ size_t getSize() const { return node_->length; }
74
+ /* adhoc */
75
+ bool isNoun() const
76
+ {
77
+ assert(node_);
78
+ const char *p = node_->feature;
79
+ if (node_->length < 2) return false;
80
+ return p[0] == '\xE5' && p[1] == '\x90' && p[2] == '\x8D';
81
+ }
82
+ void next()
83
+ {
84
+ assert(node_);
85
+ node_ = node_->next;
86
+ }
87
+ ~Mecab()
88
+ {
89
+ delete tagger_;
90
+ }
91
+ private:
92
+ MeCab::Tagger *tagger_;
93
+ const MeCab::Node *node_;
94
+ };
95
+
96
+ } } // cybozu::nlp
@@ -0,0 +1,315 @@
1
+ #pragma once
2
+ /**
3
+ @file
4
+ @brief pLSI
5
+ @author MITSUNARI Shigeo(@herumi)
6
+ */
7
+
8
+ #include <fstream>
9
+ #include <map>
10
+ #include <limits>
11
+ #include <math.h>
12
+ #include <cybozu/string_operation.hpp>
13
+ #include <cybozu/time.hpp>
14
+ #include <cybozu/nlp/random.hpp>
15
+ #include <cybozu/nlp/sparse.hpp>
16
+ #include <cybozu/nlp/top_score.hpp>
17
+
18
+ namespace cybozu { namespace nlp {
19
+
20
+ namespace local {
21
+
22
/**
	write list as "{ e0 e1 ... }" to out
	every element is followed by one space; an empty list gives "{ }"
	@return out
*/
template<class os, typename T>
os& dump(os& out, const std::vector<T>& list)
{
	out << "{ ";
	const size_t count = list.size();
	for (size_t idx = 0; idx < count; idx++) {
		out << list[idx] << " ";
	}
	out << "}";
	return out;
}
31
+
32
+ } // local
33
+
34
+ //const double NaN = std::numeric_limits<double>::quiet_NaN();
35
+
36
+ typedef cybozu::nlp::SparseVector<bool> BoolSVec;
37
+ typedef cybozu::nlp::SparseVector<double> DoubleSVec;
38
+ typedef std::vector<BoolSVec> SMatrix;
39
+
40
/**
	@return true if map contains key
*/
template<typename T>
bool hasKey(const std::map<T, size_t>& map, T key)
{
	return map.count(key) != 0;
}
42
+
43
+
44
+ class Plsi {
45
+ public:
46
+ typedef int ITEM_TYPE;
47
+ typedef int USER_TYPE;
48
+
49
+ enum SEARCH_TYPE {
50
+ JOINT,
51
+ CONDITIONAL,
52
+ POSTERIOR
53
+ };
54
+ private:
55
+ typedef std::vector<double> DoubleVec;
56
+ typedef std::vector<DoubleVec> DoubleVecVec;
57
+ std::map<USER_TYPE, size_t> users_;
58
+ std::vector<USER_TYPE> userlist_;
59
+
60
+ std::map<ITEM_TYPE, size_t> items_;
61
+ std::vector<ITEM_TYPE> itemlist_;
62
+
63
+ SMatrix matrix_; // item => users
64
+
65
+ // probability of p(z), p(x|z), p(y|z)
66
+ DoubleVec z_;
67
+ DoubleVecVec user_z_, item_z_;
68
+
69
+ template<class os>
70
+ friend os& dump(os& out, const Plsi& x) {
71
+ out << x.matrix_.size() << std::endl;
72
+ local::dump(out, x.z_) << std::endl;
73
+ return out;
74
+ }
75
+
76
+ public:
77
+ size_t get_item_id(ITEM_TYPE item) {
78
+ if (hasKey(items_, item)) return items_[item];
79
+
80
+ size_t id = items_[item] = itemlist_.size();
81
+ itemlist_.push_back(item);
82
+ matrix_.push_back(BoolSVec());
83
+ return id;
84
+ }
85
+
86
+ BoolSVec& getItem(ITEM_TYPE item) {
87
+ return matrix_[get_item_id(item)];
88
+ }
89
+
90
+ size_t get_user_id(USER_TYPE user) {
91
+ if (hasKey(users_, user)) return users_[user];
92
+
93
+ size_t id = users_[user] = userlist_.size();
94
+ userlist_.push_back(user);
95
+ return id;
96
+ }
97
+
98
+ ITEM_TYPE get_item_key(size_t item_id) {
99
+ return itemlist_[item_id];
100
+ }
101
+
102
+ /**
103
+ @brief retrieve relevant items for query user
104
+ */
105
+ cybozu::nlp::TopScore<size_t>::Table search_items(USER_TYPE user, int top = 10) {
106
+ int K = (int)z_.size();
107
+ size_t user_id = get_user_id(user);
108
+
109
+ double p_x = 0; // p(x) = sum p(z)p(x|z)
110
+ DoubleVec p_z_x; // p(z|x) = p(z)p(x|z) / p(x)
111
+ for (int k = 0; k < K; k++) {
112
+ double p = z_[k] * user_z_[k][user_id];
113
+ p_x += p;
114
+ p_z_x.push_back(p);
115
+ }
116
+
117
+ cybozu::nlp::TopScore<size_t> ranking(top);
118
+ for (size_t item_id = 0; item_id < items_.size(); item_id++) {
119
+ double score = 0; // p(y|x) = sum _z p(y|z) * p(z|x)
120
+ for (int k = 0; k < K; k++) {
121
+ score += item_z_[k][item_id] * p_z_x[k];
122
+ }
123
+ ranking.add(score / p_x, item_id);
124
+ }
125
+ return ranking.getTable();
126
+ }
127
+
128
+ /**
129
+ @brief retrieve similar items for query item
130
+ */
131
+ cybozu::nlp::TopScore<size_t>::Table similar_items(ITEM_TYPE item, SEARCH_TYPE search_type, int top=10) {
132
+ int K = (int)z_.size();
133
+ size_t target_item_id = get_item_id(item);
134
+
135
+ cybozu::nlp::TopScore<size_t> ranking(top);
136
+ if (search_type == POSTERIOR) {
137
+ for (size_t item_id = 0; item_id < items_.size(); item_id++) {
138
+ // p(y1=target|y2=item_id) = sum _z p(target|z) * p(item_id|z) * p(z) / p(item_id)
139
+ double score = 0, p_y = 0;
140
+ for(int k=0;k<K;++k) {
141
+ double p = item_z_[k][item_id] * z_[k];
142
+ p_y += p;
143
+ score += item_z_[k][target_item_id] * p;
144
+ }
145
+
146
+ ranking.add(score / p_y, item_id);
147
+ }
148
+
149
+ } else if (search_type == CONDITIONAL) {
150
+ double p_y = 0; // p(y=target) = sum p(z)p(y=target|z)
151
+ DoubleVec p_z_y; // p(z)p(y=target|z)
152
+ for (int k = 0; k < K; k++) {
153
+ double p = z_[k] * item_z_[k][target_item_id];
154
+ p_y += p;
155
+ p_z_y.push_back(p);
156
+ }
157
+ for (size_t item_id = 0; item_id < items_.size(); item_id++) {
158
+ // p(y1=item_id|y2=target) = sum _z p(y1|z) * p(z|y2) = sum _z p(y1|z) * p(y2|z) * p(z) / p(y2)
159
+ double score = 0;
160
+ for (int k = 0; k < K; k++) {
161
+ score += item_z_[k][item_id] * p_z_y[k];
162
+ }
163
+
164
+ ranking.add(score / p_y, item_id);
165
+ }
166
+
167
+ } else if (search_type == JOINT) {
168
+ for (size_t item_id = 0; item_id < items_.size(); item_id++) {
169
+ // p(y1=item_id, y2=i) = sum _z p(y1|z) * p(y2|z) * p(z)
170
+ double score = 0;
171
+ for (int k = 0; k < K; k++) {
172
+ score += item_z_[k][item_id] * item_z_[k][target_item_id] * z_[k];
173
+ }
174
+ ranking.add(score, item_id);
175
+ }
176
+ }
177
+ return ranking.getTable();
178
+ }
179
+
180
+ /**
181
+ @brief calcurate perplexity
182
+ */
183
+ double perplexity()
184
+ {
185
+ int K = (int)z_.size();
186
+
187
+ // p(x) = sum p(z)p(x|z)
188
+ DoubleVec p_x;
189
+ for (size_t user_id = 0; user_id < users_.size(); user_id++) {
190
+ double p = 0;
191
+ for (int k = 0; k < K; k++) {
192
+ p += z_[k] * user_z_[k][user_id];
193
+ }
194
+ p_x.push_back(p);
195
+ }
196
+
197
+ int denom = 0;
198
+ double sum = 0;
199
+ for (size_t item_id = 0; item_id < matrix_.size(); item_id++) {
200
+ BoolSVec& item_users = matrix_[item_id];
201
+ for (BoolSVec::const_iterator i = item_users.begin(), ie = item_users.end(); i != ie; ++i) {
202
+ ++denom;
203
+ size_t user_id = i.pos();
204
+
205
+ // p(y|x) = sum p(y|z)p(z|x) = sum p(y|z)p(x|z)p(z)/p(x)
206
+ double p = 0;
207
+ for (int k = 0; k < K; k++) {
208
+ p += z_[k] * user_z_[k][user_id] * item_z_[k][item_id];
209
+ }
210
+ sum += log(p / p_x[user_id]);
211
+ }
212
+ }
213
+ return exp(-sum/denom);
214
+ }
215
+
216
+ /**
217
+ @brief start learning (initialize learning)
218
+ */
219
+ void startLearning(int K)
220
+ {
221
+ size_t M = users_.size();
222
+ size_t N = items_.size();
223
+ user_z_.resize(K);
224
+ item_z_.resize(K);
225
+ cybozu::nlp::UniformRandomGenerator rand(0.25, 0.75);
226
+ for (int k = 0; k < K; k++) {
227
+ // initialize p(z=k)
228
+ z_.push_back(1.0/K);
229
+
230
+ // initialize p(x=user|z=k)
231
+ DoubleVec& uvec = user_z_[k];
232
+ for (size_t j = 0; j < M; j++) uvec.push_back(1.0/M);
233
+
234
+ // initialize p(y=item|z=k)
235
+ DoubleVec& ivec = item_z_[k];
236
+ double s = 0;
237
+ for (size_t j = 0; j < N; j++) {
238
+ double r = rand.getDouble();
239
+ ivec.push_back(r);
240
+ s += r;
241
+ }
242
+ for(size_t j = 0; j < N; j++) ivec[j] /= s;
243
+ }
244
+
245
+ }
246
+
247
+ /**
248
+ @brief step learning (called repeatedly after initialization learning)
249
+ @param[in] beta temperature for tempered EM
250
+ @return likelyhood for previous iteration
251
+ */
252
+ double step(double beta = 1)
253
+ {
254
+ int K = (int)z_.size();
255
+
256
+ DoubleVec z_numer;
257
+ DoubleVecVec user_numer, item_numer;
258
+ z_numer.resize(K);
259
+ user_numer.resize(K);
260
+ item_numer.resize(K);
261
+ for (int k = 0; k < K; k++) {
262
+ user_numer[k].resize(users_.size());
263
+ item_numer[k].resize(items_.size());
264
+ }
265
+ int denom = 0;
266
+ double likelihood = 0;
267
+ DoubleVec p_z_xy;
268
+ p_z_xy.resize(K);
269
+
270
+ for (size_t item_id = 0; item_id < matrix_.size(); ++item_id) {
271
+ BoolSVec& item_users = matrix_[item_id];
272
+ for (BoolSVec::const_iterator i = item_users.begin(), ie = item_users.end(); i != ie; ++i) {
273
+ // when n(x, y) = 1(true)
274
+ ++denom;
275
+ size_t user_id = i.pos();
276
+
277
+ // E-step: p(z|x,y)
278
+ double sum = 0;
279
+ for (int k = 0; k < K; k++) {
280
+ // p(z=k)p(x=user_id|z=k)p(y=item_id|z=k)
281
+ double p = pow(z_[k] * user_z_[k][user_id] * item_z_[k][item_id], beta);
282
+ p_z_xy[k] = p;
283
+ sum += p;
284
+ }
285
+
286
+ // normalize & M-step
287
+ for (int k = 0; k < K; k++) {
288
+ double p = p_z_xy[k] / sum;
289
+
290
+ user_numer[k][user_id] += p;
291
+ item_numer[k][item_id] += p;
292
+ z_numer[k] += p;
293
+ }
294
+ likelihood += log(sum);
295
+ }
296
+ }
297
+
298
+ // M-step: update
299
+ for (int k = 0; k < K; k++) {
300
+ double z_num = z_numer[k];
301
+ z_[k] = z_num / denom;
302
+ for (size_t item_id = 0; item_id < items_.size(); ++item_id) {
303
+ item_z_[k][item_id] = item_numer[k][item_id] / z_num;
304
+ }
305
+ for (size_t user_id = 0; user_id < users_.size(); ++user_id) {
306
+ user_z_[k][user_id] = user_numer[k][user_id] / z_num;
307
+ }
308
+ }
309
+
310
+ // log-likelihood of previous iteration
311
+ return likelihood;
312
+ }
313
+ };
314
+
315
+ } } // cybozu::nlp
@@ -0,0 +1,74 @@
1
+ #pragma once
2
+ /**
3
+ @file
4
+ @brief normal random generator
5
+
6
+ @author MITSUNARI Shigeo(@herumi)
7
+ @author MITSUNARI Shigeo
8
+ */
9
+ #include <cybozu/xorshift.hpp>
10
+
11
+ namespace cybozu { namespace nlp {
12
+
13
+ /*
14
+ use xor shift
15
+ */
16
+ class UniformRandomGenerator {
17
+ double a_;
18
+ double b_;
19
+ cybozu::XorShift rg;
20
+ public:
21
+ /* generate uniform random value in [a, b) */
22
+ explicit UniformRandomGenerator(double a = 0, double b = 1, int seed = 0)
23
+ : a_(a)
24
+ , b_(b)
25
+ , rg(seed)
26
+ {
27
+ }
28
+ void init(int seed = 0)
29
+ {
30
+ rg.init(seed);
31
+ }
32
+ /* [0, 2^32) random number */
33
+ uint32_t operator()() { return rg.get32(); }
34
+ uint32_t get32() { return rg.get32(); }
35
+ uint64_t get64() { return rg.get64(); }
36
+ /* [a, b) random number */
37
+ double getDouble()
38
+ {
39
+ uint32_t x = get32() >> 5;
40
+ uint32_t y = get32() >> 6;
41
+ double z = (x * double(1U << 26) + y) * (1.0 / double(1LL << 53));
42
+ return (b_ - a_) * z + a_;
43
+ }
44
+ };
45
+
46
+ /*
47
+ normal random generator
48
+ */
49
+ class NormalRandomGenerator {
50
+ UniformRandomGenerator gen_;
51
+ double u_;
52
+ double s_;
53
+ public:
54
+ explicit NormalRandomGenerator(double u = 0, double s = 1, int seed = 0)
55
+ : gen_(seed)
56
+ , u_(u)
57
+ , s_(s)
58
+ {
59
+ }
60
+ void init(int seed = 0)
61
+ {
62
+ gen_.init(seed);
63
+ }
64
+ double get()
65
+ {
66
+ double sum = -6;
67
+ for (int i = 0; i < 12; i++) {
68
+ sum += gen_.getDouble();
69
+ }
70
+ return sum * s_ + u_;
71
+ }
72
+ };
73
+
74
+ } } // cybozu::nlp