ooxml_crypt 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (264) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +58 -0
  5. data/Rakefile +12 -0
  6. data/bin/console +15 -0
  7. data/bin/setup +8 -0
  8. data/ext/ooxml_crypt/extconf.rb +18 -0
  9. data/ext/ooxml_crypt/ooxml_crypt.c +27 -0
  10. data/ext/ooxml_crypt/ooxml_crypt.h +7 -0
  11. data/lib/ooxml_crypt/version.rb +5 -0
  12. data/lib/ooxml_crypt.rb +75 -0
  13. data/vendor/cybozulib/.github/workflows/main.yml +12 -0
  14. data/vendor/cybozulib/.gitignore +5 -0
  15. data/vendor/cybozulib/CMakeLists.txt +6 -0
  16. data/vendor/cybozulib/COPYRIGHT +27 -0
  17. data/vendor/cybozulib/Makefile +26 -0
  18. data/vendor/cybozulib/bin/libeay32.dll +0 -0
  19. data/vendor/cybozulib/bin/libmecab.dll +0 -0
  20. data/vendor/cybozulib/bin/ssleay32.dll +0 -0
  21. data/vendor/cybozulib/common.mk +116 -0
  22. data/vendor/cybozulib/common.props +25 -0
  23. data/vendor/cybozulib/cybozulib.sln +286 -0
  24. data/vendor/cybozulib/debug.props +14 -0
  25. data/vendor/cybozulib/include/cybozu/array.hpp +197 -0
  26. data/vendor/cybozulib/include/cybozu/atoi.hpp +238 -0
  27. data/vendor/cybozulib/include/cybozu/atomic.hpp +146 -0
  28. data/vendor/cybozulib/include/cybozu/base64.hpp +210 -0
  29. data/vendor/cybozulib/include/cybozu/benchmark.hpp +212 -0
  30. data/vendor/cybozulib/include/cybozu/bfd.hpp +105 -0
  31. data/vendor/cybozulib/include/cybozu/bit_operation.hpp +139 -0
  32. data/vendor/cybozulib/include/cybozu/bitvector.hpp +358 -0
  33. data/vendor/cybozulib/include/cybozu/condition_variable.hpp +113 -0
  34. data/vendor/cybozulib/include/cybozu/condition_variable_cs.hpp +74 -0
  35. data/vendor/cybozulib/include/cybozu/config.hpp +392 -0
  36. data/vendor/cybozulib/include/cybozu/critical_section.hpp +60 -0
  37. data/vendor/cybozulib/include/cybozu/crypto.hpp +321 -0
  38. data/vendor/cybozulib/include/cybozu/csucvector.hpp +624 -0
  39. data/vendor/cybozulib/include/cybozu/csv.hpp +294 -0
  40. data/vendor/cybozulib/include/cybozu/data_type.hpp +27 -0
  41. data/vendor/cybozulib/include/cybozu/endian.hpp +224 -0
  42. data/vendor/cybozulib/include/cybozu/env.hpp +63 -0
  43. data/vendor/cybozulib/include/cybozu/event.hpp +122 -0
  44. data/vendor/cybozulib/include/cybozu/exception.hpp +253 -0
  45. data/vendor/cybozulib/include/cybozu/file.hpp +626 -0
  46. data/vendor/cybozulib/include/cybozu/fmindex.hpp +291 -0
  47. data/vendor/cybozulib/include/cybozu/format.hpp +93 -0
  48. data/vendor/cybozulib/include/cybozu/frequency.hpp +264 -0
  49. data/vendor/cybozulib/include/cybozu/hash.hpp +67 -0
  50. data/vendor/cybozulib/include/cybozu/inttype.hpp +174 -0
  51. data/vendor/cybozulib/include/cybozu/itoa.hpp +336 -0
  52. data/vendor/cybozulib/include/cybozu/json.hpp +120 -0
  53. data/vendor/cybozulib/include/cybozu/line_stream.hpp +149 -0
  54. data/vendor/cybozulib/include/cybozu/link_libeay32.hpp +21 -0
  55. data/vendor/cybozulib/include/cybozu/link_mpir.hpp +18 -0
  56. data/vendor/cybozulib/include/cybozu/link_ssleay32.hpp +19 -0
  57. data/vendor/cybozulib/include/cybozu/log.hpp +237 -0
  58. data/vendor/cybozulib/include/cybozu/minixml.hpp +452 -0
  59. data/vendor/cybozulib/include/cybozu/mmap.hpp +143 -0
  60. data/vendor/cybozulib/include/cybozu/mutex.hpp +144 -0
  61. data/vendor/cybozulib/include/cybozu/nlp/mecab.hpp +96 -0
  62. data/vendor/cybozulib/include/cybozu/nlp/plsi.hpp +315 -0
  63. data/vendor/cybozulib/include/cybozu/nlp/random.hpp +74 -0
  64. data/vendor/cybozulib/include/cybozu/nlp/sparse.hpp +529 -0
  65. data/vendor/cybozulib/include/cybozu/nlp/svd.hpp +486 -0
  66. data/vendor/cybozulib/include/cybozu/nlp/tfidf.hpp +226 -0
  67. data/vendor/cybozulib/include/cybozu/nlp/top_score.hpp +75 -0
  68. data/vendor/cybozulib/include/cybozu/option.hpp +743 -0
  69. data/vendor/cybozulib/include/cybozu/parallel.hpp +88 -0
  70. data/vendor/cybozulib/include/cybozu/pcg.hpp +72 -0
  71. data/vendor/cybozulib/include/cybozu/process.hpp +324 -0
  72. data/vendor/cybozulib/include/cybozu/quit_signal_handler.hpp +66 -0
  73. data/vendor/cybozulib/include/cybozu/random_generator.hpp +144 -0
  74. data/vendor/cybozulib/include/cybozu/regex.hpp +463 -0
  75. data/vendor/cybozulib/include/cybozu/select8.hpp +279 -0
  76. data/vendor/cybozulib/include/cybozu/serializer.hpp +363 -0
  77. data/vendor/cybozulib/include/cybozu/sha1.hpp +209 -0
  78. data/vendor/cybozulib/include/cybozu/sha2.hpp +506 -0
  79. data/vendor/cybozulib/include/cybozu/siphash.hpp +105 -0
  80. data/vendor/cybozulib/include/cybozu/socket.hpp +785 -0
  81. data/vendor/cybozulib/include/cybozu/ssl.hpp +203 -0
  82. data/vendor/cybozulib/include/cybozu/stacktrace.hpp +291 -0
  83. data/vendor/cybozulib/include/cybozu/stream.hpp +269 -0
  84. data/vendor/cybozulib/include/cybozu/string.hpp +1746 -0
  85. data/vendor/cybozulib/include/cybozu/string_operation.hpp +365 -0
  86. data/vendor/cybozulib/include/cybozu/sucvector.hpp +378 -0
  87. data/vendor/cybozulib/include/cybozu/test.hpp +373 -0
  88. data/vendor/cybozulib/include/cybozu/thread.hpp +229 -0
  89. data/vendor/cybozulib/include/cybozu/time.hpp +281 -0
  90. data/vendor/cybozulib/include/cybozu/tls.hpp +115 -0
  91. data/vendor/cybozulib/include/cybozu/unordered_map.hpp +13 -0
  92. data/vendor/cybozulib/include/cybozu/unordered_set.hpp +13 -0
  93. data/vendor/cybozulib/include/cybozu/v128.hpp +376 -0
  94. data/vendor/cybozulib/include/cybozu/wavelet_matrix.hpp +345 -0
  95. data/vendor/cybozulib/include/cybozu/xorshift.hpp +189 -0
  96. data/vendor/cybozulib/include/cybozu/zlib.hpp +325 -0
  97. data/vendor/cybozulib/include/sais.hxx +364 -0
  98. data/vendor/cybozulib/misc/make_select8tbl.cpp +26 -0
  99. data/vendor/cybozulib/mk.bat +37 -0
  100. data/vendor/cybozulib/readme.md +29 -0
  101. data/vendor/cybozulib/release.props +12 -0
  102. data/vendor/cybozulib/sample/Makefile +30 -0
  103. data/vendor/cybozulib/sample/csucvector_smpl.cpp +42 -0
  104. data/vendor/cybozulib/sample/data/svd/org/test1.S +4 -0
  105. data/vendor/cybozulib/sample/data/svd/org/test1.U +4 -0
  106. data/vendor/cybozulib/sample/data/svd/org/test1.V +6 -0
  107. data/vendor/cybozulib/sample/data/svd/test1 +4 -0
  108. data/vendor/cybozulib/sample/data/svd/test2 +4 -0
  109. data/vendor/cybozulib/sample/desymbol.cpp +127 -0
  110. data/vendor/cybozulib/sample/exception_smpl.cpp +46 -0
  111. data/vendor/cybozulib/sample/fmindex_smpl.cpp +231 -0
  112. data/vendor/cybozulib/sample/log_smpl.cpp +19 -0
  113. data/vendor/cybozulib/sample/mecab_smpl.cpp +37 -0
  114. data/vendor/cybozulib/sample/option2_smpl.cpp +68 -0
  115. data/vendor/cybozulib/sample/option_smpl.cpp +42 -0
  116. data/vendor/cybozulib/sample/plsi_smpl.cpp +207 -0
  117. data/vendor/cybozulib/sample/proj/exception_smpl.vcproj +184 -0
  118. data/vendor/cybozulib/sample/proj/mecab_smpl.vcproj +184 -0
  119. data/vendor/cybozulib/sample/proj/ssl_smpl/ssl_smpl.vcxproj +85 -0
  120. data/vendor/cybozulib/sample/proj/ssl_smpl.vcproj +347 -0
  121. data/vendor/cybozulib/sample/proj/stacktrace_smpl/stacktrace_smpl.vcxproj +85 -0
  122. data/vendor/cybozulib/sample/proj/svd_smpl.vcproj +184 -0
  123. data/vendor/cybozulib/sample/quit_signal_handler.cpp +30 -0
  124. data/vendor/cybozulib/sample/serializer_smpl.cpp +196 -0
  125. data/vendor/cybozulib/sample/socket_smpl.cpp +82 -0
  126. data/vendor/cybozulib/sample/ssl_smpl.cpp +39 -0
  127. data/vendor/cybozulib/sample/stacktrace_smpl.cpp +52 -0
  128. data/vendor/cybozulib/sample/svd_bench_smpl.cpp +143 -0
  129. data/vendor/cybozulib/sample/svd_smpl.cpp +94 -0
  130. data/vendor/cybozulib/sample/wm_bench_smpl.cpp +182 -0
  131. data/vendor/cybozulib/sample/zlib_smpl.cpp +41 -0
  132. data/vendor/cybozulib/src/Makefile +8 -0
  133. data/vendor/cybozulib/src/base/Makefile +19 -0
  134. data/vendor/cybozulib/test/Makefile +12 -0
  135. data/vendor/cybozulib/test/base/Makefile +37 -0
  136. data/vendor/cybozulib/test/base/array_test.cpp +173 -0
  137. data/vendor/cybozulib/test/base/atoi_test.cpp +774 -0
  138. data/vendor/cybozulib/test/base/atomic_test.cpp +49 -0
  139. data/vendor/cybozulib/test/base/base64_test.cpp +113 -0
  140. data/vendor/cybozulib/test/base/bit_operation_test.cpp +134 -0
  141. data/vendor/cybozulib/test/base/bitvector_test.cpp +204 -0
  142. data/vendor/cybozulib/test/base/condition_variable_cs_test.cpp +92 -0
  143. data/vendor/cybozulib/test/base/condition_variable_test.cpp +88 -0
  144. data/vendor/cybozulib/test/base/config_test.cpp +236 -0
  145. data/vendor/cybozulib/test/base/crypto_test.cpp +122 -0
  146. data/vendor/cybozulib/test/base/csucvector_test.cpp +63 -0
  147. data/vendor/cybozulib/test/base/csv_test.cpp +182 -0
  148. data/vendor/cybozulib/test/base/data/a.xml +26 -0
  149. data/vendor/cybozulib/test/base/endian_test.cpp +56 -0
  150. data/vendor/cybozulib/test/base/env_test.cpp +22 -0
  151. data/vendor/cybozulib/test/base/event_test.cpp +41 -0
  152. data/vendor/cybozulib/test/base/file_test.cpp +233 -0
  153. data/vendor/cybozulib/test/base/fmindex_test.cpp +118 -0
  154. data/vendor/cybozulib/test/base/format_test.cpp +12 -0
  155. data/vendor/cybozulib/test/base/frequency_test.cpp +104 -0
  156. data/vendor/cybozulib/test/base/itoa_test.cpp +522 -0
  157. data/vendor/cybozulib/test/base/line_stream_test.cpp +208 -0
  158. data/vendor/cybozulib/test/base/mecab_test.cpp +41 -0
  159. data/vendor/cybozulib/test/base/minixml_test.cpp +103 -0
  160. data/vendor/cybozulib/test/base/mmap_test.cpp +15 -0
  161. data/vendor/cybozulib/test/base/option_test.cpp +487 -0
  162. data/vendor/cybozulib/test/base/parallel_test.cpp +48 -0
  163. data/vendor/cybozulib/test/base/proj/array_test/array_test.vcxproj +86 -0
  164. data/vendor/cybozulib/test/base/proj/atoi_test/atoi_test.vcxproj +86 -0
  165. data/vendor/cybozulib/test/base/proj/atomic_test/atomic_test.vcxproj +86 -0
  166. data/vendor/cybozulib/test/base/proj/base64_test/base64_test.vcxproj +86 -0
  167. data/vendor/cybozulib/test/base/proj/condition_variable_cs_test/condition_variable_cs_test.vcxproj +86 -0
  168. data/vendor/cybozulib/test/base/proj/condition_variable_test/condition_variable_test.vcxproj +86 -0
  169. data/vendor/cybozulib/test/base/proj/config_test/config_test.vcxproj +86 -0
  170. data/vendor/cybozulib/test/base/proj/csv_test/csv_test.vcxproj +86 -0
  171. data/vendor/cybozulib/test/base/proj/endian_test/endian_test.vcxproj +86 -0
  172. data/vendor/cybozulib/test/base/proj/env_test/env_test.vcxproj +86 -0
  173. data/vendor/cybozulib/test/base/proj/event_test/event_test.vcxproj +86 -0
  174. data/vendor/cybozulib/test/base/proj/file_test/file_test.vcxproj +86 -0
  175. data/vendor/cybozulib/test/base/proj/itoa_test/itoa_test.vcxproj +86 -0
  176. data/vendor/cybozulib/test/base/proj/mecab_test/mecab_test.vcxproj +88 -0
  177. data/vendor/cybozulib/test/base/proj/minixml_test/minixml_test.vcxproj +86 -0
  178. data/vendor/cybozulib/test/base/proj/mmap_test/mmap_test.vcxproj +86 -0
  179. data/vendor/cybozulib/test/base/proj/serializer_test/serializer_test.vcxproj +86 -0
  180. data/vendor/cybozulib/test/base/proj/sha1_test/sha1_test.vcxproj +86 -0
  181. data/vendor/cybozulib/test/base/proj/stream_test/stream_test.vcxproj +86 -0
  182. data/vendor/cybozulib/test/base/proj/string_operation_test/string_operation_test.vcxproj +86 -0
  183. data/vendor/cybozulib/test/base/proj/string_test/string_test.vcxproj +86 -0
  184. data/vendor/cybozulib/test/base/proj/thread_test/thread_test.vcxproj +86 -0
  185. data/vendor/cybozulib/test/base/proj/time_test/time_test.vcxproj +86 -0
  186. data/vendor/cybozulib/test/base/proj/tls_test/tls_test.vcxproj +86 -0
  187. data/vendor/cybozulib/test/base/proj/zlib_test/zlib_test.vcxproj +86 -0
  188. data/vendor/cybozulib/test/base/random_generator_test.cpp +28 -0
  189. data/vendor/cybozulib/test/base/regex_test.cpp +74 -0
  190. data/vendor/cybozulib/test/base/serializer_test.cpp +483 -0
  191. data/vendor/cybozulib/test/base/sha1_test.cpp +61 -0
  192. data/vendor/cybozulib/test/base/sha2_test.cpp +191 -0
  193. data/vendor/cybozulib/test/base/siphash_test.cpp +33 -0
  194. data/vendor/cybozulib/test/base/socket_test.cpp +76 -0
  195. data/vendor/cybozulib/test/base/stream_test.cpp +101 -0
  196. data/vendor/cybozulib/test/base/string_operation_test.cpp +340 -0
  197. data/vendor/cybozulib/test/base/string_test.cpp +1705 -0
  198. data/vendor/cybozulib/test/base/sucvector_test.cpp +312 -0
  199. data/vendor/cybozulib/test/base/thread_test.cpp +62 -0
  200. data/vendor/cybozulib/test/base/time_test.cpp +164 -0
  201. data/vendor/cybozulib/test/base/tls_test.cpp +50 -0
  202. data/vendor/cybozulib/test/base/wavelet_matrix_test.cpp +145 -0
  203. data/vendor/cybozulib/test/base/zlib_test.cpp +371 -0
  204. data/vendor/cybozulib/test/nlp/Makefile +27 -0
  205. data/vendor/cybozulib/test/nlp/proj/random_test.vcproj +184 -0
  206. data/vendor/cybozulib/test/nlp/proj/sparse_test.vcproj +184 -0
  207. data/vendor/cybozulib/test/nlp/proj/svd_test.vcproj +184 -0
  208. data/vendor/cybozulib/test/nlp/random_test.cpp +62 -0
  209. data/vendor/cybozulib/test/nlp/sparse_test.cpp +347 -0
  210. data/vendor/cybozulib/test/nlp/svd_test.cpp +234 -0
  211. data/vendor/cybozulib/test/nlp/top_score_test.cpp +40 -0
  212. data/vendor/cybozulib/tool/create_vcproj.py +186 -0
  213. data/vendor/cybozulib/tool/vcproj_tmpl.py +185 -0
  214. data/vendor/msoffice/COPYRIGHT +27 -0
  215. data/vendor/msoffice/Makefile +29 -0
  216. data/vendor/msoffice/bin/64/msoc.dll +0 -0
  217. data/vendor/msoffice/bin/64/msocsample.exe +0 -0
  218. data/vendor/msoffice/bin/64/msoffice-crypt.exe +0 -0
  219. data/vendor/msoffice/bin/msoc.dll +0 -0
  220. data/vendor/msoffice/bin/msocsample.exe +0 -0
  221. data/vendor/msoffice/bin/msoffice-crypt.exe +0 -0
  222. data/vendor/msoffice/common.mk +71 -0
  223. data/vendor/msoffice/common.props +26 -0
  224. data/vendor/msoffice/debug.props +14 -0
  225. data/vendor/msoffice/include/attack.hpp +211 -0
  226. data/vendor/msoffice/include/cfb.hpp +777 -0
  227. data/vendor/msoffice/include/crypto_util.hpp +450 -0
  228. data/vendor/msoffice/include/custom_sha1.hpp +342 -0
  229. data/vendor/msoffice/include/decode.hpp +240 -0
  230. data/vendor/msoffice/include/encode.hpp +221 -0
  231. data/vendor/msoffice/include/make_dataspace.hpp +316 -0
  232. data/vendor/msoffice/include/msoc.h +129 -0
  233. data/vendor/msoffice/include/resource.hpp +7 -0
  234. data/vendor/msoffice/include/standard_encryption.hpp +145 -0
  235. data/vendor/msoffice/include/uint32vec.hpp +179 -0
  236. data/vendor/msoffice/include/util.hpp +212 -0
  237. data/vendor/msoffice/lib/.emptydir +0 -0
  238. data/vendor/msoffice/misc/decrypt-xls.vbs +46 -0
  239. data/vendor/msoffice/mk.bat +1 -0
  240. data/vendor/msoffice/mkdll.bat +3 -0
  241. data/vendor/msoffice/msoc.def +13 -0
  242. data/vendor/msoffice/msocsample.py +178 -0
  243. data/vendor/msoffice/msoffice12.sln +31 -0
  244. data/vendor/msoffice/readme.md +110 -0
  245. data/vendor/msoffice/release.props +28 -0
  246. data/vendor/msoffice/src/Makefile +19 -0
  247. data/vendor/msoffice/src/attack.cpp +124 -0
  248. data/vendor/msoffice/src/cfb_test.cpp +77 -0
  249. data/vendor/msoffice/src/minisample.c +54 -0
  250. data/vendor/msoffice/src/msocdll.cpp +276 -0
  251. data/vendor/msoffice/src/msocsample.c +136 -0
  252. data/vendor/msoffice/src/msoffice-crypt.cpp +219 -0
  253. data/vendor/msoffice/src/proj/attack/attack.vcxproj +88 -0
  254. data/vendor/msoffice/src/proj/main/msoffice-crypt.vcxproj +88 -0
  255. data/vendor/msoffice/src/sha1.cpp +234 -0
  256. data/vendor/msoffice/test/Makefile +20 -0
  257. data/vendor/msoffice/test/cfb_test.cpp +74 -0
  258. data/vendor/msoffice/test/hash_test.cpp +59 -0
  259. data/vendor/msoffice/test/proj/cfb/cfb_test.vcxproj +90 -0
  260. data/vendor/msoffice/test/proj/hash/hash_test.vcxproj +90 -0
  261. data/vendor/msoffice/test/sampl.bat +8 -0
  262. data/vendor/msoffice/test_all.py +46 -0
  263. data/vendor/update +4 -0
  264. metadata +351 -0
@@ -0,0 +1,144 @@
1
+ #pragma once
2
+ /**
3
+ @file
4
+ @brief mutex
5
+
6
+ @author MITSUNARI Shigeo(@herumi)
7
+ @author MITSUNARI Shigeo
8
+ */
9
+
10
+ #ifdef _WIN32
11
+ #ifndef WIN32_LEAN_AND_MEAN
12
+ #define WIN32_LEAN_AND_MEAN
13
+ #endif
14
+ #include <windows.h>
15
+ #else
16
+ #include <pthread.h>
17
+ #include <time.h>
18
+ #endif
19
+ #include <assert.h>
20
+ #include <stdlib.h>
21
+
22
+ namespace cybozu {
23
+
24
+ class ConditionVariable;
25
+
26
+ namespace thread {
27
+
28
+ #ifdef _WIN32
29
+ typedef HANDLE MutexHandle;
30
+ inline void MutexInit(MutexHandle& mutex)
31
+ {
32
+ // mutex = CreateSemaphore(NULL /* no security */, 1 /* init */, 0x7FFFFFFF /* max */, NULL /* no name */);
33
+ mutex = CreateMutex(NULL /* no security */, FALSE /* no owner */, NULL /* no name */);
34
+ }
35
+ inline void MutexLock(MutexHandle& mutex) { WaitForSingleObject(mutex, INFINITE); }
36
+ /*
37
+ return false if timeout
38
+ @param msec [in] msec
39
+ */
40
+ inline bool MutexLockTimeout(MutexHandle& mutex, int msec)
41
+ {
42
+ DWORD ret = WaitForSingleObject(mutex, msec);
43
+ if (ret == WAIT_OBJECT_0) {
44
+ return true;
45
+ }
46
+ if (ret == WAIT_TIMEOUT) {
47
+ return false;
48
+ }
49
+ /* ret == WAIT_ABANDONED */
50
+ assert(0);
51
+ return false;
52
+ }
53
+ inline void MutexUnlock(MutexHandle& mutex)
54
+ {
55
+ // ReleaseSemaphore(mutex, 1, NULL);
56
+ ReleaseMutex(mutex);
57
+ }
58
+ inline void MutexTerm(MutexHandle& mutex) { CloseHandle(mutex); }
59
+ #else
60
typedef pthread_mutex_t MutexHandle;

/**
	initialize mutex with the default attributes
	@param mutex [out] mutex to initialize
*/
inline void MutexInit(MutexHandle& mutex)
{
#if 1
	pthread_mutex_init(&mutex, NULL);
#else
	pthread_mutexattr_t attr;
	pthread_mutexattr_init(&attr);
	if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_TIMED_NP)) {
		perror("pthread_mutexattr_settype");
		exit(1);
	}
	pthread_mutex_init(&mutex, &attr);
	pthread_mutexattr_destroy(&attr);
#endif
}

/** block until the mutex is acquired */
inline void MutexLock(MutexHandle& mutex) { pthread_mutex_lock(&mutex); }

#if 0
/**
	try to acquire the mutex, waiting at most msec milliseconds
	@param mutex [in] mutex
	@param msec [in] timeout in milliseconds
	@return true if the mutex was acquired
*/
inline bool MutexLockTimeout(MutexHandle& mutex, int msec)
{
	const long nsecPerSec = 1000000000L;
	timespec absTime;
	clock_gettime(CLOCK_REALTIME, &absTime);
	absTime.tv_sec += msec / 1000;
	/* the sub-second remainder is in milliseconds; tv_nsec is in nanoseconds */
	absTime.tv_nsec += (msec % 1000) * 1000000L;
	/* keep tv_nsec inside [0, 1e9), otherwise pthread_mutex_timedlock rejects it */
	if (absTime.tv_nsec >= nsecPerSec) {
		absTime.tv_sec++;
		absTime.tv_nsec -= nsecPerSec;
	} else if (absTime.tv_nsec < 0) {
		absTime.tv_sec--;
		absTime.tv_nsec += nsecPerSec;
	}
	return pthread_mutex_timedlock(&mutex, &absTime) == 0;
}
#endif

/** release the mutex */
inline void MutexUnlock(MutexHandle& mutex) { pthread_mutex_unlock(&mutex); }

/** destroy the mutex */
inline void MutexTerm(MutexHandle& mutex) { pthread_mutex_destroy(&mutex); }
90
+ #endif
91
+
92
/**
	scoped lock : calls t.lock() in the constructor and t.unlock() in the destructor
	@tparam T any type that provides lock() and unlock()
	@note copying is disabled because a copy would call unlock() a second time
*/
template<class T>
class AutoLockT {
public:
	explicit AutoLockT(T &t)
		: t_(t)
	{
		t_.lock();
	}
	~AutoLockT()
	{
		t_.unlock();
	}
private:
	T& t_;
	/* declared but never defined : not copyable, not assignable */
	AutoLockT(const AutoLockT&);
	AutoLockT& operator=(const AutoLockT&);
};
108
+
109
+ } // cybozu::thread
110
+
111
+ class Mutex {
112
+ friend class cybozu::ConditionVariable;
113
+ public:
114
+ Mutex()
115
+ {
116
+ thread::MutexInit(hdl_);
117
+ }
118
+ ~Mutex()
119
+ {
120
+ thread::MutexTerm(hdl_);
121
+ }
122
+ void lock()
123
+ {
124
+ thread::MutexLock(hdl_);
125
+ }
126
+ #if 0
127
+ bool lockTimeout(int msec)
128
+ {
129
+ return thread::MutexLockTimeout(hdl_, msec);
130
+ }
131
+ #endif
132
+ void unlock()
133
+ {
134
+ thread::MutexUnlock(hdl_);
135
+ }
136
+ private:
137
+ Mutex(const Mutex&);
138
+ Mutex& operator=(const Mutex&);
139
+ thread::MutexHandle hdl_;
140
+ };
141
+
142
+ typedef cybozu::thread::AutoLockT<cybozu::Mutex> AutoLock;
143
+
144
+ } // cybozu
@@ -0,0 +1,96 @@
1
+ #pragma once
2
+ /**
3
+ @file
4
+ @brief wrapper of MeCab
5
+
6
+ @author MITSUNARI Shigeo(@herumi)
7
+ */
8
+ #include <string>
9
+ #include <assert.h>
10
+ #ifdef _WIN32
11
+ #include <winsock2.h>
12
+ #endif
13
+ #include "mecab.h"
14
+ #include <cybozu/exception.hpp>
15
+ #ifdef _WIN32
16
+ #pragma comment(lib, "libmecab.lib")
17
+ #endif
18
+
19
+ namespace cybozu { namespace nlp {
20
+
21
+ struct Mecab {
22
+ Mecab(const char *option = "-O wakati")
23
+ : tagger_(MeCab::createTagger(option))
24
+ , node_(0)
25
+ {
26
+ if (tagger_ == 0) {
27
+ throw cybozu::Exception("nlp:mecab:createTagger");
28
+ }
29
+ }
30
+ /**
31
+ T must have push_back(std::string)
32
+ */
33
+ template<class T>
34
+ bool parse(T& out, const char *str, size_t strLen = 0)
35
+ {
36
+ if (strLen == 0) {
37
+ strLen = strlen(str);
38
+ }
39
+ const char *p = tagger_->parse(str, strLen);
40
+ if (p == 0) return false;
41
+ while (*p) {
42
+ if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') {
43
+ p++;
44
+ continue;
45
+ }
46
+ const char *q = strchr(p, ' ');
47
+ if (q == 0) {
48
+ out.push_back(p);
49
+ break;
50
+ }
51
+ out.push_back(std::string(p, q));
52
+ p = q + 1;
53
+ }
54
+ return true;
55
+ }
56
+ void set(const char *str, size_t strLen = 0)
57
+ {
58
+ if (strLen == 0) {
59
+ strLen = strlen(str);
60
+ }
61
+ node_ = tagger_->parseToNode(str, strLen);
62
+ }
63
+ void set(const std::string& str)
64
+ {
65
+ set(&str[0], str.size());
66
+ }
67
+ bool isEnd() const
68
+ {
69
+ if (node_ == 0) return true;
70
+ return node_->stat == MECAB_EOS_NODE;
71
+ }
72
+ const char *getPos() const { return node_->surface; }
73
+ size_t getSize() const { return node_->length; }
74
+ /* adhoc */
75
+ bool isNoun() const
76
+ {
77
+ assert(node_);
78
+ const char *p = node_->feature;
79
+ if (node_->length < 2) return false;
80
+ return p[0] == '\xE5' && p[1] == '\x90' && p[2] == '\x8D';
81
+ }
82
+ void next()
83
+ {
84
+ assert(node_);
85
+ node_ = node_->next;
86
+ }
87
+ ~Mecab()
88
+ {
89
+ delete tagger_;
90
+ }
91
+ private:
92
+ MeCab::Tagger *tagger_;
93
+ const MeCab::Node *node_;
94
+ };
95
+
96
+ } } // cybozu::nlp
@@ -0,0 +1,315 @@
1
+ #pragma once
2
+ /**
3
+ @file
4
+ @brief pLSI
5
+ @author MITSUNARI Shigeo(@herumi)
6
+ */
7
+
8
+ #include <fstream>
9
+ #include <map>
10
+ #include <limits>
11
+ #include <math.h>
12
+ #include <cybozu/string_operation.hpp>
13
+ #include <cybozu/time.hpp>
14
+ #include <cybozu/nlp/random.hpp>
15
+ #include <cybozu/nlp/sparse.hpp>
16
+ #include <cybozu/nlp/top_score.hpp>
17
+
18
+ namespace cybozu { namespace nlp {
19
+
20
+ namespace local {
21
+
22
/**
	write the elements of list to out in the form "{ a b c }"
	@return out
*/
template<class os, typename T>
os& dump(os& out, const std::vector<T>& list) {
	out << "{ ";
	const size_t n = list.size();
	for (size_t idx = 0; idx < n; idx++) {
		out << list[idx] << " ";
	}
	out << "}";
	return out;
}
31
+
32
+ } // local
33
+
34
+ //const double NaN = std::numeric_limits<double>::quiet_NaN();
35
+
36
+ typedef cybozu::nlp::SparseVector<bool> BoolSVec;
37
+ typedef cybozu::nlp::SparseVector<double> DoubleSVec;
38
+ typedef std::vector<BoolSVec> SMatrix;
39
+
40
/** true if key is registered in map */
template<typename T>
bool hasKey(const std::map<T, size_t>& map, T key)
{
	return map.count(key) != 0;
}
42
+
43
+
44
+ class Plsi {
45
+ public:
46
+ typedef int ITEM_TYPE;
47
+ typedef int USER_TYPE;
48
+
49
+ enum SEARCH_TYPE {
50
+ JOINT,
51
+ CONDITIONAL,
52
+ POSTERIOR
53
+ };
54
+ private:
55
+ typedef std::vector<double> DoubleVec;
56
+ typedef std::vector<DoubleVec> DoubleVecVec;
57
+ std::map<USER_TYPE, size_t> users_;
58
+ std::vector<USER_TYPE> userlist_;
59
+
60
+ std::map<ITEM_TYPE, size_t> items_;
61
+ std::vector<ITEM_TYPE> itemlist_;
62
+
63
+ SMatrix matrix_; // item => users
64
+
65
+ // probability of p(z), p(x|z), p(y|z)
66
+ DoubleVec z_;
67
+ DoubleVecVec user_z_, item_z_;
68
+
69
+ template<class os>
70
+ friend os& dump(os& out, const Plsi& x) {
71
+ out << x.matrix_.size() << std::endl;
72
+ local::dump(out, x.z_) << std::endl;
73
+ return out;
74
+ }
75
+
76
+ public:
77
+ size_t get_item_id(ITEM_TYPE item) {
78
+ if (hasKey(items_, item)) return items_[item];
79
+
80
+ size_t id = items_[item] = itemlist_.size();
81
+ itemlist_.push_back(item);
82
+ matrix_.push_back(BoolSVec());
83
+ return id;
84
+ }
85
+
86
+ BoolSVec& getItem(ITEM_TYPE item) {
87
+ return matrix_[get_item_id(item)];
88
+ }
89
+
90
+ size_t get_user_id(USER_TYPE user) {
91
+ if (hasKey(users_, user)) return users_[user];
92
+
93
+ size_t id = users_[user] = userlist_.size();
94
+ userlist_.push_back(user);
95
+ return id;
96
+ }
97
+
98
+ ITEM_TYPE get_item_key(size_t item_id) {
99
+ return itemlist_[item_id];
100
+ }
101
+
102
+ /**
103
+ @brief retrieve relevant items for query user
104
+ */
105
+ cybozu::nlp::TopScore<size_t>::Table search_items(USER_TYPE user, int top = 10) {
106
+ int K = (int)z_.size();
107
+ size_t user_id = get_user_id(user);
108
+
109
+ double p_x = 0; // p(x) = sum p(z)p(x|z)
110
+ DoubleVec p_z_x; // p(z|x) = p(z)p(x|z) / p(x)
111
+ for (int k = 0; k < K; k++) {
112
+ double p = z_[k] * user_z_[k][user_id];
113
+ p_x += p;
114
+ p_z_x.push_back(p);
115
+ }
116
+
117
+ cybozu::nlp::TopScore<size_t> ranking(top);
118
+ for (size_t item_id = 0; item_id < items_.size(); item_id++) {
119
+ double score = 0; // p(y|x) = sum _z p(y|z) * p(z|x)
120
+ for (int k = 0; k < K; k++) {
121
+ score += item_z_[k][item_id] * p_z_x[k];
122
+ }
123
+ ranking.add(score / p_x, item_id);
124
+ }
125
+ return ranking.getTable();
126
+ }
127
+
128
+ /**
129
+ @brief retrieve similar items for query item
130
+ */
131
+ cybozu::nlp::TopScore<size_t>::Table similar_items(ITEM_TYPE item, SEARCH_TYPE search_type, int top=10) {
132
+ int K = (int)z_.size();
133
+ size_t target_item_id = get_item_id(item);
134
+
135
+ cybozu::nlp::TopScore<size_t> ranking(top);
136
+ if (search_type == POSTERIOR) {
137
+ for (size_t item_id = 0; item_id < items_.size(); item_id++) {
138
+ // p(y1=target|y2=item_id) = sum _z p(target|z) * p(item_id|z) * p(z) / p(item_id)
139
+ double score = 0, p_y = 0;
140
+ for(int k=0;k<K;++k) {
141
+ double p = item_z_[k][item_id] * z_[k];
142
+ p_y += p;
143
+ score += item_z_[k][target_item_id] * p;
144
+ }
145
+
146
+ ranking.add(score / p_y, item_id);
147
+ }
148
+
149
+ } else if (search_type == CONDITIONAL) {
150
+ double p_y = 0; // p(y=target) = sum p(z)p(y=target|z)
151
+ DoubleVec p_z_y; // p(z)p(y=target|z)
152
+ for (int k = 0; k < K; k++) {
153
+ double p = z_[k] * item_z_[k][target_item_id];
154
+ p_y += p;
155
+ p_z_y.push_back(p);
156
+ }
157
+ for (size_t item_id = 0; item_id < items_.size(); item_id++) {
158
+ // p(y1=item_id|y2=target) = sum _z p(y1|z) * p(z|y2) = sum _z p(y1|z) * p(y2|z) * p(z) / p(y2)
159
+ double score = 0;
160
+ for (int k = 0; k < K; k++) {
161
+ score += item_z_[k][item_id] * p_z_y[k];
162
+ }
163
+
164
+ ranking.add(score / p_y, item_id);
165
+ }
166
+
167
+ } else if (search_type == JOINT) {
168
+ for (size_t item_id = 0; item_id < items_.size(); item_id++) {
169
+ // p(y1=item_id, y2=i) = sum _z p(y1|z) * p(y2|z) * p(z)
170
+ double score = 0;
171
+ for (int k = 0; k < K; k++) {
172
+ score += item_z_[k][item_id] * item_z_[k][target_item_id] * z_[k];
173
+ }
174
+ ranking.add(score, item_id);
175
+ }
176
+ }
177
+ return ranking.getTable();
178
+ }
179
+
180
+ /**
181
+ @brief calcurate perplexity
182
+ */
183
+ double perplexity()
184
+ {
185
+ int K = (int)z_.size();
186
+
187
+ // p(x) = sum p(z)p(x|z)
188
+ DoubleVec p_x;
189
+ for (size_t user_id = 0; user_id < users_.size(); user_id++) {
190
+ double p = 0;
191
+ for (int k = 0; k < K; k++) {
192
+ p += z_[k] * user_z_[k][user_id];
193
+ }
194
+ p_x.push_back(p);
195
+ }
196
+
197
+ int denom = 0;
198
+ double sum = 0;
199
+ for (size_t item_id = 0; item_id < matrix_.size(); item_id++) {
200
+ BoolSVec& item_users = matrix_[item_id];
201
+ for (BoolSVec::const_iterator i = item_users.begin(), ie = item_users.end(); i != ie; ++i) {
202
+ ++denom;
203
+ size_t user_id = i.pos();
204
+
205
+ // p(y|x) = sum p(y|z)p(z|x) = sum p(y|z)p(x|z)p(z)/p(x)
206
+ double p = 0;
207
+ for (int k = 0; k < K; k++) {
208
+ p += z_[k] * user_z_[k][user_id] * item_z_[k][item_id];
209
+ }
210
+ sum += log(p / p_x[user_id]);
211
+ }
212
+ }
213
+ return exp(-sum/denom);
214
+ }
215
+
216
+ /**
217
+ @brief start learning (initialize learning)
218
+ */
219
+ void startLearning(int K)
220
+ {
221
+ size_t M = users_.size();
222
+ size_t N = items_.size();
223
+ user_z_.resize(K);
224
+ item_z_.resize(K);
225
+ cybozu::nlp::UniformRandomGenerator rand(0.25, 0.75);
226
+ for (int k = 0; k < K; k++) {
227
+ // initialize p(z=k)
228
+ z_.push_back(1.0/K);
229
+
230
+ // initialize p(x=user|z=k)
231
+ DoubleVec& uvec = user_z_[k];
232
+ for (size_t j = 0; j < M; j++) uvec.push_back(1.0/M);
233
+
234
+ // initialize p(y=item|z=k)
235
+ DoubleVec& ivec = item_z_[k];
236
+ double s = 0;
237
+ for (size_t j = 0; j < N; j++) {
238
+ double r = rand.getDouble();
239
+ ivec.push_back(r);
240
+ s += r;
241
+ }
242
+ for(size_t j = 0; j < N; j++) ivec[j] /= s;
243
+ }
244
+
245
+ }
246
+
247
+ /**
248
+ @brief step learning (called repeatedly after initialization learning)
249
+ @param[in] beta temperature for tempered EM
250
+ @return likelyhood for previous iteration
251
+ */
252
+ double step(double beta = 1)
253
+ {
254
+ int K = (int)z_.size();
255
+
256
+ DoubleVec z_numer;
257
+ DoubleVecVec user_numer, item_numer;
258
+ z_numer.resize(K);
259
+ user_numer.resize(K);
260
+ item_numer.resize(K);
261
+ for (int k = 0; k < K; k++) {
262
+ user_numer[k].resize(users_.size());
263
+ item_numer[k].resize(items_.size());
264
+ }
265
+ int denom = 0;
266
+ double likelihood = 0;
267
+ DoubleVec p_z_xy;
268
+ p_z_xy.resize(K);
269
+
270
+ for (size_t item_id = 0; item_id < matrix_.size(); ++item_id) {
271
+ BoolSVec& item_users = matrix_[item_id];
272
+ for (BoolSVec::const_iterator i = item_users.begin(), ie = item_users.end(); i != ie; ++i) {
273
+ // when n(x, y) = 1(true)
274
+ ++denom;
275
+ size_t user_id = i.pos();
276
+
277
+ // E-step: p(z|x,y)
278
+ double sum = 0;
279
+ for (int k = 0; k < K; k++) {
280
+ // p(z=k)p(x=user_id|z=k)p(y=item_id|z=k)
281
+ double p = pow(z_[k] * user_z_[k][user_id] * item_z_[k][item_id], beta);
282
+ p_z_xy[k] = p;
283
+ sum += p;
284
+ }
285
+
286
+ // normalize & M-step
287
+ for (int k = 0; k < K; k++) {
288
+ double p = p_z_xy[k] / sum;
289
+
290
+ user_numer[k][user_id] += p;
291
+ item_numer[k][item_id] += p;
292
+ z_numer[k] += p;
293
+ }
294
+ likelihood += log(sum);
295
+ }
296
+ }
297
+
298
+ // M-step: update
299
+ for (int k = 0; k < K; k++) {
300
+ double z_num = z_numer[k];
301
+ z_[k] = z_num / denom;
302
+ for (size_t item_id = 0; item_id < items_.size(); ++item_id) {
303
+ item_z_[k][item_id] = item_numer[k][item_id] / z_num;
304
+ }
305
+ for (size_t user_id = 0; user_id < users_.size(); ++user_id) {
306
+ user_z_[k][user_id] = user_numer[k][user_id] / z_num;
307
+ }
308
+ }
309
+
310
+ // log-likelihood of previous iteration
311
+ return likelihood;
312
+ }
313
+ };
314
+
315
+ } } // cybozu::nlp
@@ -0,0 +1,74 @@
1
+ #pragma once
2
+ /**
3
+ @file
4
+ @brief normal random generator
5
+
6
+ @author MITSUNARI Shigeo(@herumi)
7
+ @author MITSUNARI Shigeo
8
+ */
9
+ #include <cybozu/xorshift.hpp>
10
+
11
+ namespace cybozu { namespace nlp {
12
+
13
+ /*
14
+ use xor shift
15
+ */
16
+ class UniformRandomGenerator {
17
+ double a_;
18
+ double b_;
19
+ cybozu::XorShift rg;
20
+ public:
21
+ /* generate uniform random value in [a, b) */
22
+ explicit UniformRandomGenerator(double a = 0, double b = 1, int seed = 0)
23
+ : a_(a)
24
+ , b_(b)
25
+ , rg(seed)
26
+ {
27
+ }
28
+ void init(int seed = 0)
29
+ {
30
+ rg.init(seed);
31
+ }
32
+ /* [0, 2^32) random number */
33
+ uint32_t operator()() { return rg.get32(); }
34
+ uint32_t get32() { return rg.get32(); }
35
+ uint64_t get64() { return rg.get64(); }
36
+ /* [a, b) random number */
37
+ double getDouble()
38
+ {
39
+ uint32_t x = get32() >> 5;
40
+ uint32_t y = get32() >> 6;
41
+ double z = (x * double(1U << 26) + y) * (1.0 / double(1LL << 53));
42
+ return (b_ - a_) * z + a_;
43
+ }
44
+ };
45
+
46
+ /*
47
+ normal random generator
48
+ */
49
+ class NormalRandomGenerator {
50
+ UniformRandomGenerator gen_;
51
+ double u_;
52
+ double s_;
53
+ public:
54
+ explicit NormalRandomGenerator(double u = 0, double s = 1, int seed = 0)
55
+ : gen_(seed)
56
+ , u_(u)
57
+ , s_(s)
58
+ {
59
+ }
60
+ void init(int seed = 0)
61
+ {
62
+ gen_.init(seed);
63
+ }
64
+ double get()
65
+ {
66
+ double sum = -6;
67
+ for (int i = 0; i < 12; i++) {
68
+ sum += gen_.getDouble();
69
+ }
70
+ return sum * s_ + u_;
71
+ }
72
+ };
73
+
74
+ } } // cybozu::nlp