ooxml_crypt 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (264) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +58 -0
  5. data/Rakefile +12 -0
  6. data/bin/console +15 -0
  7. data/bin/setup +8 -0
  8. data/ext/ooxml_crypt/extconf.rb +18 -0
  9. data/ext/ooxml_crypt/ooxml_crypt.c +27 -0
  10. data/ext/ooxml_crypt/ooxml_crypt.h +7 -0
  11. data/lib/ooxml_crypt/version.rb +5 -0
  12. data/lib/ooxml_crypt.rb +75 -0
  13. data/vendor/cybozulib/.github/workflows/main.yml +12 -0
  14. data/vendor/cybozulib/.gitignore +5 -0
  15. data/vendor/cybozulib/CMakeLists.txt +6 -0
  16. data/vendor/cybozulib/COPYRIGHT +27 -0
  17. data/vendor/cybozulib/Makefile +26 -0
  18. data/vendor/cybozulib/bin/libeay32.dll +0 -0
  19. data/vendor/cybozulib/bin/libmecab.dll +0 -0
  20. data/vendor/cybozulib/bin/ssleay32.dll +0 -0
  21. data/vendor/cybozulib/common.mk +116 -0
  22. data/vendor/cybozulib/common.props +25 -0
  23. data/vendor/cybozulib/cybozulib.sln +286 -0
  24. data/vendor/cybozulib/debug.props +14 -0
  25. data/vendor/cybozulib/include/cybozu/array.hpp +197 -0
  26. data/vendor/cybozulib/include/cybozu/atoi.hpp +238 -0
  27. data/vendor/cybozulib/include/cybozu/atomic.hpp +146 -0
  28. data/vendor/cybozulib/include/cybozu/base64.hpp +210 -0
  29. data/vendor/cybozulib/include/cybozu/benchmark.hpp +212 -0
  30. data/vendor/cybozulib/include/cybozu/bfd.hpp +105 -0
  31. data/vendor/cybozulib/include/cybozu/bit_operation.hpp +139 -0
  32. data/vendor/cybozulib/include/cybozu/bitvector.hpp +358 -0
  33. data/vendor/cybozulib/include/cybozu/condition_variable.hpp +113 -0
  34. data/vendor/cybozulib/include/cybozu/condition_variable_cs.hpp +74 -0
  35. data/vendor/cybozulib/include/cybozu/config.hpp +392 -0
  36. data/vendor/cybozulib/include/cybozu/critical_section.hpp +60 -0
  37. data/vendor/cybozulib/include/cybozu/crypto.hpp +321 -0
  38. data/vendor/cybozulib/include/cybozu/csucvector.hpp +624 -0
  39. data/vendor/cybozulib/include/cybozu/csv.hpp +294 -0
  40. data/vendor/cybozulib/include/cybozu/data_type.hpp +27 -0
  41. data/vendor/cybozulib/include/cybozu/endian.hpp +224 -0
  42. data/vendor/cybozulib/include/cybozu/env.hpp +63 -0
  43. data/vendor/cybozulib/include/cybozu/event.hpp +122 -0
  44. data/vendor/cybozulib/include/cybozu/exception.hpp +253 -0
  45. data/vendor/cybozulib/include/cybozu/file.hpp +626 -0
  46. data/vendor/cybozulib/include/cybozu/fmindex.hpp +291 -0
  47. data/vendor/cybozulib/include/cybozu/format.hpp +93 -0
  48. data/vendor/cybozulib/include/cybozu/frequency.hpp +264 -0
  49. data/vendor/cybozulib/include/cybozu/hash.hpp +67 -0
  50. data/vendor/cybozulib/include/cybozu/inttype.hpp +174 -0
  51. data/vendor/cybozulib/include/cybozu/itoa.hpp +336 -0
  52. data/vendor/cybozulib/include/cybozu/json.hpp +120 -0
  53. data/vendor/cybozulib/include/cybozu/line_stream.hpp +149 -0
  54. data/vendor/cybozulib/include/cybozu/link_libeay32.hpp +21 -0
  55. data/vendor/cybozulib/include/cybozu/link_mpir.hpp +18 -0
  56. data/vendor/cybozulib/include/cybozu/link_ssleay32.hpp +19 -0
  57. data/vendor/cybozulib/include/cybozu/log.hpp +237 -0
  58. data/vendor/cybozulib/include/cybozu/minixml.hpp +452 -0
  59. data/vendor/cybozulib/include/cybozu/mmap.hpp +143 -0
  60. data/vendor/cybozulib/include/cybozu/mutex.hpp +144 -0
  61. data/vendor/cybozulib/include/cybozu/nlp/mecab.hpp +96 -0
  62. data/vendor/cybozulib/include/cybozu/nlp/plsi.hpp +315 -0
  63. data/vendor/cybozulib/include/cybozu/nlp/random.hpp +74 -0
  64. data/vendor/cybozulib/include/cybozu/nlp/sparse.hpp +529 -0
  65. data/vendor/cybozulib/include/cybozu/nlp/svd.hpp +486 -0
  66. data/vendor/cybozulib/include/cybozu/nlp/tfidf.hpp +226 -0
  67. data/vendor/cybozulib/include/cybozu/nlp/top_score.hpp +75 -0
  68. data/vendor/cybozulib/include/cybozu/option.hpp +743 -0
  69. data/vendor/cybozulib/include/cybozu/parallel.hpp +88 -0
  70. data/vendor/cybozulib/include/cybozu/pcg.hpp +72 -0
  71. data/vendor/cybozulib/include/cybozu/process.hpp +324 -0
  72. data/vendor/cybozulib/include/cybozu/quit_signal_handler.hpp +66 -0
  73. data/vendor/cybozulib/include/cybozu/random_generator.hpp +144 -0
  74. data/vendor/cybozulib/include/cybozu/regex.hpp +463 -0
  75. data/vendor/cybozulib/include/cybozu/select8.hpp +279 -0
  76. data/vendor/cybozulib/include/cybozu/serializer.hpp +363 -0
  77. data/vendor/cybozulib/include/cybozu/sha1.hpp +209 -0
  78. data/vendor/cybozulib/include/cybozu/sha2.hpp +506 -0
  79. data/vendor/cybozulib/include/cybozu/siphash.hpp +105 -0
  80. data/vendor/cybozulib/include/cybozu/socket.hpp +785 -0
  81. data/vendor/cybozulib/include/cybozu/ssl.hpp +203 -0
  82. data/vendor/cybozulib/include/cybozu/stacktrace.hpp +291 -0
  83. data/vendor/cybozulib/include/cybozu/stream.hpp +269 -0
  84. data/vendor/cybozulib/include/cybozu/string.hpp +1746 -0
  85. data/vendor/cybozulib/include/cybozu/string_operation.hpp +365 -0
  86. data/vendor/cybozulib/include/cybozu/sucvector.hpp +378 -0
  87. data/vendor/cybozulib/include/cybozu/test.hpp +373 -0
  88. data/vendor/cybozulib/include/cybozu/thread.hpp +229 -0
  89. data/vendor/cybozulib/include/cybozu/time.hpp +281 -0
  90. data/vendor/cybozulib/include/cybozu/tls.hpp +115 -0
  91. data/vendor/cybozulib/include/cybozu/unordered_map.hpp +13 -0
  92. data/vendor/cybozulib/include/cybozu/unordered_set.hpp +13 -0
  93. data/vendor/cybozulib/include/cybozu/v128.hpp +376 -0
  94. data/vendor/cybozulib/include/cybozu/wavelet_matrix.hpp +345 -0
  95. data/vendor/cybozulib/include/cybozu/xorshift.hpp +189 -0
  96. data/vendor/cybozulib/include/cybozu/zlib.hpp +325 -0
  97. data/vendor/cybozulib/include/sais.hxx +364 -0
  98. data/vendor/cybozulib/misc/make_select8tbl.cpp +26 -0
  99. data/vendor/cybozulib/mk.bat +37 -0
  100. data/vendor/cybozulib/readme.md +29 -0
  101. data/vendor/cybozulib/release.props +12 -0
  102. data/vendor/cybozulib/sample/Makefile +30 -0
  103. data/vendor/cybozulib/sample/csucvector_smpl.cpp +42 -0
  104. data/vendor/cybozulib/sample/data/svd/org/test1.S +4 -0
  105. data/vendor/cybozulib/sample/data/svd/org/test1.U +4 -0
  106. data/vendor/cybozulib/sample/data/svd/org/test1.V +6 -0
  107. data/vendor/cybozulib/sample/data/svd/test1 +4 -0
  108. data/vendor/cybozulib/sample/data/svd/test2 +4 -0
  109. data/vendor/cybozulib/sample/desymbol.cpp +127 -0
  110. data/vendor/cybozulib/sample/exception_smpl.cpp +46 -0
  111. data/vendor/cybozulib/sample/fmindex_smpl.cpp +231 -0
  112. data/vendor/cybozulib/sample/log_smpl.cpp +19 -0
  113. data/vendor/cybozulib/sample/mecab_smpl.cpp +37 -0
  114. data/vendor/cybozulib/sample/option2_smpl.cpp +68 -0
  115. data/vendor/cybozulib/sample/option_smpl.cpp +42 -0
  116. data/vendor/cybozulib/sample/plsi_smpl.cpp +207 -0
  117. data/vendor/cybozulib/sample/proj/exception_smpl.vcproj +184 -0
  118. data/vendor/cybozulib/sample/proj/mecab_smpl.vcproj +184 -0
  119. data/vendor/cybozulib/sample/proj/ssl_smpl/ssl_smpl.vcxproj +85 -0
  120. data/vendor/cybozulib/sample/proj/ssl_smpl.vcproj +347 -0
  121. data/vendor/cybozulib/sample/proj/stacktrace_smpl/stacktrace_smpl.vcxproj +85 -0
  122. data/vendor/cybozulib/sample/proj/svd_smpl.vcproj +184 -0
  123. data/vendor/cybozulib/sample/quit_signal_handler.cpp +30 -0
  124. data/vendor/cybozulib/sample/serializer_smpl.cpp +196 -0
  125. data/vendor/cybozulib/sample/socket_smpl.cpp +82 -0
  126. data/vendor/cybozulib/sample/ssl_smpl.cpp +39 -0
  127. data/vendor/cybozulib/sample/stacktrace_smpl.cpp +52 -0
  128. data/vendor/cybozulib/sample/svd_bench_smpl.cpp +143 -0
  129. data/vendor/cybozulib/sample/svd_smpl.cpp +94 -0
  130. data/vendor/cybozulib/sample/wm_bench_smpl.cpp +182 -0
  131. data/vendor/cybozulib/sample/zlib_smpl.cpp +41 -0
  132. data/vendor/cybozulib/src/Makefile +8 -0
  133. data/vendor/cybozulib/src/base/Makefile +19 -0
  134. data/vendor/cybozulib/test/Makefile +12 -0
  135. data/vendor/cybozulib/test/base/Makefile +37 -0
  136. data/vendor/cybozulib/test/base/array_test.cpp +173 -0
  137. data/vendor/cybozulib/test/base/atoi_test.cpp +774 -0
  138. data/vendor/cybozulib/test/base/atomic_test.cpp +49 -0
  139. data/vendor/cybozulib/test/base/base64_test.cpp +113 -0
  140. data/vendor/cybozulib/test/base/bit_operation_test.cpp +134 -0
  141. data/vendor/cybozulib/test/base/bitvector_test.cpp +204 -0
  142. data/vendor/cybozulib/test/base/condition_variable_cs_test.cpp +92 -0
  143. data/vendor/cybozulib/test/base/condition_variable_test.cpp +88 -0
  144. data/vendor/cybozulib/test/base/config_test.cpp +236 -0
  145. data/vendor/cybozulib/test/base/crypto_test.cpp +122 -0
  146. data/vendor/cybozulib/test/base/csucvector_test.cpp +63 -0
  147. data/vendor/cybozulib/test/base/csv_test.cpp +182 -0
  148. data/vendor/cybozulib/test/base/data/a.xml +26 -0
  149. data/vendor/cybozulib/test/base/endian_test.cpp +56 -0
  150. data/vendor/cybozulib/test/base/env_test.cpp +22 -0
  151. data/vendor/cybozulib/test/base/event_test.cpp +41 -0
  152. data/vendor/cybozulib/test/base/file_test.cpp +233 -0
  153. data/vendor/cybozulib/test/base/fmindex_test.cpp +118 -0
  154. data/vendor/cybozulib/test/base/format_test.cpp +12 -0
  155. data/vendor/cybozulib/test/base/frequency_test.cpp +104 -0
  156. data/vendor/cybozulib/test/base/itoa_test.cpp +522 -0
  157. data/vendor/cybozulib/test/base/line_stream_test.cpp +208 -0
  158. data/vendor/cybozulib/test/base/mecab_test.cpp +41 -0
  159. data/vendor/cybozulib/test/base/minixml_test.cpp +103 -0
  160. data/vendor/cybozulib/test/base/mmap_test.cpp +15 -0
  161. data/vendor/cybozulib/test/base/option_test.cpp +487 -0
  162. data/vendor/cybozulib/test/base/parallel_test.cpp +48 -0
  163. data/vendor/cybozulib/test/base/proj/array_test/array_test.vcxproj +86 -0
  164. data/vendor/cybozulib/test/base/proj/atoi_test/atoi_test.vcxproj +86 -0
  165. data/vendor/cybozulib/test/base/proj/atomic_test/atomic_test.vcxproj +86 -0
  166. data/vendor/cybozulib/test/base/proj/base64_test/base64_test.vcxproj +86 -0
  167. data/vendor/cybozulib/test/base/proj/condition_variable_cs_test/condition_variable_cs_test.vcxproj +86 -0
  168. data/vendor/cybozulib/test/base/proj/condition_variable_test/condition_variable_test.vcxproj +86 -0
  169. data/vendor/cybozulib/test/base/proj/config_test/config_test.vcxproj +86 -0
  170. data/vendor/cybozulib/test/base/proj/csv_test/csv_test.vcxproj +86 -0
  171. data/vendor/cybozulib/test/base/proj/endian_test/endian_test.vcxproj +86 -0
  172. data/vendor/cybozulib/test/base/proj/env_test/env_test.vcxproj +86 -0
  173. data/vendor/cybozulib/test/base/proj/event_test/event_test.vcxproj +86 -0
  174. data/vendor/cybozulib/test/base/proj/file_test/file_test.vcxproj +86 -0
  175. data/vendor/cybozulib/test/base/proj/itoa_test/itoa_test.vcxproj +86 -0
  176. data/vendor/cybozulib/test/base/proj/mecab_test/mecab_test.vcxproj +88 -0
  177. data/vendor/cybozulib/test/base/proj/minixml_test/minixml_test.vcxproj +86 -0
  178. data/vendor/cybozulib/test/base/proj/mmap_test/mmap_test.vcxproj +86 -0
  179. data/vendor/cybozulib/test/base/proj/serializer_test/serializer_test.vcxproj +86 -0
  180. data/vendor/cybozulib/test/base/proj/sha1_test/sha1_test.vcxproj +86 -0
  181. data/vendor/cybozulib/test/base/proj/stream_test/stream_test.vcxproj +86 -0
  182. data/vendor/cybozulib/test/base/proj/string_operation_test/string_operation_test.vcxproj +86 -0
  183. data/vendor/cybozulib/test/base/proj/string_test/string_test.vcxproj +86 -0
  184. data/vendor/cybozulib/test/base/proj/thread_test/thread_test.vcxproj +86 -0
  185. data/vendor/cybozulib/test/base/proj/time_test/time_test.vcxproj +86 -0
  186. data/vendor/cybozulib/test/base/proj/tls_test/tls_test.vcxproj +86 -0
  187. data/vendor/cybozulib/test/base/proj/zlib_test/zlib_test.vcxproj +86 -0
  188. data/vendor/cybozulib/test/base/random_generator_test.cpp +28 -0
  189. data/vendor/cybozulib/test/base/regex_test.cpp +74 -0
  190. data/vendor/cybozulib/test/base/serializer_test.cpp +483 -0
  191. data/vendor/cybozulib/test/base/sha1_test.cpp +61 -0
  192. data/vendor/cybozulib/test/base/sha2_test.cpp +191 -0
  193. data/vendor/cybozulib/test/base/siphash_test.cpp +33 -0
  194. data/vendor/cybozulib/test/base/socket_test.cpp +76 -0
  195. data/vendor/cybozulib/test/base/stream_test.cpp +101 -0
  196. data/vendor/cybozulib/test/base/string_operation_test.cpp +340 -0
  197. data/vendor/cybozulib/test/base/string_test.cpp +1705 -0
  198. data/vendor/cybozulib/test/base/sucvector_test.cpp +312 -0
  199. data/vendor/cybozulib/test/base/thread_test.cpp +62 -0
  200. data/vendor/cybozulib/test/base/time_test.cpp +164 -0
  201. data/vendor/cybozulib/test/base/tls_test.cpp +50 -0
  202. data/vendor/cybozulib/test/base/wavelet_matrix_test.cpp +145 -0
  203. data/vendor/cybozulib/test/base/zlib_test.cpp +371 -0
  204. data/vendor/cybozulib/test/nlp/Makefile +27 -0
  205. data/vendor/cybozulib/test/nlp/proj/random_test.vcproj +184 -0
  206. data/vendor/cybozulib/test/nlp/proj/sparse_test.vcproj +184 -0
  207. data/vendor/cybozulib/test/nlp/proj/svd_test.vcproj +184 -0
  208. data/vendor/cybozulib/test/nlp/random_test.cpp +62 -0
  209. data/vendor/cybozulib/test/nlp/sparse_test.cpp +347 -0
  210. data/vendor/cybozulib/test/nlp/svd_test.cpp +234 -0
  211. data/vendor/cybozulib/test/nlp/top_score_test.cpp +40 -0
  212. data/vendor/cybozulib/tool/create_vcproj.py +186 -0
  213. data/vendor/cybozulib/tool/vcproj_tmpl.py +185 -0
  214. data/vendor/msoffice/COPYRIGHT +27 -0
  215. data/vendor/msoffice/Makefile +29 -0
  216. data/vendor/msoffice/bin/64/msoc.dll +0 -0
  217. data/vendor/msoffice/bin/64/msocsample.exe +0 -0
  218. data/vendor/msoffice/bin/64/msoffice-crypt.exe +0 -0
  219. data/vendor/msoffice/bin/msoc.dll +0 -0
  220. data/vendor/msoffice/bin/msocsample.exe +0 -0
  221. data/vendor/msoffice/bin/msoffice-crypt.exe +0 -0
  222. data/vendor/msoffice/common.mk +71 -0
  223. data/vendor/msoffice/common.props +26 -0
  224. data/vendor/msoffice/debug.props +14 -0
  225. data/vendor/msoffice/include/attack.hpp +211 -0
  226. data/vendor/msoffice/include/cfb.hpp +777 -0
  227. data/vendor/msoffice/include/crypto_util.hpp +450 -0
  228. data/vendor/msoffice/include/custom_sha1.hpp +342 -0
  229. data/vendor/msoffice/include/decode.hpp +240 -0
  230. data/vendor/msoffice/include/encode.hpp +221 -0
  231. data/vendor/msoffice/include/make_dataspace.hpp +316 -0
  232. data/vendor/msoffice/include/msoc.h +129 -0
  233. data/vendor/msoffice/include/resource.hpp +7 -0
  234. data/vendor/msoffice/include/standard_encryption.hpp +145 -0
  235. data/vendor/msoffice/include/uint32vec.hpp +179 -0
  236. data/vendor/msoffice/include/util.hpp +212 -0
  237. data/vendor/msoffice/lib/.emptydir +0 -0
  238. data/vendor/msoffice/misc/decrypt-xls.vbs +46 -0
  239. data/vendor/msoffice/mk.bat +1 -0
  240. data/vendor/msoffice/mkdll.bat +3 -0
  241. data/vendor/msoffice/msoc.def +13 -0
  242. data/vendor/msoffice/msocsample.py +178 -0
  243. data/vendor/msoffice/msoffice12.sln +31 -0
  244. data/vendor/msoffice/readme.md +110 -0
  245. data/vendor/msoffice/release.props +28 -0
  246. data/vendor/msoffice/src/Makefile +19 -0
  247. data/vendor/msoffice/src/attack.cpp +124 -0
  248. data/vendor/msoffice/src/cfb_test.cpp +77 -0
  249. data/vendor/msoffice/src/minisample.c +54 -0
  250. data/vendor/msoffice/src/msocdll.cpp +276 -0
  251. data/vendor/msoffice/src/msocsample.c +136 -0
  252. data/vendor/msoffice/src/msoffice-crypt.cpp +219 -0
  253. data/vendor/msoffice/src/proj/attack/attack.vcxproj +88 -0
  254. data/vendor/msoffice/src/proj/main/msoffice-crypt.vcxproj +88 -0
  255. data/vendor/msoffice/src/sha1.cpp +234 -0
  256. data/vendor/msoffice/test/Makefile +20 -0
  257. data/vendor/msoffice/test/cfb_test.cpp +74 -0
  258. data/vendor/msoffice/test/hash_test.cpp +59 -0
  259. data/vendor/msoffice/test/proj/cfb/cfb_test.vcxproj +90 -0
  260. data/vendor/msoffice/test/proj/hash/hash_test.vcxproj +90 -0
  261. data/vendor/msoffice/test/sampl.bat +8 -0
  262. data/vendor/msoffice/test_all.py +46 -0
  263. data/vendor/update +4 -0
  264. metadata +351 -0
@@ -0,0 +1,46 @@
1
+ #include <cybozu/exception.hpp>
2
+ #include <cybozu/string.hpp>
3
+ #include <cybozu/atoi.hpp>
4
+ #include <iostream>
5
+ #include <assert.h>
6
+ #include <cybozu/stacktrace.hpp>
7
+
8
+ struct MailException : cybozu::Exception {
9
+ MailException() : cybozu::Exception("mail") { }
10
+ };
11
+
12
+ void f2()
13
+ {
14
+ const char *msg = "HTTP/...";
15
+ std::string abc = "abc";
16
+ char c = 'x';
17
+ int port = 80;
18
+ unsigned int s = 90;
19
+ MailException e;
20
+ e << "can't send" << msg << abc << c << port << s << '\n';
21
+ cybozu::StackTrace st;
22
+ e << st;
23
+ throw e;
24
+ }
25
+
26
+ void f1()
27
+ {
28
+ f2();
29
+ }
30
+
31
+ void f0()
32
+ {
33
+ f1();
34
+ }
35
+
36
+ int main()
37
+ {
38
+ try {
39
+ f0();
40
+ } catch (cybozu::Exception &e) {
41
+ std::cout << "for user" << std::endl;
42
+ std::cout << e.toString() << std::endl;
43
+ } catch (...) {
44
+ std::cout << "Error!" << std::endl;
45
+ }
46
+ }
@@ -0,0 +1,231 @@
1
+ #include <fstream>
2
+ #include <cybozu/time.hpp>
3
+ #include <cybozu/fmindex.hpp>
4
+ #include <cybozu/mmap.hpp>
5
+ #include <cybozu/string.hpp>
6
+ #include <cybozu/hash.hpp>
7
+ #include <cybozu/benchmark.hpp>
8
+ #include <set>
9
+
10
+ #ifdef USE_UTF32
11
+ typedef cybozu::FMindexT<cybozu::Char> FMindex;
12
+ typedef cybozu::String String;
13
+ #else
14
+ typedef cybozu::FMindex FMindex;
15
+ typedef std::string String;
16
+ #endif
17
+
18
typedef std::set<int> Set;

/*
	write every element of set to std::cout in iteration (ascending) order,
	each one followed by a single space, then finish the line with std::endl
*/
void putSet(const Set& set)
{
	Set::const_iterator cur = set.begin();
	const Set::const_iterator last = set.end();
	while (cur != last) {
		std::cout << *cur << ' ';
		++cur;
	}
	std::cout << std::endl;
}
27
+
28
+ template<class STRING>
29
+ void simpleSearch(const std::string& inName, const std::string& queryFile, bool putHash)
30
+ {
31
+ cybozu::Mmap m(inName);
32
+ STRING text(m.get(), m.size());
33
+
34
+ double beginTime = cybozu::GetCurrentTimeSec();
35
+
36
+ std::ifstream qs(queryFile.c_str(), std::ios::binary);
37
+ STRING key;
38
+ uint64_t hash = 0;
39
+ while (qs >> key) {
40
+ if (!putHash) std::cout << "query " << key << std::endl;
41
+ size_t p = 0;
42
+ Set set;
43
+ for (;;) {
44
+ size_t q = text.find(key, p);
45
+ if (q == std::string::npos) break;
46
+ set.insert((int)q);
47
+ p = q + 1;
48
+ }
49
+ if (putHash) {
50
+ hash = cybozu::hash64(set.begin(), set.end(), hash);
51
+ } else {
52
+ putSet(set);
53
+ }
54
+ }
55
+ if (putHash) printf("hash=%llx\n", (long long)hash);
56
+
57
+ double endTime = cybozu::GetCurrentTimeSec();
58
+ fprintf(stderr, "time: %gsec\n", endTime - beginTime);
59
+ }
60
+
61
+ template<class FMINDEX, class STRING>
62
+ void recover(const std::string& inName, const std::string& outName)
63
+ {
64
+ std::ifstream is(inName.c_str(), std::ios::binary);
65
+ FMINDEX f;
66
+ f.load(is);
67
+
68
+ double beginTime = cybozu::GetCurrentTimeSec();
69
+
70
+ STRING str;
71
+ f.getPrevString(str, 0, f.wm.size() - 1);
72
+ double endTime = cybozu::GetCurrentTimeSec();
73
+ fprintf(stderr, "time: %gsec\n", endTime - beginTime);
74
+ std::ofstream os(outName.c_str(), std::ios::binary);
75
+ os << str;
76
+ }
77
+
78
+ template<class FMINDEX, class STRING>
79
+ void search(const std::string& inName, const std::string& queryFile, bool putHash, bool bench)
80
+ {
81
+ std::ifstream is(inName.c_str(), std::ios::binary);
82
+ FMINDEX f;
83
+ f.load(is);
84
+
85
+ double beginTime = cybozu::GetCurrentTimeSec();
86
+
87
+ std::ifstream qs(queryFile.c_str(), std::ios::binary);
88
+ STRING key;
89
+ uint64_t hash = 0;
90
+ cybozu::CpuClock clkRange;
91
+ cybozu::CpuClock clkPos;
92
+ while (qs >> key) {
93
+ if (!putHash) std::cout << "query " << key << std::endl;
94
+ size_t begin, end = 0;
95
+ if (bench) clkRange.begin();
96
+ bool found = f.getRange(&begin, &end, key);
97
+ if (bench) clkRange.end();
98
+ Set set;
99
+ if (found) {
100
+ while (begin != end) {
101
+ if (bench) clkPos.begin();
102
+ int pos = (int)f.convertPosition(begin);
103
+ if (bench) clkPos.end();
104
+ set.insert(pos);
105
+ begin++;
106
+ }
107
+ }
108
+ if (putHash) {
109
+ hash = cybozu::hash64(set.begin(), set.end(), hash);
110
+ } else {
111
+ putSet(set);
112
+ }
113
+ }
114
+ if (putHash) printf("hash=%llx\n", (long long)hash);
115
+
116
+ double endTime = cybozu::GetCurrentTimeSec();
117
+ fprintf(stderr, "time: %gsec\n", endTime - beginTime);
118
+ if (bench) {
119
+ int rangeNum = (int)clkRange.getCount();
120
+ int posNum = (int)clkPos.getCount();
121
+ fprintf(stderr, "getRange %.2f(%d) pos %.2f(%d)\n", clkRange.getClock() / double(rangeNum), rangeNum, clkPos.getClock() / double(posNum), posNum);
122
+ }
123
+ }
124
+
125
+ template<class FMINDEX, class STRING>
126
+ static void create(const std::string& inName, const std::string& outName, int skip)
127
+ {
128
+ fprintf(stderr, "inName=%s, outName=%s, skip=%d\n", inName.c_str(), outName.c_str(), skip);
129
+
130
+ double beginTime = cybozu::GetCurrentTimeSec();
131
+
132
+ cybozu::Mmap m(inName);
133
+ FMINDEX f;
134
+ STRING text(m.get(), m.get() + m.size());
135
+ f.init(text.begin(), text.end(), skip);
136
+
137
+ double endTime = cybozu::GetCurrentTimeSec();
138
+ fprintf(stderr, "create time %gsec\n", endTime - beginTime);
139
+ std::ofstream os(outName.c_str(), std::ios::binary);
140
+ f.save(os);
141
+ }
142
+
143
/*
	print the command line help to stdout and terminate with exit status 1
*/
void usage()
{
	// none of the lines contains a '%', so writing them verbatim gives the same output as printf
	static const char *const helpLines[] = {
		"fmindex_smpl.exe (-c|-s|-r|-ss) file1 file2 [-skip skip][-hash][-time]\n",
		" -c : create index file\n",
		" file1 : any UTF-8 string file\n",
		" file2 : output index file\n",
		" -skip skip : skip to sampling(default 8)\n",
		" -hash : put position hash\n",
		" -time : benchmark\n",
		" -s : search mode\n",
		" file1 : index file\n",
		" file2 : query string file\n",
		" -r : recover mode\n",
		" file1 : index file\n",
		" file2 : org index file\n",
		" -ss: simple search\n",
		" file1 : any UTF-8 string file\n",
		" file2 : query string file\n",
	};
	const size_t lineNum = sizeof(helpLines) / sizeof(helpLines[0]);
	for (size_t i = 0; i < lineNum; i++) {
		fputs(helpLines[i], stdout);
	}
	exit(1);
}
163
+
164
+ int main(int argc, char* argv[])
165
+ try
166
+ {
167
+ argc--, argv++;
168
+ std::string fName1;
169
+ std::string fName2;
170
+ std::string mode;
171
+ int skip = 8;
172
+ bool putHash = false;
173
+ bool bench = false;
174
+
175
+ while (argc > 0) {
176
+ if (strcmp(*argv, "-c") == 0) {
177
+ mode = *argv;
178
+ } else
179
+ if (strcmp(*argv, "-s") == 0) {
180
+ mode = *argv;
181
+ } else
182
+ if (strcmp(*argv, "-r") == 0) {
183
+ mode = *argv;
184
+ } else
185
+ if (strcmp(*argv, "-ss") == 0) {
186
+ mode = *argv;
187
+ } else
188
+ if (argc > 1 && strcmp(*argv, "-skip") == 0) {
189
+ argc--, argv++;
190
+ skip = atoi(*argv);
191
+ } else
192
+ if (strcmp(*argv, "-hash") == 0) {
193
+ putHash = true;
194
+ } else
195
+ if (strcmp(*argv, "-time") == 0) {
196
+ bench = true;
197
+ } else
198
+ if (**argv != '-' && fName1.empty()) {
199
+ fName1 = *argv;
200
+ } else
201
+ if (**argv != '-' && fName2.empty()) {
202
+ fName2 = *argv;
203
+ } else
204
+ {
205
+ usage();
206
+ }
207
+ argc--, argv++;
208
+ }
209
+ if (fName1.empty() || fName2.empty() || mode.empty()) {
210
+ usage();
211
+ }
212
+ if (mode == "-c") {
213
+ create<FMindex, String>(fName1, fName2, skip);
214
+ } else
215
+ if (mode == "-s") {
216
+ search<FMindex, String>(fName1, fName2, putHash, bench);
217
+ } else
218
+ if (mode == "-r") {
219
+ recover<FMindex, String>(fName1, fName2);
220
+ } else
221
+ if (mode == "-ss") {
222
+ simpleSearch<String>(fName1, fName2, putHash);
223
+ } else
224
+ {
225
+ usage();
226
+ }
227
+ } catch (std::exception& e) {
228
+ printf("ERR %s\n", e.what());
229
+ return 1;
230
+ }
231
+
@@ -0,0 +1,19 @@
1
+ #include <stdio.h>
2
+ #include <cybozu/log.hpp>
3
+
4
+ int main()
5
+ {
6
+ cybozu::PutLog(cybozu::LogInfo, "this is a pen1");
7
+ cybozu::useSyslog(false);
8
+ cybozu::SetLogUseMsec();
9
+ cybozu::PutLog(cybozu::LogInfo, "this is a pen2");
10
+ cybozu::OpenLogFile("test.log");
11
+ cybozu::PutLog(cybozu::LogInfo, "this is a pen3");
12
+ cybozu::useSyslog(true);
13
+ cybozu::PutLog(cybozu::LogInfo, "this is a pen4");
14
+
15
+ cybozu::PutLog(cybozu::LogInfo, "AAtest");
16
+ cybozu::SetLogPriority(cybozu::LogInfo);
17
+ cybozu::PutLog(cybozu::LogInfo, "AAtest2");
18
+ cybozu::PutLog(cybozu::LogDebug, "not print");
19
+ }
@@ -0,0 +1,37 @@
1
+ #include <vector>
2
+ #include <stdio.h>
3
+ #include <cybozu/nlp/mecab.hpp>
4
+ #include <cybozu/mmap.hpp>
5
+
6
+ int main(int argc, char *argv[])
7
+ {
8
+ argc--, argv++;
9
+ if (argc == 0) {
10
+ fprintf(stderr, "mecab_smpl filename\n");
11
+ return 1;
12
+ }
13
+ try {
14
+ const std::string fileName = argv[0];
15
+ cybozu::Mmap mmap(fileName);
16
+ if (mmap.size() > (1 << 30)) {
17
+ fprintf(stderr, "file is too large %lld\n", (long long)mmap.size());
18
+ return 1;
19
+ }
20
+
21
+ cybozu::nlp::Mecab mecab;
22
+ typedef std::vector<std::string> StrVec;
23
+ StrVec sv;
24
+ if (mecab.parse(sv, mmap.get(), (int)mmap.size())) {
25
+ for (size_t i = 0, n = sv.size(); i < n; i++) {
26
+ printf("%s ", sv[i].c_str());
27
+ }
28
+ printf("\n");
29
+ }
30
+ return 0;
31
+ } catch (std::exception& e) {
32
+ fprintf(stderr, "exception %s\n", e.what());
33
+ } catch (...) {
34
+ fprintf(stderr, "unknown exception\n");
35
+ }
36
+ return 1;
37
+ }
@@ -0,0 +1,68 @@
1
+ /*
2
+ how to use two step option parser
3
+ */
4
+ #include <stdio.h>
5
+ #include <cybozu/option.hpp>
6
+ #include <vector>
7
+
8
+ struct Opt {
9
+ // common option
10
+ int x;
11
+ cybozu::Option opt1;
12
+
13
+ // cmd option
14
+ std::string cmd;
15
+
16
+ std::string init_s;
17
+ double run_d;
18
+ char status_c;
19
+ cybozu::Option opt2;
20
+
21
+ int parse1(int argc, char *argv[])
22
+ {
23
+ opt1.appendOpt(&x, 5, "x", " :value");
24
+ opt1.appendDelimiter("init");
25
+ opt1.appendDelimiter("run");
26
+ opt1.appendDelimiter("status");
27
+ opt1.appendHelp("h");
28
+ opt1.setUsage("option2 [opt] (init|run|status)", true);
29
+
30
+ if (!opt1.parse(argc, argv)) return false;
31
+ const int pos = opt1.getNextPositionOfDelimiter();
32
+ if (pos == 0) return 0;
33
+ cmd = argv[pos - 1];
34
+ if (cmd == "init") {
35
+ opt2.appendOpt(&init_s, "abc", "s", " :string");
36
+ } else if (cmd == "run") {
37
+ opt2.appendOpt(&run_d, 1.2, "d", " :double");
38
+ } else if (cmd == "status") {
39
+ opt2.appendOpt(&status_c, 'X', "c", " :char");
40
+ } else {
41
+ return 0;
42
+ }
43
+ opt2.appendHelp("h");
44
+ return pos;
45
+ }
46
+ void parse(int argc, char *argv[])
47
+ {
48
+ int pos = parse1(argc, argv);
49
+ if (pos == 0) {
50
+ opt1.usage();
51
+ exit(1);
52
+ }
53
+ if (!opt2.parse(argc, argv, pos)) {
54
+ opt2.usage();
55
+ exit(1);
56
+ }
57
+ puts("common");
58
+ opt1.put();
59
+ printf("opt for %s\n", cmd.c_str());
60
+ opt2.put();
61
+ }
62
+ };
63
+
64
+ int main(int argc, char *argv[])
65
+ {
66
+ Opt opt;
67
+ opt.parse(argc, argv);
68
+ }
@@ -0,0 +1,42 @@
1
+ /*
2
+ how to use
3
+ */
4
+ #include <stdio.h>
5
+ #include <cybozu/option.hpp>
6
+ #include <vector>
7
+
8
+ int main(int argc, char *argv[])
9
+ try
10
+ {
11
+ int x;
12
+ bool b;
13
+ double d;
14
+ std::string y;
15
+ std::vector<int> z;
16
+ std::vector<std::string> w;
17
+ std::string inName;
18
+ std::vector<std::string> r;
19
+ std::vector<std::string> vi;
20
+ uint64_t u;
21
+
22
+ cybozu::Option opt;
23
+
24
+ opt.appendOpt(&x, 5, "x", "int");
25
+ opt.appendBoolOpt(&b, "b", "bool");
26
+ opt.appendMust(&d, "d", "double");
27
+ opt.appendMust(&y, "y", "string");
28
+ opt.appendVec(&z, "z", "int int int ...");
29
+ opt.appendVec(&w, "w", "str str str ...");
30
+ opt.appendOpt(&u, 0, "u", "uint64 val");
31
+ opt.appendParam(&inName, "input-file", "text file");
32
+ opt.appendParamVec(&vi, "remains", "sss");
33
+ opt.appendHelp("h");
34
+
35
+ if (opt.parse(argc, argv)) {
36
+ opt.put();
37
+ } else {
38
+ opt.usage();
39
+ }
40
+ } catch (std::exception& e) {
41
+ printf("ERR %s\n", e.what());
42
+ }
@@ -0,0 +1,207 @@
1
+ /**
2
+ pLSI(probabilistic latent semantic indexing)
3
+ @author MITSUNARI Shigeo(@herumi)
4
+ */
5
+
6
+ #include <stdio.h>
7
+ #include <map>
8
+ #include <cybozu/file.hpp>
9
+ #include <cybozu/csv.hpp>
10
+ #include <cybozu/nlp/plsi.hpp>
11
+ #include <cybozu/string_operation.hpp>
12
+ #include <cybozu/time.hpp>
13
+ #include <iostream>
14
+
15
+ void load(cybozu::nlp::Plsi& plsi, const std::string& filepath)
16
+ {
17
+ cybozu::CsvReader csv(filepath, ' ');
18
+ std::vector<std::string> line;
19
+ while (csv.read(line)) {
20
+ cybozu::nlp::Plsi::ITEM_TYPE item_key = cybozu::atoi(line[0]);
21
+ size_t size = line.size();
22
+ if (size < 2) continue;
23
+ std::map<size_t, bool> map;
24
+ for (size_t i = 1; i < line.size(); ++i) {
25
+ cybozu::nlp::Plsi::USER_TYPE user_key = cybozu::atoi(line[i]);
26
+ map[plsi.get_user_id(user_key)] = true;
27
+ }
28
+ plsi.getItem(item_key).set(map);
29
+ }
30
+ }
31
+
32
/*
	print the options accepted by main (-d, -k, -i), one per line,
	and terminate with exit status 1
*/
void usage()
{
	printf("usage: plsi [option]\n");
	printf(" -d [dir] : directory of the dataset files\n");
	printf(" -k [num] : # of latent classes\n");
	printf(" -i [num] : # of iterations\n");
	exit(1);
}
39
+
40
/**
	@brief one entry of an Atnd list (a user or an event)
*/
struct AtndData {
	std::string date; // filled in for event entries only
	std::string name; // name of the user or of the event
};
47
+
48
+ /**
49
+ @brief Atnd Information (Users / Events)
50
+ */
51
+ struct AtndInfo {
52
+ typedef std::map<int, AtndData> Int2Data;
53
+ typedef std::map<std::string, int> Str2Int;
54
+ Int2Data int2data_;
55
+ Str2Int name2id_;
56
+ /**
57
+ @brief load list of Atnd Users / Events
58
+ @param[in] name filename of list
59
+ @param[in] isEvent Is it an event list?
60
+ */
61
+ bool loadList(const std::string& name, bool isEvent)
62
+ {
63
+ std::ifstream ifs(name.c_str(), std::ios::binary);
64
+ if (!ifs) return false;
65
+ for (;;) {
66
+ AtndData t;
67
+ int id;
68
+ if (!(ifs >> id)) break;
69
+ if (isEvent) {
70
+ std::string str;
71
+ ifs >> str;
72
+ if (str.empty()) return false;
73
+ if (str.size() < 6) {
74
+ fprintf(stderr, "bad format %s\n", str.c_str());
75
+ return false;
76
+ }
77
+ str = str.substr(0, str.size() - 6); // "+09:00"
78
+ cybozu::Time time(str);
79
+ time.setTime(time.getTime() + 9 * 3600);
80
+ time.toString(t.date, "%Y/%m/%d", false);
81
+ }
82
+ std::getline(ifs, t.name);
83
+ cybozu::Trim(t.name);
84
+ if (!ifs) break;
85
+ int2data_[id] = t;
86
+ name2id_[t.name] = id;
87
+ }
88
+ return true;
89
+ }
90
+ /**
91
+ @brief load list of Atnd Users / Events. (generates filename from isEvent parameter)
92
+ @param[in] dir directory name where list exists
93
+ @param[in] isEvent Is it an event list?
94
+ */
95
+ bool load(const std::string& dir, bool isEvent)
96
+ {
97
+ const std::string key = isEvent ? "event" : "user";
98
+ std::string name;
99
+ name = dir + "/atnd-" + key + ".txt";
100
+ if (!loadList(name, isEvent)) {
101
+ fprintf(stderr, "can't read %s (%d)\n", name.c_str(), isEvent);
102
+ return false;
103
+ }
104
+ return true;
105
+ }
106
+ };
107
+
108
+ int main(int argc, char** argv)
109
+ {
110
+ std::string data_dir = cybozu::GetExePath() + "../sample/data/plsi/";
111
+
112
+ int K = 20;
113
+ int Iter = 100;
114
+ argc--, argv++;
115
+ while (argc > 0) {
116
+ if (argc > 1 && strcmp(*argv, "-d") == 0) {
117
+ argc--, argv++;
118
+ data_dir = *argv;
119
+ } else if (argc > 1 && strcmp(*argv, "-k") == 0) {
120
+ argc--, argv++;
121
+ K = cybozu::atoi(*argv);
122
+ } else if (argc > 1 && strcmp(*argv, "-i") == 0) {
123
+ argc--, argv++;
124
+ Iter = cybozu::atoi(*argv);
125
+ } else {
126
+ usage();
127
+ }
128
+ argc--, argv++;
129
+ }
130
+ const std::string name = data_dir + "/atnd-user-matrix.txt";
131
+
132
+ cybozu::nlp::Plsi plsi;
133
+ try {
134
+ AtndInfo event_master, user_master;
135
+ event_master.load(data_dir, true);
136
+ user_master.load(data_dir, false);
137
+
138
+ load(plsi, name);
139
+ plsi.startLearning(K);
140
+ {
141
+ puts("learning");
142
+ double pre_likelihood = -1e30;
143
+ double beta = 1;
144
+ for (int i = 0; i < Iter; ++i) {
145
+ double likelihood = plsi.step();
146
+ printf("%d : %.3f %.3f %.3f\n", i, beta, likelihood, likelihood - pre_likelihood);
147
+ if (likelihood - pre_likelihood < 1) {
148
+ beta *= 0.9;
149
+ if (beta < 0.01) break;
150
+ }
151
+ pre_likelihood = likelihood;
152
+ }
153
+ }
154
+
155
+ int mode = 0;
156
+ cybozu::nlp::Plsi::SEARCH_TYPE search_type = cybozu::nlp::Plsi::JOINT;
157
+
158
+ for(;;) {
159
+ std::string st;
160
+ std::cin >> st;
161
+ if (st == "") break;
162
+ if (st == "ui") {
163
+ mode = 0;
164
+ printf("user => items\n");
165
+ continue;
166
+ }
167
+ if (st == "ii") {
168
+ mode = 1;
169
+ printf("item => items\n");
170
+ continue;
171
+ }
172
+ if (st == "sj") {
173
+ search_type = cybozu::nlp::Plsi::JOINT;
174
+ printf("search type: JOINT probability\n");
175
+ continue;
176
+ }
177
+ if (st == "sc") {
178
+ search_type = cybozu::nlp::Plsi::CONDITIONAL;
179
+ printf("search type: CONDITIONAL probability\n");
180
+ continue;
181
+ }
182
+ if (st == "sp") {
183
+ search_type = cybozu::nlp::Plsi::POSTERIOR;
184
+ printf("search type: POSTERIOR probability\n");
185
+ continue;
186
+ }
187
+
188
+ cybozu::nlp::TopScore<size_t>::Table tbl;
189
+ switch(mode) {
190
+ case 0:
191
+ tbl = plsi.search_items(cybozu::atoi(st), 10);
192
+ break;
193
+ case 1:
194
+ tbl = plsi.similar_items(cybozu::atoi(st), search_type, 10);
195
+ break;
196
+ }
197
+
198
+ for (size_t i = 0; i < tbl.size(); i++) {
199
+ cybozu::nlp::Plsi::ITEM_TYPE key = plsi.get_item_key(tbl[i].idx);
200
+ printf("%1.3f %d:%s\n", log(tbl[i].score), key, event_master.int2data_[key].name.c_str());
201
+ }
202
+ }
203
+
204
+ } catch (std::exception& e) {
205
+ printf("error : %s\n", e.what());
206
+ }
207
+ }