ooxml_crypt 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (264) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +58 -0
  5. data/Rakefile +12 -0
  6. data/bin/console +15 -0
  7. data/bin/setup +8 -0
  8. data/ext/ooxml_crypt/extconf.rb +18 -0
  9. data/ext/ooxml_crypt/ooxml_crypt.c +27 -0
  10. data/ext/ooxml_crypt/ooxml_crypt.h +7 -0
  11. data/lib/ooxml_crypt/version.rb +5 -0
  12. data/lib/ooxml_crypt.rb +75 -0
  13. data/vendor/cybozulib/.github/workflows/main.yml +12 -0
  14. data/vendor/cybozulib/.gitignore +5 -0
  15. data/vendor/cybozulib/CMakeLists.txt +6 -0
  16. data/vendor/cybozulib/COPYRIGHT +27 -0
  17. data/vendor/cybozulib/Makefile +26 -0
  18. data/vendor/cybozulib/bin/libeay32.dll +0 -0
  19. data/vendor/cybozulib/bin/libmecab.dll +0 -0
  20. data/vendor/cybozulib/bin/ssleay32.dll +0 -0
  21. data/vendor/cybozulib/common.mk +116 -0
  22. data/vendor/cybozulib/common.props +25 -0
  23. data/vendor/cybozulib/cybozulib.sln +286 -0
  24. data/vendor/cybozulib/debug.props +14 -0
  25. data/vendor/cybozulib/include/cybozu/array.hpp +197 -0
  26. data/vendor/cybozulib/include/cybozu/atoi.hpp +238 -0
  27. data/vendor/cybozulib/include/cybozu/atomic.hpp +146 -0
  28. data/vendor/cybozulib/include/cybozu/base64.hpp +210 -0
  29. data/vendor/cybozulib/include/cybozu/benchmark.hpp +212 -0
  30. data/vendor/cybozulib/include/cybozu/bfd.hpp +105 -0
  31. data/vendor/cybozulib/include/cybozu/bit_operation.hpp +139 -0
  32. data/vendor/cybozulib/include/cybozu/bitvector.hpp +358 -0
  33. data/vendor/cybozulib/include/cybozu/condition_variable.hpp +113 -0
  34. data/vendor/cybozulib/include/cybozu/condition_variable_cs.hpp +74 -0
  35. data/vendor/cybozulib/include/cybozu/config.hpp +392 -0
  36. data/vendor/cybozulib/include/cybozu/critical_section.hpp +60 -0
  37. data/vendor/cybozulib/include/cybozu/crypto.hpp +321 -0
  38. data/vendor/cybozulib/include/cybozu/csucvector.hpp +624 -0
  39. data/vendor/cybozulib/include/cybozu/csv.hpp +294 -0
  40. data/vendor/cybozulib/include/cybozu/data_type.hpp +27 -0
  41. data/vendor/cybozulib/include/cybozu/endian.hpp +224 -0
  42. data/vendor/cybozulib/include/cybozu/env.hpp +63 -0
  43. data/vendor/cybozulib/include/cybozu/event.hpp +122 -0
  44. data/vendor/cybozulib/include/cybozu/exception.hpp +253 -0
  45. data/vendor/cybozulib/include/cybozu/file.hpp +626 -0
  46. data/vendor/cybozulib/include/cybozu/fmindex.hpp +291 -0
  47. data/vendor/cybozulib/include/cybozu/format.hpp +93 -0
  48. data/vendor/cybozulib/include/cybozu/frequency.hpp +264 -0
  49. data/vendor/cybozulib/include/cybozu/hash.hpp +67 -0
  50. data/vendor/cybozulib/include/cybozu/inttype.hpp +174 -0
  51. data/vendor/cybozulib/include/cybozu/itoa.hpp +336 -0
  52. data/vendor/cybozulib/include/cybozu/json.hpp +120 -0
  53. data/vendor/cybozulib/include/cybozu/line_stream.hpp +149 -0
  54. data/vendor/cybozulib/include/cybozu/link_libeay32.hpp +21 -0
  55. data/vendor/cybozulib/include/cybozu/link_mpir.hpp +18 -0
  56. data/vendor/cybozulib/include/cybozu/link_ssleay32.hpp +19 -0
  57. data/vendor/cybozulib/include/cybozu/log.hpp +237 -0
  58. data/vendor/cybozulib/include/cybozu/minixml.hpp +452 -0
  59. data/vendor/cybozulib/include/cybozu/mmap.hpp +143 -0
  60. data/vendor/cybozulib/include/cybozu/mutex.hpp +144 -0
  61. data/vendor/cybozulib/include/cybozu/nlp/mecab.hpp +96 -0
  62. data/vendor/cybozulib/include/cybozu/nlp/plsi.hpp +315 -0
  63. data/vendor/cybozulib/include/cybozu/nlp/random.hpp +74 -0
  64. data/vendor/cybozulib/include/cybozu/nlp/sparse.hpp +529 -0
  65. data/vendor/cybozulib/include/cybozu/nlp/svd.hpp +486 -0
  66. data/vendor/cybozulib/include/cybozu/nlp/tfidf.hpp +226 -0
  67. data/vendor/cybozulib/include/cybozu/nlp/top_score.hpp +75 -0
  68. data/vendor/cybozulib/include/cybozu/option.hpp +743 -0
  69. data/vendor/cybozulib/include/cybozu/parallel.hpp +88 -0
  70. data/vendor/cybozulib/include/cybozu/pcg.hpp +72 -0
  71. data/vendor/cybozulib/include/cybozu/process.hpp +324 -0
  72. data/vendor/cybozulib/include/cybozu/quit_signal_handler.hpp +66 -0
  73. data/vendor/cybozulib/include/cybozu/random_generator.hpp +144 -0
  74. data/vendor/cybozulib/include/cybozu/regex.hpp +463 -0
  75. data/vendor/cybozulib/include/cybozu/select8.hpp +279 -0
  76. data/vendor/cybozulib/include/cybozu/serializer.hpp +363 -0
  77. data/vendor/cybozulib/include/cybozu/sha1.hpp +209 -0
  78. data/vendor/cybozulib/include/cybozu/sha2.hpp +506 -0
  79. data/vendor/cybozulib/include/cybozu/siphash.hpp +105 -0
  80. data/vendor/cybozulib/include/cybozu/socket.hpp +785 -0
  81. data/vendor/cybozulib/include/cybozu/ssl.hpp +203 -0
  82. data/vendor/cybozulib/include/cybozu/stacktrace.hpp +291 -0
  83. data/vendor/cybozulib/include/cybozu/stream.hpp +269 -0
  84. data/vendor/cybozulib/include/cybozu/string.hpp +1746 -0
  85. data/vendor/cybozulib/include/cybozu/string_operation.hpp +365 -0
  86. data/vendor/cybozulib/include/cybozu/sucvector.hpp +378 -0
  87. data/vendor/cybozulib/include/cybozu/test.hpp +373 -0
  88. data/vendor/cybozulib/include/cybozu/thread.hpp +229 -0
  89. data/vendor/cybozulib/include/cybozu/time.hpp +281 -0
  90. data/vendor/cybozulib/include/cybozu/tls.hpp +115 -0
  91. data/vendor/cybozulib/include/cybozu/unordered_map.hpp +13 -0
  92. data/vendor/cybozulib/include/cybozu/unordered_set.hpp +13 -0
  93. data/vendor/cybozulib/include/cybozu/v128.hpp +376 -0
  94. data/vendor/cybozulib/include/cybozu/wavelet_matrix.hpp +345 -0
  95. data/vendor/cybozulib/include/cybozu/xorshift.hpp +189 -0
  96. data/vendor/cybozulib/include/cybozu/zlib.hpp +325 -0
  97. data/vendor/cybozulib/include/sais.hxx +364 -0
  98. data/vendor/cybozulib/misc/make_select8tbl.cpp +26 -0
  99. data/vendor/cybozulib/mk.bat +37 -0
  100. data/vendor/cybozulib/readme.md +29 -0
  101. data/vendor/cybozulib/release.props +12 -0
  102. data/vendor/cybozulib/sample/Makefile +30 -0
  103. data/vendor/cybozulib/sample/csucvector_smpl.cpp +42 -0
  104. data/vendor/cybozulib/sample/data/svd/org/test1.S +4 -0
  105. data/vendor/cybozulib/sample/data/svd/org/test1.U +4 -0
  106. data/vendor/cybozulib/sample/data/svd/org/test1.V +6 -0
  107. data/vendor/cybozulib/sample/data/svd/test1 +4 -0
  108. data/vendor/cybozulib/sample/data/svd/test2 +4 -0
  109. data/vendor/cybozulib/sample/desymbol.cpp +127 -0
  110. data/vendor/cybozulib/sample/exception_smpl.cpp +46 -0
  111. data/vendor/cybozulib/sample/fmindex_smpl.cpp +231 -0
  112. data/vendor/cybozulib/sample/log_smpl.cpp +19 -0
  113. data/vendor/cybozulib/sample/mecab_smpl.cpp +37 -0
  114. data/vendor/cybozulib/sample/option2_smpl.cpp +68 -0
  115. data/vendor/cybozulib/sample/option_smpl.cpp +42 -0
  116. data/vendor/cybozulib/sample/plsi_smpl.cpp +207 -0
  117. data/vendor/cybozulib/sample/proj/exception_smpl.vcproj +184 -0
  118. data/vendor/cybozulib/sample/proj/mecab_smpl.vcproj +184 -0
  119. data/vendor/cybozulib/sample/proj/ssl_smpl/ssl_smpl.vcxproj +85 -0
  120. data/vendor/cybozulib/sample/proj/ssl_smpl.vcproj +347 -0
  121. data/vendor/cybozulib/sample/proj/stacktrace_smpl/stacktrace_smpl.vcxproj +85 -0
  122. data/vendor/cybozulib/sample/proj/svd_smpl.vcproj +184 -0
  123. data/vendor/cybozulib/sample/quit_signal_handler.cpp +30 -0
  124. data/vendor/cybozulib/sample/serializer_smpl.cpp +196 -0
  125. data/vendor/cybozulib/sample/socket_smpl.cpp +82 -0
  126. data/vendor/cybozulib/sample/ssl_smpl.cpp +39 -0
  127. data/vendor/cybozulib/sample/stacktrace_smpl.cpp +52 -0
  128. data/vendor/cybozulib/sample/svd_bench_smpl.cpp +143 -0
  129. data/vendor/cybozulib/sample/svd_smpl.cpp +94 -0
  130. data/vendor/cybozulib/sample/wm_bench_smpl.cpp +182 -0
  131. data/vendor/cybozulib/sample/zlib_smpl.cpp +41 -0
  132. data/vendor/cybozulib/src/Makefile +8 -0
  133. data/vendor/cybozulib/src/base/Makefile +19 -0
  134. data/vendor/cybozulib/test/Makefile +12 -0
  135. data/vendor/cybozulib/test/base/Makefile +37 -0
  136. data/vendor/cybozulib/test/base/array_test.cpp +173 -0
  137. data/vendor/cybozulib/test/base/atoi_test.cpp +774 -0
  138. data/vendor/cybozulib/test/base/atomic_test.cpp +49 -0
  139. data/vendor/cybozulib/test/base/base64_test.cpp +113 -0
  140. data/vendor/cybozulib/test/base/bit_operation_test.cpp +134 -0
  141. data/vendor/cybozulib/test/base/bitvector_test.cpp +204 -0
  142. data/vendor/cybozulib/test/base/condition_variable_cs_test.cpp +92 -0
  143. data/vendor/cybozulib/test/base/condition_variable_test.cpp +88 -0
  144. data/vendor/cybozulib/test/base/config_test.cpp +236 -0
  145. data/vendor/cybozulib/test/base/crypto_test.cpp +122 -0
  146. data/vendor/cybozulib/test/base/csucvector_test.cpp +63 -0
  147. data/vendor/cybozulib/test/base/csv_test.cpp +182 -0
  148. data/vendor/cybozulib/test/base/data/a.xml +26 -0
  149. data/vendor/cybozulib/test/base/endian_test.cpp +56 -0
  150. data/vendor/cybozulib/test/base/env_test.cpp +22 -0
  151. data/vendor/cybozulib/test/base/event_test.cpp +41 -0
  152. data/vendor/cybozulib/test/base/file_test.cpp +233 -0
  153. data/vendor/cybozulib/test/base/fmindex_test.cpp +118 -0
  154. data/vendor/cybozulib/test/base/format_test.cpp +12 -0
  155. data/vendor/cybozulib/test/base/frequency_test.cpp +104 -0
  156. data/vendor/cybozulib/test/base/itoa_test.cpp +522 -0
  157. data/vendor/cybozulib/test/base/line_stream_test.cpp +208 -0
  158. data/vendor/cybozulib/test/base/mecab_test.cpp +41 -0
  159. data/vendor/cybozulib/test/base/minixml_test.cpp +103 -0
  160. data/vendor/cybozulib/test/base/mmap_test.cpp +15 -0
  161. data/vendor/cybozulib/test/base/option_test.cpp +487 -0
  162. data/vendor/cybozulib/test/base/parallel_test.cpp +48 -0
  163. data/vendor/cybozulib/test/base/proj/array_test/array_test.vcxproj +86 -0
  164. data/vendor/cybozulib/test/base/proj/atoi_test/atoi_test.vcxproj +86 -0
  165. data/vendor/cybozulib/test/base/proj/atomic_test/atomic_test.vcxproj +86 -0
  166. data/vendor/cybozulib/test/base/proj/base64_test/base64_test.vcxproj +86 -0
  167. data/vendor/cybozulib/test/base/proj/condition_variable_cs_test/condition_variable_cs_test.vcxproj +86 -0
  168. data/vendor/cybozulib/test/base/proj/condition_variable_test/condition_variable_test.vcxproj +86 -0
  169. data/vendor/cybozulib/test/base/proj/config_test/config_test.vcxproj +86 -0
  170. data/vendor/cybozulib/test/base/proj/csv_test/csv_test.vcxproj +86 -0
  171. data/vendor/cybozulib/test/base/proj/endian_test/endian_test.vcxproj +86 -0
  172. data/vendor/cybozulib/test/base/proj/env_test/env_test.vcxproj +86 -0
  173. data/vendor/cybozulib/test/base/proj/event_test/event_test.vcxproj +86 -0
  174. data/vendor/cybozulib/test/base/proj/file_test/file_test.vcxproj +86 -0
  175. data/vendor/cybozulib/test/base/proj/itoa_test/itoa_test.vcxproj +86 -0
  176. data/vendor/cybozulib/test/base/proj/mecab_test/mecab_test.vcxproj +88 -0
  177. data/vendor/cybozulib/test/base/proj/minixml_test/minixml_test.vcxproj +86 -0
  178. data/vendor/cybozulib/test/base/proj/mmap_test/mmap_test.vcxproj +86 -0
  179. data/vendor/cybozulib/test/base/proj/serializer_test/serializer_test.vcxproj +86 -0
  180. data/vendor/cybozulib/test/base/proj/sha1_test/sha1_test.vcxproj +86 -0
  181. data/vendor/cybozulib/test/base/proj/stream_test/stream_test.vcxproj +86 -0
  182. data/vendor/cybozulib/test/base/proj/string_operation_test/string_operation_test.vcxproj +86 -0
  183. data/vendor/cybozulib/test/base/proj/string_test/string_test.vcxproj +86 -0
  184. data/vendor/cybozulib/test/base/proj/thread_test/thread_test.vcxproj +86 -0
  185. data/vendor/cybozulib/test/base/proj/time_test/time_test.vcxproj +86 -0
  186. data/vendor/cybozulib/test/base/proj/tls_test/tls_test.vcxproj +86 -0
  187. data/vendor/cybozulib/test/base/proj/zlib_test/zlib_test.vcxproj +86 -0
  188. data/vendor/cybozulib/test/base/random_generator_test.cpp +28 -0
  189. data/vendor/cybozulib/test/base/regex_test.cpp +74 -0
  190. data/vendor/cybozulib/test/base/serializer_test.cpp +483 -0
  191. data/vendor/cybozulib/test/base/sha1_test.cpp +61 -0
  192. data/vendor/cybozulib/test/base/sha2_test.cpp +191 -0
  193. data/vendor/cybozulib/test/base/siphash_test.cpp +33 -0
  194. data/vendor/cybozulib/test/base/socket_test.cpp +76 -0
  195. data/vendor/cybozulib/test/base/stream_test.cpp +101 -0
  196. data/vendor/cybozulib/test/base/string_operation_test.cpp +340 -0
  197. data/vendor/cybozulib/test/base/string_test.cpp +1705 -0
  198. data/vendor/cybozulib/test/base/sucvector_test.cpp +312 -0
  199. data/vendor/cybozulib/test/base/thread_test.cpp +62 -0
  200. data/vendor/cybozulib/test/base/time_test.cpp +164 -0
  201. data/vendor/cybozulib/test/base/tls_test.cpp +50 -0
  202. data/vendor/cybozulib/test/base/wavelet_matrix_test.cpp +145 -0
  203. data/vendor/cybozulib/test/base/zlib_test.cpp +371 -0
  204. data/vendor/cybozulib/test/nlp/Makefile +27 -0
  205. data/vendor/cybozulib/test/nlp/proj/random_test.vcproj +184 -0
  206. data/vendor/cybozulib/test/nlp/proj/sparse_test.vcproj +184 -0
  207. data/vendor/cybozulib/test/nlp/proj/svd_test.vcproj +184 -0
  208. data/vendor/cybozulib/test/nlp/random_test.cpp +62 -0
  209. data/vendor/cybozulib/test/nlp/sparse_test.cpp +347 -0
  210. data/vendor/cybozulib/test/nlp/svd_test.cpp +234 -0
  211. data/vendor/cybozulib/test/nlp/top_score_test.cpp +40 -0
  212. data/vendor/cybozulib/tool/create_vcproj.py +186 -0
  213. data/vendor/cybozulib/tool/vcproj_tmpl.py +185 -0
  214. data/vendor/msoffice/COPYRIGHT +27 -0
  215. data/vendor/msoffice/Makefile +29 -0
  216. data/vendor/msoffice/bin/64/msoc.dll +0 -0
  217. data/vendor/msoffice/bin/64/msocsample.exe +0 -0
  218. data/vendor/msoffice/bin/64/msoffice-crypt.exe +0 -0
  219. data/vendor/msoffice/bin/msoc.dll +0 -0
  220. data/vendor/msoffice/bin/msocsample.exe +0 -0
  221. data/vendor/msoffice/bin/msoffice-crypt.exe +0 -0
  222. data/vendor/msoffice/common.mk +71 -0
  223. data/vendor/msoffice/common.props +26 -0
  224. data/vendor/msoffice/debug.props +14 -0
  225. data/vendor/msoffice/include/attack.hpp +211 -0
  226. data/vendor/msoffice/include/cfb.hpp +777 -0
  227. data/vendor/msoffice/include/crypto_util.hpp +450 -0
  228. data/vendor/msoffice/include/custom_sha1.hpp +342 -0
  229. data/vendor/msoffice/include/decode.hpp +240 -0
  230. data/vendor/msoffice/include/encode.hpp +221 -0
  231. data/vendor/msoffice/include/make_dataspace.hpp +316 -0
  232. data/vendor/msoffice/include/msoc.h +129 -0
  233. data/vendor/msoffice/include/resource.hpp +7 -0
  234. data/vendor/msoffice/include/standard_encryption.hpp +145 -0
  235. data/vendor/msoffice/include/uint32vec.hpp +179 -0
  236. data/vendor/msoffice/include/util.hpp +212 -0
  237. data/vendor/msoffice/lib/.emptydir +0 -0
  238. data/vendor/msoffice/misc/decrypt-xls.vbs +46 -0
  239. data/vendor/msoffice/mk.bat +1 -0
  240. data/vendor/msoffice/mkdll.bat +3 -0
  241. data/vendor/msoffice/msoc.def +13 -0
  242. data/vendor/msoffice/msocsample.py +178 -0
  243. data/vendor/msoffice/msoffice12.sln +31 -0
  244. data/vendor/msoffice/readme.md +110 -0
  245. data/vendor/msoffice/release.props +28 -0
  246. data/vendor/msoffice/src/Makefile +19 -0
  247. data/vendor/msoffice/src/attack.cpp +124 -0
  248. data/vendor/msoffice/src/cfb_test.cpp +77 -0
  249. data/vendor/msoffice/src/minisample.c +54 -0
  250. data/vendor/msoffice/src/msocdll.cpp +276 -0
  251. data/vendor/msoffice/src/msocsample.c +136 -0
  252. data/vendor/msoffice/src/msoffice-crypt.cpp +219 -0
  253. data/vendor/msoffice/src/proj/attack/attack.vcxproj +88 -0
  254. data/vendor/msoffice/src/proj/main/msoffice-crypt.vcxproj +88 -0
  255. data/vendor/msoffice/src/sha1.cpp +234 -0
  256. data/vendor/msoffice/test/Makefile +20 -0
  257. data/vendor/msoffice/test/cfb_test.cpp +74 -0
  258. data/vendor/msoffice/test/hash_test.cpp +59 -0
  259. data/vendor/msoffice/test/proj/cfb/cfb_test.vcxproj +90 -0
  260. data/vendor/msoffice/test/proj/hash/hash_test.vcxproj +90 -0
  261. data/vendor/msoffice/test/sampl.bat +8 -0
  262. data/vendor/msoffice/test_all.py +46 -0
  263. data/vendor/update +4 -0
  264. metadata +351 -0
@@ -0,0 +1,529 @@
1
+ #pragma once
2
+ /**
3
+ @file
4
+ @brief sparse vector
5
+
6
+ @author MITSUNARI Shigeo(@herumi)
7
+ @author MITSUNARI Shigeo
8
+ */
9
+ #include <vector>
10
+ #include <cybozu/exception.hpp>
11
+ #include <cybozu/serializer.hpp>
12
+ #include <assert.h>
13
+
14
+ namespace cybozu { namespace nlp {
15
+
16
+ namespace option {
17
+
18
+ class PositionTbl {
19
+ std::vector<unsigned int> v_;
20
+ public:
21
+ struct Curr {
22
+ size_t vecPos_;
23
+ Curr(const PositionTbl&)
24
+ : vecPos_(0)
25
+ {
26
+ }
27
+ Curr(size_t vecPos)
28
+ : vecPos_(vecPos)
29
+ {
30
+ }
31
+ };
32
+ void reserve(size_t size)
33
+ {
34
+ v_.reserve(size);
35
+ }
36
+ void set(size_t pos)
37
+ {
38
+ if (!v_.empty() && pos <= v_[v_.size() - 1]) {
39
+ throw cybozu::Exception("SparseException:PositionTbl:set:bad order pos") << pos;
40
+ }
41
+ if (pos > 0xffffffffU) {
42
+ throw cybozu::Exception("SparseException:PositionTbl:set:too large pos") << pos;
43
+ }
44
+ v_.push_back((unsigned int)pos);
45
+ }
46
+ void clear()
47
+ {
48
+ v_.clear();
49
+ }
50
+ void next(Curr& curr) const
51
+ {
52
+ curr.vecPos_++;
53
+ }
54
+ size_t get(const Curr& curr) const
55
+ {
56
+ return v_[curr.vecPos_];
57
+ }
58
+ void swap(PositionTbl& rhs)
59
+ {
60
+ v_.swap(rhs.v_);
61
+ }
62
+ };
63
+
64
+ /*
65
+ max difference between previous position and current position < 0x40000000 = (1 << 30)
66
+ data format
67
+ input x:
68
+ v[0] = (x & 0x3f) | (y << 6) ; len = y + 1 for y = 0, 1, 2, 3
69
+ v[1] = x >> 6
70
+ v[2] = x >> (6 + 8)
71
+ v[3] = x >> (6 + 8 + 8)
72
+ */
73
+
74
+ class CompressedPositionTbl {
75
+ mutable std::vector<unsigned char> v_;
76
+ size_t lastPos_;
77
+ mutable bool addDummy_; // add last dummy data into v_ to get speed and avoid buffer overrun
78
+ friend struct Curr;
79
+ public:
80
+ struct Curr {
81
+ size_t vecPos_;
82
+ size_t val_;
83
+ unsigned int pos_;
84
+ Curr(const CompressedPositionTbl& tbl)
85
+ : vecPos_(0)
86
+ , val_(0)
87
+ , pos_(0)
88
+ {
89
+ if (!tbl.addDummy_) {
90
+ tbl.v_.push_back(0);
91
+ tbl.addDummy_ = true;
92
+ }
93
+ tbl.setup(*this);
94
+ }
95
+ Curr(size_t vecPos)
96
+ : vecPos_(vecPos)
97
+ {
98
+ }
99
+ };
100
+ CompressedPositionTbl()
101
+ : lastPos_(0)
102
+ , addDummy_(false)
103
+ {
104
+ }
105
+ void clear()
106
+ {
107
+ v_.clear();
108
+ lastPos_ = 0;
109
+ addDummy_ = false;
110
+ }
111
+ void reserve(size_t size)
112
+ {
113
+ v_.reserve(size * 2); // ad hoc
114
+ }
115
+ void set(size_t pos)
116
+ {
117
+ if (addDummy_) {
118
+ v_.resize(v_.size() - 1);
119
+ addDummy_ = false;
120
+ }
121
+ if (!v_.empty() && pos <= lastPos_) {
122
+ throw cybozu::Exception("SparseException:CompressedPositionTbl:set:bad order pos") << pos;
123
+ }
124
+ if (pos - lastPos_ >= (1 << 30)) {
125
+ throw cybozu::Exception("SparseException:CompressedPositionTbl:set:too large pos") << pos;
126
+ }
127
+ unsigned int diff = (unsigned int)(pos - lastPos_);
128
+ lastPos_ = pos;
129
+ if (diff < (1 << 6)) {
130
+ v_.push_back((unsigned char)(diff));
131
+ } else if (diff < (1 << 14)) {
132
+ v_.push_back((unsigned char)(diff & 0x3f) | (1 << 6));
133
+ v_.push_back((unsigned char)(diff >> 6));
134
+ } else if (diff < (1 << 22)) {
135
+ v_.push_back((unsigned char)(diff & 0x3f) | (2 << 6));
136
+ v_.push_back((unsigned char)(diff >> 6));
137
+ v_.push_back((unsigned char)(diff >> 14));
138
+ } else {
139
+ assert(diff < (1 << 30));
140
+ v_.push_back((unsigned char)(diff & 0x3f) | (3 << 6));
141
+ v_.push_back((unsigned char)(diff >> 6));
142
+ v_.push_back((unsigned char)(diff >> 14));
143
+ v_.push_back((unsigned char)(diff >> 22));
144
+ }
145
+ }
146
+ void next(Curr& curr) const
147
+ {
148
+ setup(curr);
149
+ curr.vecPos_++;
150
+ }
151
+ size_t get(const Curr& curr) const { return curr.val_; }
152
+ void swap(CompressedPositionTbl& rhs)
153
+ {
154
+ v_.swap(rhs.v_);
155
+ std::swap(lastPos_, rhs.lastPos_);
156
+ std::swap(addDummy_, rhs.addDummy_);
157
+ }
158
+ private:
159
+ void setup(Curr& curr) const
160
+ {
161
+ unsigned int diff = v_[curr.pos_];
162
+ unsigned int t = diff >> 6;
163
+ unsigned int pos = curr.pos_;
164
+ if (t > 0) {
165
+ diff &= (1 << 6) - 1;
166
+ if (t == 1) {
167
+ diff |= (v_[pos + 1] << 6);
168
+ } else if (t == 2) {
169
+ diff |= (v_[pos + 1] << 6) | (v_[pos + 2] << 14);
170
+ } else {
171
+ diff |= (v_[pos + 1] << 6) | (v_[pos + 2] << 14) | (v_[pos + 3] << 22);
172
+ }
173
+ }
174
+ curr.val_ += diff;
175
+ curr.pos_ += t + 1;
176
+ }
177
+ };
178
+
179
+ } // cybozu::nlp::option
180
+
181
+ template<class T, class PosTbl = option::PositionTbl, int dummy = 0>
182
+ class SparseVector {
183
+ PosTbl posTbl_;
184
+ std::vector<T> vec_;
185
+
186
+ template<class S>
187
+ class ConstIterator {
188
+ typename PosTbl::Curr cur_;
189
+ const SparseVector<S, PosTbl> *self_;
190
+ public:
191
+ // for begin
192
+ ConstIterator(const SparseVector<S, PosTbl>* self)
193
+ : cur_(self->posTbl_)
194
+ , self_(self)
195
+ {
196
+ }
197
+ // for end
198
+ ConstIterator(size_t vecPos)
199
+ : cur_(vecPos)
200
+ {
201
+ }
202
+ size_t pos() const { return self_->posTbl_.get(cur_); }
203
+ S val() const { return (self_->vec_)[cur_.vecPos_]; }
204
+
205
+ const ConstIterator *operator->() const { return this; }
206
+ const ConstIterator& operator*() const { return *this; }
207
+ void operator++()
208
+ {
209
+ self_->posTbl_.next(cur_);
210
+ }
211
+ bool operator==(const ConstIterator& rhs) const { return cur_.vecPos_ == rhs.cur_.vecPos_; }
212
+ bool operator!=(const ConstIterator& rhs) const { return !operator==(rhs); }
213
+ };
214
+ public:
215
+ typedef ConstIterator<T> const_iterator;
216
+ typedef T value_type;
217
+ SparseVector()
218
+ {
219
+ }
220
+ template<class Map>
221
+ void set(const Map& m)
222
+ {
223
+ reserve(m.size());
224
+ for (typename Map::const_iterator i = m.begin(), ie = m.end(); i != ie; ++i) {
225
+ push_back(i->first, i->second);
226
+ }
227
+ }
228
+ void reserve(size_t size)
229
+ {
230
+ posTbl_.reserve(size);
231
+ vec_.reserve(size);
232
+ }
233
+ void push_back(size_t pos, const T& x)
234
+ {
235
+ posTbl_.set(pos);
236
+ vec_.push_back(x);
237
+ }
238
+ void clear()
239
+ {
240
+ posTbl_.clear();
241
+ vec_.clear();
242
+ }
243
+ const_iterator begin() const { return const_iterator(this); }
244
+ const_iterator end() const { return const_iterator(vec_.size()); }
245
+ size_t size() const { return vec_.size(); }
246
+ bool empty() const { return vec_.empty(); }
247
+ bool operator==(const SparseVector& rhs) const
248
+ {
249
+ if (size() != rhs.size()) return false;
250
+ for (const_iterator i1 = begin(), i2 = rhs.begin(), ie = end(); i1 != ie; ++i1, ++i2) {
251
+ if (i1->pos() != i2->pos()) return false;
252
+ if (i1->val() != i2->val()) return false;
253
+ }
254
+ return true;
255
+ }
256
+ bool operator!=(const SparseVector& rhs) const { return !operator==(rhs); }
257
+
258
+ void swap(SparseVector& rhs)
259
+ {
260
+ posTbl_.swap(rhs.posTbl_);
261
+ vec_.swap(rhs.vec_);
262
+ }
263
+ double norm() const
264
+ {
265
+ double ret = 0;
266
+ for (const_iterator i = begin(), ie = end(); i != ie; ++i) {
267
+ double v = i->val();
268
+ ret += v * v;
269
+ }
270
+ return ret;
271
+ }
272
+ template<class InputStream>
273
+ void load(InputStream& is)
274
+ {
275
+ size_t size;
276
+ cybozu::load(size, is);
277
+ clear();
278
+ reserve(size);
279
+ for (size_t i = 0; i < size; i++) {
280
+ size_t pos;
281
+ T val;
282
+ cybozu::load(pos, is);
283
+ cybozu::load(val, is);
284
+ push_back(pos, val);
285
+ }
286
+ }
287
+
288
+ template<class OutputStream>
289
+ void save(OutputStream& os) const
290
+ {
291
+ cybozu::save(os, size());
292
+ for (const_iterator i = begin(), ie = end(); i != ie; ++i) {
293
+ cybozu::save(os, i->pos());
294
+ cybozu::save(os, i->val());
295
+ }
296
+ }
297
+ };
298
+
299
/**
	view over the positions that appear in both v1 and v2
	both inputs are held by reference and must be ordered by increasing position
*/
template<class V1, class V2>
class Intersection {
	// not copyable
	Intersection(const Intersection&);
	void operator=(const Intersection&);
	const V1& v1_;
	const V2& v2_;

	/**
		iterator that stops only on positions present in both inputs
		comparison looks at the end flag of the left-hand side only, so it is
		meant to be compared against end()
	*/
	template<class S1, class S2>
	class ConstIterator {
		typedef typename S1::const_iterator Iter1;
		typedef typename S2::const_iterator Iter2;
		Iter1 cur1_;
		Iter1 last1_;
		Iter2 cur2_;
		Iter2 last2_;
		bool done_;
	public:
		ConstIterator(Iter1 begin1, Iter1 end1, Iter2 begin2, Iter2 end2, bool isEnd)
			: cur1_(begin1)
			, last1_(end1)
			, cur2_(begin2)
			, last2_(end2)
			, done_(isEnd)
		{
			if (!done_) {
				done_ = (cur1_ == last1_) || (cur2_ == last2_);
			}
			// move to the first common position unless already on one
			if (!done_ && cur1_->pos() != cur2_->pos()) {
				operator++();
			}
		}
		size_t pos() const { return cur1_->pos(); }
		typename S1::value_type val1() const { return cur1_->val(); }
		typename S2::value_type val2() const { return cur2_->val(); }

		const ConstIterator *operator->() const { return this; }
		const ConstIterator& operator*() const { return *this; }
		void operator++()
		{
			if (done_) return;
			if (cur1_->pos() == cur2_->pos()) {
				// leave the current common position
				++cur1_;
				if (cur1_ == last1_) {
					done_ = true;
					return;
				}
			}
			// advance whichever side is behind until both agree or one runs out
			while (cur1_->pos() != cur2_->pos()) {
				if (cur1_->pos() < cur2_->pos()) {
					++cur1_;
					if (cur1_ == last1_) {
						done_ = true;
						return;
					}
				} else {
					++cur2_;
					if (cur2_ == last2_) {
						done_ = true;
						return;
					}
				}
			}
		}
		bool operator==(const ConstIterator&) const { return done_; }
		bool operator!=(const ConstIterator& rhs) const { return !operator==(rhs); }
	};
public:
	typedef ConstIterator<V1, V2> const_iterator;
	Intersection(const V1& v1, const V2& v2)
		: v1_(v1)
		, v2_(v2)
	{
	}
	const_iterator begin() const { return const_iterator(v1_.begin(), v1_.end(), v2_.begin(), v2_.end(), false); }
	const_iterator end() const { return const_iterator(v1_.begin(), v1_.end(), v2_.begin(), v2_.end(), true); }
};
375
+
376
/**
	view over the positions that appear in v1, in v2, or in both
	both inputs are held by reference and must be ordered by increasing position
*/
template<class V1, class V2>
class Union {
	// not copyable
	Union(const Union&);
	void operator=(const Union&);
	const V1& v1_;
	const V2& v2_;

	/**
		iterator that visits every position of either input once
		two iterators compare equal when they are in the same mode, so it is
		meant to be compared against end()
	*/
	template<class S1, class S2>
	class ConstIterator {
	public:
		/*
			       pos1 pos2 ; next action
			End    x    x    ; end
			Only1  o    x    ; ++p1
			Only2  x    o    ; ++p2
			Small  o <  o    ; ++p1
			Equal  o =  o    ; ++p1, ++p2
			Large  o >  o    ; ++p2
		*/
		enum Mode {
			End,
			Only1,
			Only2,
			Small,
			Equal,
			Large
		};
	private:
		typename S1::const_iterator begin1_;
		typename S1::const_iterator end1_;
		typename S2::const_iterator begin2_;
		typename S2::const_iterator end2_;
		Mode mode_;
		size_t pos_;

		/**
			set mode_ and pos_ from the current state of both input iterators
			pos_ keeps its previous value when both inputs are exhausted
		*/
		void detectMode()
		{
			if (begin1_ != end1_) {
				const size_t pos1 = begin1_->pos();
				if (begin2_ != end2_) {
					size_t pos2 = begin2_->pos();
					if (pos1 < pos2) {
						mode_ = Small;
						pos_ = pos1;
					} else if (pos1 == pos2) {
						mode_ = Equal;
						pos_ = pos1;
					} else {
						mode_ = Large;
						pos_ = pos2;
					}
				} else {
					mode_ = Only1;
					pos_ = pos1;
				}
			} else {
				if (begin2_ != end2_) {
					mode_ = Only2;
					pos_ = begin2_->pos();
				} else {
					mode_ = End;
				}
			}
		}
	public:
		ConstIterator(typename S1::const_iterator begin1, typename S1::const_iterator end1, typename S2::const_iterator begin2, typename S2::const_iterator end2, bool isEnd)
			: begin1_(begin1)
			, end1_(end1)
			, begin2_(begin2)
			, end2_(end2)
			, mode_(End)
			, pos_(0)
		{
			if (isEnd) return;
			detectMode();
		}
		// which of the two inputs supplies the current position
		Mode getMode() const { return mode_; }
		size_t pos() const { return pos_; }
		bool hasVal1() const { return mode_ == Only1 || mode_ == Small || mode_ == Equal; }
		bool hasVal2() const { return mode_ == Only2 || mode_ == Equal || mode_ == Large; }

		/**
			return value if exists otherwise 0
		*/
		typename S1::value_type val1() const { return hasVal1() ? begin1_->val() : 0; }
		typename S2::value_type val2() const { return hasVal2() ? begin2_->val() : 0; }

		const ConstIterator *operator->() const { return this; }
		const ConstIterator& operator*() const { return *this; }
		/**
			advance every input that supplied the current position, then
			recompute the mode
		*/
		void operator++()
		{
			if (mode_ == End) return;
			switch (mode_) {
			case Only1:
			case Small:
			case Equal:
				++begin1_;
				break;
			default:
				break;
			}
			switch (mode_) {
			case Only2:
			case Equal:
			case Large:
				++begin2_;
				break;
			default:
				break;
			}
			detectMode();
		}
		bool operator==(const ConstIterator& rhs) const { return mode_ == rhs.mode_; }
		bool operator!=(const ConstIterator& rhs) const { return !operator==(rhs); }
	};
public:
	typedef ConstIterator<V1, V2> const_iterator;
	Union(const V1& v1, const V2& v2)
		: v1_(v1)
		, v2_(v2)
	{
	}
	const_iterator begin() const { return const_iterator(v1_.begin(), v1_.end(), v2_.begin(), v2_.end(), false); }
	const_iterator end() const { return const_iterator(v1_.begin(), v1_.end(), v2_.begin(), v2_.end(), true); }
};
501
+
502
+ /**
503
+ inner product of lhs and rhs
504
+ retval is the type of lhs::value_type or rhs::value_type
505
+ */
506
+ template<class Ret, class L, class Ltbl, class R, class Rtbl>
507
+ void InnerProduct(Ret *pret, const SparseVector<L, Ltbl>& lhs, const SparseVector<R, Rtbl>& rhs)
508
+ {
509
+ typedef SparseVector<L, Ltbl> Lvec;
510
+ typedef SparseVector<R, Rtbl> Rvec;
511
+
512
+ Ret ret = 0;
513
+ if ((const void*)&lhs == (const void*)&rhs) {
514
+ for (typename Lvec::const_iterator i = lhs.begin(), ie = lhs.end(); i != ie; ++i) {
515
+ ret += (Ret)i->val() * (Ret)i->val();
516
+ }
517
+ } else {
518
+ typedef Intersection<Lvec, Rvec> Inter;
519
+ Inter inter(lhs, rhs);
520
+ for (typename Inter::const_iterator i = inter.begin(), ie = inter.end(); i != ie; ++i) {
521
+ ret += (Ret)i->val1() * (Ret)i->val2();
522
+ }
523
+ }
524
+ *pret = ret;
525
+ }
526
+
527
+ } // cybozu::nlp
528
+
529
+ } // cybozu