ooxml_crypt 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (264) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +58 -0
  5. data/Rakefile +12 -0
  6. data/bin/console +15 -0
  7. data/bin/setup +8 -0
  8. data/ext/ooxml_crypt/extconf.rb +18 -0
  9. data/ext/ooxml_crypt/ooxml_crypt.c +27 -0
  10. data/ext/ooxml_crypt/ooxml_crypt.h +7 -0
  11. data/lib/ooxml_crypt/version.rb +5 -0
  12. data/lib/ooxml_crypt.rb +75 -0
  13. data/vendor/cybozulib/.github/workflows/main.yml +12 -0
  14. data/vendor/cybozulib/.gitignore +5 -0
  15. data/vendor/cybozulib/CMakeLists.txt +6 -0
  16. data/vendor/cybozulib/COPYRIGHT +27 -0
  17. data/vendor/cybozulib/Makefile +26 -0
  18. data/vendor/cybozulib/bin/libeay32.dll +0 -0
  19. data/vendor/cybozulib/bin/libmecab.dll +0 -0
  20. data/vendor/cybozulib/bin/ssleay32.dll +0 -0
  21. data/vendor/cybozulib/common.mk +116 -0
  22. data/vendor/cybozulib/common.props +25 -0
  23. data/vendor/cybozulib/cybozulib.sln +286 -0
  24. data/vendor/cybozulib/debug.props +14 -0
  25. data/vendor/cybozulib/include/cybozu/array.hpp +197 -0
  26. data/vendor/cybozulib/include/cybozu/atoi.hpp +238 -0
  27. data/vendor/cybozulib/include/cybozu/atomic.hpp +146 -0
  28. data/vendor/cybozulib/include/cybozu/base64.hpp +210 -0
  29. data/vendor/cybozulib/include/cybozu/benchmark.hpp +212 -0
  30. data/vendor/cybozulib/include/cybozu/bfd.hpp +105 -0
  31. data/vendor/cybozulib/include/cybozu/bit_operation.hpp +139 -0
  32. data/vendor/cybozulib/include/cybozu/bitvector.hpp +358 -0
  33. data/vendor/cybozulib/include/cybozu/condition_variable.hpp +113 -0
  34. data/vendor/cybozulib/include/cybozu/condition_variable_cs.hpp +74 -0
  35. data/vendor/cybozulib/include/cybozu/config.hpp +392 -0
  36. data/vendor/cybozulib/include/cybozu/critical_section.hpp +60 -0
  37. data/vendor/cybozulib/include/cybozu/crypto.hpp +321 -0
  38. data/vendor/cybozulib/include/cybozu/csucvector.hpp +624 -0
  39. data/vendor/cybozulib/include/cybozu/csv.hpp +294 -0
  40. data/vendor/cybozulib/include/cybozu/data_type.hpp +27 -0
  41. data/vendor/cybozulib/include/cybozu/endian.hpp +224 -0
  42. data/vendor/cybozulib/include/cybozu/env.hpp +63 -0
  43. data/vendor/cybozulib/include/cybozu/event.hpp +122 -0
  44. data/vendor/cybozulib/include/cybozu/exception.hpp +253 -0
  45. data/vendor/cybozulib/include/cybozu/file.hpp +626 -0
  46. data/vendor/cybozulib/include/cybozu/fmindex.hpp +291 -0
  47. data/vendor/cybozulib/include/cybozu/format.hpp +93 -0
  48. data/vendor/cybozulib/include/cybozu/frequency.hpp +264 -0
  49. data/vendor/cybozulib/include/cybozu/hash.hpp +67 -0
  50. data/vendor/cybozulib/include/cybozu/inttype.hpp +174 -0
  51. data/vendor/cybozulib/include/cybozu/itoa.hpp +336 -0
  52. data/vendor/cybozulib/include/cybozu/json.hpp +120 -0
  53. data/vendor/cybozulib/include/cybozu/line_stream.hpp +149 -0
  54. data/vendor/cybozulib/include/cybozu/link_libeay32.hpp +21 -0
  55. data/vendor/cybozulib/include/cybozu/link_mpir.hpp +18 -0
  56. data/vendor/cybozulib/include/cybozu/link_ssleay32.hpp +19 -0
  57. data/vendor/cybozulib/include/cybozu/log.hpp +237 -0
  58. data/vendor/cybozulib/include/cybozu/minixml.hpp +452 -0
  59. data/vendor/cybozulib/include/cybozu/mmap.hpp +143 -0
  60. data/vendor/cybozulib/include/cybozu/mutex.hpp +144 -0
  61. data/vendor/cybozulib/include/cybozu/nlp/mecab.hpp +96 -0
  62. data/vendor/cybozulib/include/cybozu/nlp/plsi.hpp +315 -0
  63. data/vendor/cybozulib/include/cybozu/nlp/random.hpp +74 -0
  64. data/vendor/cybozulib/include/cybozu/nlp/sparse.hpp +529 -0
  65. data/vendor/cybozulib/include/cybozu/nlp/svd.hpp +486 -0
  66. data/vendor/cybozulib/include/cybozu/nlp/tfidf.hpp +226 -0
  67. data/vendor/cybozulib/include/cybozu/nlp/top_score.hpp +75 -0
  68. data/vendor/cybozulib/include/cybozu/option.hpp +743 -0
  69. data/vendor/cybozulib/include/cybozu/parallel.hpp +88 -0
  70. data/vendor/cybozulib/include/cybozu/pcg.hpp +72 -0
  71. data/vendor/cybozulib/include/cybozu/process.hpp +324 -0
  72. data/vendor/cybozulib/include/cybozu/quit_signal_handler.hpp +66 -0
  73. data/vendor/cybozulib/include/cybozu/random_generator.hpp +144 -0
  74. data/vendor/cybozulib/include/cybozu/regex.hpp +463 -0
  75. data/vendor/cybozulib/include/cybozu/select8.hpp +279 -0
  76. data/vendor/cybozulib/include/cybozu/serializer.hpp +363 -0
  77. data/vendor/cybozulib/include/cybozu/sha1.hpp +209 -0
  78. data/vendor/cybozulib/include/cybozu/sha2.hpp +506 -0
  79. data/vendor/cybozulib/include/cybozu/siphash.hpp +105 -0
  80. data/vendor/cybozulib/include/cybozu/socket.hpp +785 -0
  81. data/vendor/cybozulib/include/cybozu/ssl.hpp +203 -0
  82. data/vendor/cybozulib/include/cybozu/stacktrace.hpp +291 -0
  83. data/vendor/cybozulib/include/cybozu/stream.hpp +269 -0
  84. data/vendor/cybozulib/include/cybozu/string.hpp +1746 -0
  85. data/vendor/cybozulib/include/cybozu/string_operation.hpp +365 -0
  86. data/vendor/cybozulib/include/cybozu/sucvector.hpp +378 -0
  87. data/vendor/cybozulib/include/cybozu/test.hpp +373 -0
  88. data/vendor/cybozulib/include/cybozu/thread.hpp +229 -0
  89. data/vendor/cybozulib/include/cybozu/time.hpp +281 -0
  90. data/vendor/cybozulib/include/cybozu/tls.hpp +115 -0
  91. data/vendor/cybozulib/include/cybozu/unordered_map.hpp +13 -0
  92. data/vendor/cybozulib/include/cybozu/unordered_set.hpp +13 -0
  93. data/vendor/cybozulib/include/cybozu/v128.hpp +376 -0
  94. data/vendor/cybozulib/include/cybozu/wavelet_matrix.hpp +345 -0
  95. data/vendor/cybozulib/include/cybozu/xorshift.hpp +189 -0
  96. data/vendor/cybozulib/include/cybozu/zlib.hpp +325 -0
  97. data/vendor/cybozulib/include/sais.hxx +364 -0
  98. data/vendor/cybozulib/misc/make_select8tbl.cpp +26 -0
  99. data/vendor/cybozulib/mk.bat +37 -0
  100. data/vendor/cybozulib/readme.md +29 -0
  101. data/vendor/cybozulib/release.props +12 -0
  102. data/vendor/cybozulib/sample/Makefile +30 -0
  103. data/vendor/cybozulib/sample/csucvector_smpl.cpp +42 -0
  104. data/vendor/cybozulib/sample/data/svd/org/test1.S +4 -0
  105. data/vendor/cybozulib/sample/data/svd/org/test1.U +4 -0
  106. data/vendor/cybozulib/sample/data/svd/org/test1.V +6 -0
  107. data/vendor/cybozulib/sample/data/svd/test1 +4 -0
  108. data/vendor/cybozulib/sample/data/svd/test2 +4 -0
  109. data/vendor/cybozulib/sample/desymbol.cpp +127 -0
  110. data/vendor/cybozulib/sample/exception_smpl.cpp +46 -0
  111. data/vendor/cybozulib/sample/fmindex_smpl.cpp +231 -0
  112. data/vendor/cybozulib/sample/log_smpl.cpp +19 -0
  113. data/vendor/cybozulib/sample/mecab_smpl.cpp +37 -0
  114. data/vendor/cybozulib/sample/option2_smpl.cpp +68 -0
  115. data/vendor/cybozulib/sample/option_smpl.cpp +42 -0
  116. data/vendor/cybozulib/sample/plsi_smpl.cpp +207 -0
  117. data/vendor/cybozulib/sample/proj/exception_smpl.vcproj +184 -0
  118. data/vendor/cybozulib/sample/proj/mecab_smpl.vcproj +184 -0
  119. data/vendor/cybozulib/sample/proj/ssl_smpl/ssl_smpl.vcxproj +85 -0
  120. data/vendor/cybozulib/sample/proj/ssl_smpl.vcproj +347 -0
  121. data/vendor/cybozulib/sample/proj/stacktrace_smpl/stacktrace_smpl.vcxproj +85 -0
  122. data/vendor/cybozulib/sample/proj/svd_smpl.vcproj +184 -0
  123. data/vendor/cybozulib/sample/quit_signal_handler.cpp +30 -0
  124. data/vendor/cybozulib/sample/serializer_smpl.cpp +196 -0
  125. data/vendor/cybozulib/sample/socket_smpl.cpp +82 -0
  126. data/vendor/cybozulib/sample/ssl_smpl.cpp +39 -0
  127. data/vendor/cybozulib/sample/stacktrace_smpl.cpp +52 -0
  128. data/vendor/cybozulib/sample/svd_bench_smpl.cpp +143 -0
  129. data/vendor/cybozulib/sample/svd_smpl.cpp +94 -0
  130. data/vendor/cybozulib/sample/wm_bench_smpl.cpp +182 -0
  131. data/vendor/cybozulib/sample/zlib_smpl.cpp +41 -0
  132. data/vendor/cybozulib/src/Makefile +8 -0
  133. data/vendor/cybozulib/src/base/Makefile +19 -0
  134. data/vendor/cybozulib/test/Makefile +12 -0
  135. data/vendor/cybozulib/test/base/Makefile +37 -0
  136. data/vendor/cybozulib/test/base/array_test.cpp +173 -0
  137. data/vendor/cybozulib/test/base/atoi_test.cpp +774 -0
  138. data/vendor/cybozulib/test/base/atomic_test.cpp +49 -0
  139. data/vendor/cybozulib/test/base/base64_test.cpp +113 -0
  140. data/vendor/cybozulib/test/base/bit_operation_test.cpp +134 -0
  141. data/vendor/cybozulib/test/base/bitvector_test.cpp +204 -0
  142. data/vendor/cybozulib/test/base/condition_variable_cs_test.cpp +92 -0
  143. data/vendor/cybozulib/test/base/condition_variable_test.cpp +88 -0
  144. data/vendor/cybozulib/test/base/config_test.cpp +236 -0
  145. data/vendor/cybozulib/test/base/crypto_test.cpp +122 -0
  146. data/vendor/cybozulib/test/base/csucvector_test.cpp +63 -0
  147. data/vendor/cybozulib/test/base/csv_test.cpp +182 -0
  148. data/vendor/cybozulib/test/base/data/a.xml +26 -0
  149. data/vendor/cybozulib/test/base/endian_test.cpp +56 -0
  150. data/vendor/cybozulib/test/base/env_test.cpp +22 -0
  151. data/vendor/cybozulib/test/base/event_test.cpp +41 -0
  152. data/vendor/cybozulib/test/base/file_test.cpp +233 -0
  153. data/vendor/cybozulib/test/base/fmindex_test.cpp +118 -0
  154. data/vendor/cybozulib/test/base/format_test.cpp +12 -0
  155. data/vendor/cybozulib/test/base/frequency_test.cpp +104 -0
  156. data/vendor/cybozulib/test/base/itoa_test.cpp +522 -0
  157. data/vendor/cybozulib/test/base/line_stream_test.cpp +208 -0
  158. data/vendor/cybozulib/test/base/mecab_test.cpp +41 -0
  159. data/vendor/cybozulib/test/base/minixml_test.cpp +103 -0
  160. data/vendor/cybozulib/test/base/mmap_test.cpp +15 -0
  161. data/vendor/cybozulib/test/base/option_test.cpp +487 -0
  162. data/vendor/cybozulib/test/base/parallel_test.cpp +48 -0
  163. data/vendor/cybozulib/test/base/proj/array_test/array_test.vcxproj +86 -0
  164. data/vendor/cybozulib/test/base/proj/atoi_test/atoi_test.vcxproj +86 -0
  165. data/vendor/cybozulib/test/base/proj/atomic_test/atomic_test.vcxproj +86 -0
  166. data/vendor/cybozulib/test/base/proj/base64_test/base64_test.vcxproj +86 -0
  167. data/vendor/cybozulib/test/base/proj/condition_variable_cs_test/condition_variable_cs_test.vcxproj +86 -0
  168. data/vendor/cybozulib/test/base/proj/condition_variable_test/condition_variable_test.vcxproj +86 -0
  169. data/vendor/cybozulib/test/base/proj/config_test/config_test.vcxproj +86 -0
  170. data/vendor/cybozulib/test/base/proj/csv_test/csv_test.vcxproj +86 -0
  171. data/vendor/cybozulib/test/base/proj/endian_test/endian_test.vcxproj +86 -0
  172. data/vendor/cybozulib/test/base/proj/env_test/env_test.vcxproj +86 -0
  173. data/vendor/cybozulib/test/base/proj/event_test/event_test.vcxproj +86 -0
  174. data/vendor/cybozulib/test/base/proj/file_test/file_test.vcxproj +86 -0
  175. data/vendor/cybozulib/test/base/proj/itoa_test/itoa_test.vcxproj +86 -0
  176. data/vendor/cybozulib/test/base/proj/mecab_test/mecab_test.vcxproj +88 -0
  177. data/vendor/cybozulib/test/base/proj/minixml_test/minixml_test.vcxproj +86 -0
  178. data/vendor/cybozulib/test/base/proj/mmap_test/mmap_test.vcxproj +86 -0
  179. data/vendor/cybozulib/test/base/proj/serializer_test/serializer_test.vcxproj +86 -0
  180. data/vendor/cybozulib/test/base/proj/sha1_test/sha1_test.vcxproj +86 -0
  181. data/vendor/cybozulib/test/base/proj/stream_test/stream_test.vcxproj +86 -0
  182. data/vendor/cybozulib/test/base/proj/string_operation_test/string_operation_test.vcxproj +86 -0
  183. data/vendor/cybozulib/test/base/proj/string_test/string_test.vcxproj +86 -0
  184. data/vendor/cybozulib/test/base/proj/thread_test/thread_test.vcxproj +86 -0
  185. data/vendor/cybozulib/test/base/proj/time_test/time_test.vcxproj +86 -0
  186. data/vendor/cybozulib/test/base/proj/tls_test/tls_test.vcxproj +86 -0
  187. data/vendor/cybozulib/test/base/proj/zlib_test/zlib_test.vcxproj +86 -0
  188. data/vendor/cybozulib/test/base/random_generator_test.cpp +28 -0
  189. data/vendor/cybozulib/test/base/regex_test.cpp +74 -0
  190. data/vendor/cybozulib/test/base/serializer_test.cpp +483 -0
  191. data/vendor/cybozulib/test/base/sha1_test.cpp +61 -0
  192. data/vendor/cybozulib/test/base/sha2_test.cpp +191 -0
  193. data/vendor/cybozulib/test/base/siphash_test.cpp +33 -0
  194. data/vendor/cybozulib/test/base/socket_test.cpp +76 -0
  195. data/vendor/cybozulib/test/base/stream_test.cpp +101 -0
  196. data/vendor/cybozulib/test/base/string_operation_test.cpp +340 -0
  197. data/vendor/cybozulib/test/base/string_test.cpp +1705 -0
  198. data/vendor/cybozulib/test/base/sucvector_test.cpp +312 -0
  199. data/vendor/cybozulib/test/base/thread_test.cpp +62 -0
  200. data/vendor/cybozulib/test/base/time_test.cpp +164 -0
  201. data/vendor/cybozulib/test/base/tls_test.cpp +50 -0
  202. data/vendor/cybozulib/test/base/wavelet_matrix_test.cpp +145 -0
  203. data/vendor/cybozulib/test/base/zlib_test.cpp +371 -0
  204. data/vendor/cybozulib/test/nlp/Makefile +27 -0
  205. data/vendor/cybozulib/test/nlp/proj/random_test.vcproj +184 -0
  206. data/vendor/cybozulib/test/nlp/proj/sparse_test.vcproj +184 -0
  207. data/vendor/cybozulib/test/nlp/proj/svd_test.vcproj +184 -0
  208. data/vendor/cybozulib/test/nlp/random_test.cpp +62 -0
  209. data/vendor/cybozulib/test/nlp/sparse_test.cpp +347 -0
  210. data/vendor/cybozulib/test/nlp/svd_test.cpp +234 -0
  211. data/vendor/cybozulib/test/nlp/top_score_test.cpp +40 -0
  212. data/vendor/cybozulib/tool/create_vcproj.py +186 -0
  213. data/vendor/cybozulib/tool/vcproj_tmpl.py +185 -0
  214. data/vendor/msoffice/COPYRIGHT +27 -0
  215. data/vendor/msoffice/Makefile +29 -0
  216. data/vendor/msoffice/bin/64/msoc.dll +0 -0
  217. data/vendor/msoffice/bin/64/msocsample.exe +0 -0
  218. data/vendor/msoffice/bin/64/msoffice-crypt.exe +0 -0
  219. data/vendor/msoffice/bin/msoc.dll +0 -0
  220. data/vendor/msoffice/bin/msocsample.exe +0 -0
  221. data/vendor/msoffice/bin/msoffice-crypt.exe +0 -0
  222. data/vendor/msoffice/common.mk +71 -0
  223. data/vendor/msoffice/common.props +26 -0
  224. data/vendor/msoffice/debug.props +14 -0
  225. data/vendor/msoffice/include/attack.hpp +211 -0
  226. data/vendor/msoffice/include/cfb.hpp +777 -0
  227. data/vendor/msoffice/include/crypto_util.hpp +450 -0
  228. data/vendor/msoffice/include/custom_sha1.hpp +342 -0
  229. data/vendor/msoffice/include/decode.hpp +240 -0
  230. data/vendor/msoffice/include/encode.hpp +221 -0
  231. data/vendor/msoffice/include/make_dataspace.hpp +316 -0
  232. data/vendor/msoffice/include/msoc.h +129 -0
  233. data/vendor/msoffice/include/resource.hpp +7 -0
  234. data/vendor/msoffice/include/standard_encryption.hpp +145 -0
  235. data/vendor/msoffice/include/uint32vec.hpp +179 -0
  236. data/vendor/msoffice/include/util.hpp +212 -0
  237. data/vendor/msoffice/lib/.emptydir +0 -0
  238. data/vendor/msoffice/misc/decrypt-xls.vbs +46 -0
  239. data/vendor/msoffice/mk.bat +1 -0
  240. data/vendor/msoffice/mkdll.bat +3 -0
  241. data/vendor/msoffice/msoc.def +13 -0
  242. data/vendor/msoffice/msocsample.py +178 -0
  243. data/vendor/msoffice/msoffice12.sln +31 -0
  244. data/vendor/msoffice/readme.md +110 -0
  245. data/vendor/msoffice/release.props +28 -0
  246. data/vendor/msoffice/src/Makefile +19 -0
  247. data/vendor/msoffice/src/attack.cpp +124 -0
  248. data/vendor/msoffice/src/cfb_test.cpp +77 -0
  249. data/vendor/msoffice/src/minisample.c +54 -0
  250. data/vendor/msoffice/src/msocdll.cpp +276 -0
  251. data/vendor/msoffice/src/msocsample.c +136 -0
  252. data/vendor/msoffice/src/msoffice-crypt.cpp +219 -0
  253. data/vendor/msoffice/src/proj/attack/attack.vcxproj +88 -0
  254. data/vendor/msoffice/src/proj/main/msoffice-crypt.vcxproj +88 -0
  255. data/vendor/msoffice/src/sha1.cpp +234 -0
  256. data/vendor/msoffice/test/Makefile +20 -0
  257. data/vendor/msoffice/test/cfb_test.cpp +74 -0
  258. data/vendor/msoffice/test/hash_test.cpp +59 -0
  259. data/vendor/msoffice/test/proj/cfb/cfb_test.vcxproj +90 -0
  260. data/vendor/msoffice/test/proj/hash/hash_test.vcxproj +90 -0
  261. data/vendor/msoffice/test/sampl.bat +8 -0
  262. data/vendor/msoffice/test_all.py +46 -0
  263. data/vendor/update +4 -0
  264. metadata +351 -0
@@ -0,0 +1,529 @@
1
+ #pragma once
2
+ /**
3
+ @file
4
+ @brief sparse vector
5
+
6
+ @author MITSUNARI Shigeo(@herumi)
7
+ @author MITSUNARI Shigeo
8
+ */
9
+ #include <vector>
10
+ #include <cybozu/exception.hpp>
11
+ #include <cybozu/serializer.hpp>
12
+ #include <assert.h>
13
+
14
+ namespace cybozu { namespace nlp {
15
+
16
+ namespace option {
17
+
18
+ class PositionTbl {
19
+ std::vector<unsigned int> v_;
20
+ public:
21
+ struct Curr {
22
+ size_t vecPos_;
23
+ Curr(const PositionTbl&)
24
+ : vecPos_(0)
25
+ {
26
+ }
27
+ Curr(size_t vecPos)
28
+ : vecPos_(vecPos)
29
+ {
30
+ }
31
+ };
32
+ void reserve(size_t size)
33
+ {
34
+ v_.reserve(size);
35
+ }
36
+ void set(size_t pos)
37
+ {
38
+ if (!v_.empty() && pos <= v_[v_.size() - 1]) {
39
+ throw cybozu::Exception("SparseException:PositionTbl:set:bad order pos") << pos;
40
+ }
41
+ if (pos > 0xffffffffU) {
42
+ throw cybozu::Exception("SparseException:PositionTbl:set:too large pos") << pos;
43
+ }
44
+ v_.push_back((unsigned int)pos);
45
+ }
46
+ void clear()
47
+ {
48
+ v_.clear();
49
+ }
50
+ void next(Curr& curr) const
51
+ {
52
+ curr.vecPos_++;
53
+ }
54
+ size_t get(const Curr& curr) const
55
+ {
56
+ return v_[curr.vecPos_];
57
+ }
58
+ void swap(PositionTbl& rhs)
59
+ {
60
+ v_.swap(rhs.v_);
61
+ }
62
+ };
63
+
64
+ /*
65
+ max difference between previous position and current position < 0x40000000 = (1 << 30)
66
+ data format
67
+ input x:
68
+ v[0] = (x & 0x3f) | (y << 6) ; len = y + 1 for y = 0, 1, 2, 3
69
+ v[1] = x >> 6
70
+ v[2] = x >> (6 + 8)
71
+ v[3] = x >> (6 + 8 + 8)
72
+ */
73
+
74
+ class CompressedPositionTbl {
75
+ mutable std::vector<unsigned char> v_;
76
+ size_t lastPos_;
77
+ mutable bool addDummy_; // add last dummy data into v_ to get speed and avoid buffer overrun
78
+ friend struct Curr;
79
+ public:
80
+ struct Curr {
81
+ size_t vecPos_;
82
+ size_t val_;
83
+ unsigned int pos_;
84
+ Curr(const CompressedPositionTbl& tbl)
85
+ : vecPos_(0)
86
+ , val_(0)
87
+ , pos_(0)
88
+ {
89
+ if (!tbl.addDummy_) {
90
+ tbl.v_.push_back(0);
91
+ tbl.addDummy_ = true;
92
+ }
93
+ tbl.setup(*this);
94
+ }
95
+ Curr(size_t vecPos)
96
+ : vecPos_(vecPos)
97
+ {
98
+ }
99
+ };
100
+ CompressedPositionTbl()
101
+ : lastPos_(0)
102
+ , addDummy_(false)
103
+ {
104
+ }
105
+ void clear()
106
+ {
107
+ v_.clear();
108
+ lastPos_ = 0;
109
+ addDummy_ = false;
110
+ }
111
+ void reserve(size_t size)
112
+ {
113
+ v_.reserve(size * 2); // ad hoc
114
+ }
115
+ void set(size_t pos)
116
+ {
117
+ if (addDummy_) {
118
+ v_.resize(v_.size() - 1);
119
+ addDummy_ = false;
120
+ }
121
+ if (!v_.empty() && pos <= lastPos_) {
122
+ throw cybozu::Exception("SparseException:CompressedPositionTbl:set:bad order pos") << pos;
123
+ }
124
+ if (pos - lastPos_ >= (1 << 30)) {
125
+ throw cybozu::Exception("SparseException:CompressedPositionTbl:set:too large pos") << pos;
126
+ }
127
+ unsigned int diff = (unsigned int)(pos - lastPos_);
128
+ lastPos_ = pos;
129
+ if (diff < (1 << 6)) {
130
+ v_.push_back((unsigned char)(diff));
131
+ } else if (diff < (1 << 14)) {
132
+ v_.push_back((unsigned char)(diff & 0x3f) | (1 << 6));
133
+ v_.push_back((unsigned char)(diff >> 6));
134
+ } else if (diff < (1 << 22)) {
135
+ v_.push_back((unsigned char)(diff & 0x3f) | (2 << 6));
136
+ v_.push_back((unsigned char)(diff >> 6));
137
+ v_.push_back((unsigned char)(diff >> 14));
138
+ } else {
139
+ assert(diff < (1 << 30));
140
+ v_.push_back((unsigned char)(diff & 0x3f) | (3 << 6));
141
+ v_.push_back((unsigned char)(diff >> 6));
142
+ v_.push_back((unsigned char)(diff >> 14));
143
+ v_.push_back((unsigned char)(diff >> 22));
144
+ }
145
+ }
146
+ void next(Curr& curr) const
147
+ {
148
+ setup(curr);
149
+ curr.vecPos_++;
150
+ }
151
+ size_t get(const Curr& curr) const { return curr.val_; }
152
+ void swap(CompressedPositionTbl& rhs)
153
+ {
154
+ v_.swap(rhs.v_);
155
+ std::swap(lastPos_, rhs.lastPos_);
156
+ std::swap(addDummy_, rhs.addDummy_);
157
+ }
158
+ private:
159
+ void setup(Curr& curr) const
160
+ {
161
+ unsigned int diff = v_[curr.pos_];
162
+ unsigned int t = diff >> 6;
163
+ unsigned int pos = curr.pos_;
164
+ if (t > 0) {
165
+ diff &= (1 << 6) - 1;
166
+ if (t == 1) {
167
+ diff |= (v_[pos + 1] << 6);
168
+ } else if (t == 2) {
169
+ diff |= (v_[pos + 1] << 6) | (v_[pos + 2] << 14);
170
+ } else {
171
+ diff |= (v_[pos + 1] << 6) | (v_[pos + 2] << 14) | (v_[pos + 3] << 22);
172
+ }
173
+ }
174
+ curr.val_ += diff;
175
+ curr.pos_ += t + 1;
176
+ }
177
+ };
178
+
179
+ } // cybozu::nlp::option
180
+
181
+ template<class T, class PosTbl = option::PositionTbl, int dummy = 0>
182
+ class SparseVector {
183
+ PosTbl posTbl_;
184
+ std::vector<T> vec_;
185
+
186
+ template<class S>
187
+ class ConstIterator {
188
+ typename PosTbl::Curr cur_;
189
+ const SparseVector<S, PosTbl> *self_;
190
+ public:
191
+ // for begin
192
+ ConstIterator(const SparseVector<S, PosTbl>* self)
193
+ : cur_(self->posTbl_)
194
+ , self_(self)
195
+ {
196
+ }
197
+ // for end
198
+ ConstIterator(size_t vecPos)
199
+ : cur_(vecPos)
200
+ {
201
+ }
202
+ size_t pos() const { return self_->posTbl_.get(cur_); }
203
+ S val() const { return (self_->vec_)[cur_.vecPos_]; }
204
+
205
+ const ConstIterator *operator->() const { return this; }
206
+ const ConstIterator& operator*() const { return *this; }
207
+ void operator++()
208
+ {
209
+ self_->posTbl_.next(cur_);
210
+ }
211
+ bool operator==(const ConstIterator& rhs) const { return cur_.vecPos_ == rhs.cur_.vecPos_; }
212
+ bool operator!=(const ConstIterator& rhs) const { return !operator==(rhs); }
213
+ };
214
+ public:
215
+ typedef ConstIterator<T> const_iterator;
216
+ typedef T value_type;
217
+ SparseVector()
218
+ {
219
+ }
220
+ template<class Map>
221
+ void set(const Map& m)
222
+ {
223
+ reserve(m.size());
224
+ for (typename Map::const_iterator i = m.begin(), ie = m.end(); i != ie; ++i) {
225
+ push_back(i->first, i->second);
226
+ }
227
+ }
228
+ void reserve(size_t size)
229
+ {
230
+ posTbl_.reserve(size);
231
+ vec_.reserve(size);
232
+ }
233
+ void push_back(size_t pos, const T& x)
234
+ {
235
+ posTbl_.set(pos);
236
+ vec_.push_back(x);
237
+ }
238
+ void clear()
239
+ {
240
+ posTbl_.clear();
241
+ vec_.clear();
242
+ }
243
+ const_iterator begin() const { return const_iterator(this); }
244
+ const_iterator end() const { return const_iterator(vec_.size()); }
245
+ size_t size() const { return vec_.size(); }
246
+ bool empty() const { return vec_.empty(); }
247
+ bool operator==(const SparseVector& rhs) const
248
+ {
249
+ if (size() != rhs.size()) return false;
250
+ for (const_iterator i1 = begin(), i2 = rhs.begin(), ie = end(); i1 != ie; ++i1, ++i2) {
251
+ if (i1->pos() != i2->pos()) return false;
252
+ if (i1->val() != i2->val()) return false;
253
+ }
254
+ return true;
255
+ }
256
+ bool operator!=(const SparseVector& rhs) const { return !operator==(rhs); }
257
+
258
+ void swap(SparseVector& rhs)
259
+ {
260
+ posTbl_.swap(rhs.posTbl_);
261
+ vec_.swap(rhs.vec_);
262
+ }
263
+ double norm() const
264
+ {
265
+ double ret = 0;
266
+ for (const_iterator i = begin(), ie = end(); i != ie; ++i) {
267
+ double v = i->val();
268
+ ret += v * v;
269
+ }
270
+ return ret;
271
+ }
272
+ template<class InputStream>
273
+ void load(InputStream& is)
274
+ {
275
+ size_t size;
276
+ cybozu::load(size, is);
277
+ clear();
278
+ reserve(size);
279
+ for (size_t i = 0; i < size; i++) {
280
+ size_t pos;
281
+ T val;
282
+ cybozu::load(pos, is);
283
+ cybozu::load(val, is);
284
+ push_back(pos, val);
285
+ }
286
+ }
287
+
288
+ template<class OutputStream>
289
+ void save(OutputStream& os) const
290
+ {
291
+ cybozu::save(os, size());
292
+ for (const_iterator i = begin(), ie = end(); i != ie; ++i) {
293
+ cybozu::save(os, i->pos());
294
+ cybozu::save(os, i->val());
295
+ }
296
+ }
297
+ };
298
+
299
/**
	View over the positions that appear in both v1 and v2.
	Both vectors are held by reference and must outlive this object.
	Their iterators must visit positions in increasing order.
	@tparam V1 type with value_type, const_iterator (pos(), val(), ++, ==, ->), begin(), end()
	@tparam V2 same requirements as V1
*/
template<class V1, class V2>
class Intersection {
	// not copyable: declared but never defined
	Intersection(const Intersection&);
	void operator=(const Intersection&);
	const V1& v1_;
	const V2& v2_;

	/*
		Iterator that keeps one cursor per input vector and stops whenever
		both cursors are at the same position.
	*/
	template<class S1, class S2>
	class ConstIterator {
		typename S1::const_iterator begin1_; // current cursor in the first vector
		typename S1::const_iterator end1_;
		typename S2::const_iterator begin2_; // current cursor in the second vector
		typename S2::const_iterator end2_;
		bool isEnd_; // true once either input is exhausted
	public:
		/*
			@param isEnd true to build the end marker; otherwise the iterator
			is moved to the first common position (or becomes the end)
		*/
		ConstIterator(typename S1::const_iterator begin1, typename S1::const_iterator end1, typename S2::const_iterator begin2, typename S2::const_iterator end2, bool isEnd)
			: begin1_(begin1)
			, end1_(end1)
			, begin2_(begin2)
			, end2_(end2)
			, isEnd_(isEnd)
		{
			if (isEnd_) return;
			isEnd_ = begin1_ == end1_ || begin2_ == end2_;
			if (isEnd_) return;
			if (begin1_->pos() != begin2_->pos()) {
				// not aligned yet: operator++ skips ahead to the first match
				operator++();
			}
		}
		// common position (valid only while not at the end)
		size_t pos() const { return begin1_->pos(); }
		// value from the first vector at the common position
		typename S1::value_type val1() const { return begin1_->val(); }
		// value from the second vector at the common position
		typename S2::value_type val2() const { return begin2_->val(); }

		// the iterator acts as its own element
		const ConstIterator *operator->() const { return this; }
		const ConstIterator& operator*() const { return *this; }
		// move to the next common position, or to the end if there is none
		void operator++()
		{
			if (isEnd_) return;
			if (begin1_->pos() == begin2_->pos()) {
				// leave the current match
				++begin1_;
				isEnd_ = begin1_ == end1_;
				if (isEnd_) return;
			}
			for (;;) {
				while (begin1_->pos() < begin2_->pos()) {
					++begin1_;
					if (begin1_ == end1_) {
						isEnd_ = true;
						return;
					}
				}
				if (begin1_->pos() == begin2_->pos()) return;

				while (begin1_->pos() > begin2_->pos()) {
					++begin2_;
					if (begin2_ == end2_) {
						isEnd_ = true;
						return;
					}
				}
				if (begin1_->pos() == begin2_->pos()) return;
			}
		}
		/*
			All end iterators are equal to each other and differ from every
			live iterator; two live iterators are equal when both cursors match.
		*/
		bool operator==(const ConstIterator& rhs) const
		{
			if (isEnd_ || rhs.isEnd_) return isEnd_ == rhs.isEnd_;
			return begin1_ == rhs.begin1_ && begin2_ == rhs.begin2_;
		}
		bool operator!=(const ConstIterator& rhs) const { return !operator==(rhs); }
	};
public:
	typedef ConstIterator<V1, V2> const_iterator;
	Intersection(const V1& v1, const V2& v2)
		: v1_(v1)
		, v2_(v2)
	{
	}
	const_iterator begin() const { return const_iterator(v1_.begin(), v1_.end(), v2_.begin(), v2_.end(), false); }
	const_iterator end() const { return const_iterator(v1_.begin(), v1_.end(), v2_.begin(), v2_.end(), true); }
};
375
+
376
/**
	View over the positions that appear in v1, in v2, or in both.
	Both vectors are held by reference and must outlive this object.
	Their iterators must visit positions in increasing order.
	@tparam V1 type with value_type, const_iterator (pos(), val(), ++, ==, !=, ->), begin(), end()
	@tparam V2 same requirements as V1
*/
template<class V1, class V2>
class Union {
	// not copyable: declared but never defined
	Union(const Union&);
	void operator=(const Union&);
	const V1& v1_;
	const V2& v2_;

	template<class S1, class S2>
	class ConstIterator {
	public:
		/*
			      pos1   pos2 ; next action
			End    x      x   ; end
			Only1  o      x   ; ++p1
			Only2  x      o   ; ++p2
			Small  o  <   o   ; ++p1
			Equal  o  =   o   ; ++p1, ++p2
			Large  o  >   o   ; ++p2
		*/
		enum Mode {
			End,
			Only1,
			Only2,
			Small,
			Equal,
			Large
		};
	private:
		typename S1::const_iterator begin1_; // current cursor in the first vector
		typename S1::const_iterator end1_;
		typename S2::const_iterator begin2_; // current cursor in the second vector
		typename S2::const_iterator end2_;
		Mode mode_; // relation between the two cursors
		size_t pos_; // position reported by pos() (unchanged when mode_ becomes End)

		// recompute mode_ and pos_ from the two cursors
		void detectMode()
		{
			const bool has1 = begin1_ != end1_;
			const bool has2 = begin2_ != end2_;
			if (has1 && has2) {
				const size_t pos1 = begin1_->pos();
				const size_t pos2 = begin2_->pos();
				if (pos1 < pos2) {
					mode_ = Small;
					pos_ = pos1;
				} else if (pos1 == pos2) {
					mode_ = Equal;
					pos_ = pos1;
				} else {
					mode_ = Large;
					pos_ = pos2;
				}
			} else if (has1) {
				mode_ = Only1;
				pos_ = begin1_->pos();
			} else if (has2) {
				mode_ = Only2;
				pos_ = begin2_->pos();
			} else {
				mode_ = End;
			}
		}
	public:
		/*
			@param isEnd true to build the end marker (mode End, cursors unused)
		*/
		ConstIterator(typename S1::const_iterator begin1, typename S1::const_iterator end1, typename S2::const_iterator begin2, typename S2::const_iterator end2, bool isEnd)
			: begin1_(begin1)
			, end1_(end1)
			, begin2_(begin2)
			, end2_(end2)
			, mode_(End)
			, pos_(0)
		{
			if (isEnd) return;
			detectMode();
		}
		// relation between the two inputs at the current position
		Mode getMode() const { return mode_; }
		// current position (the smaller of the two cursor positions)
		size_t pos() const { return pos_; }
		// true if the first vector has an element at pos()
		bool hasVal1() const { return mode_ == Only1 || mode_ == Small || mode_ == Equal; }
		// true if the second vector has an element at pos()
		bool hasVal2() const { return mode_ == Only2 || mode_ == Equal || mode_ == Large; }

		/**
			return value if exists otherwise 0
		*/
		typename S1::value_type val1() const { return hasVal1() ? begin1_->val() : 0; }
		typename S2::value_type val2() const { return hasVal2() ? begin2_->val() : 0; }

		// the iterator acts as its own element
		const ConstIterator *operator->() const { return this; }
		const ConstIterator& operator*() const { return *this; }
		// advance whichever cursor(s) supplied the current position
		void operator++()
		{
			if (mode_ == End) return;
			// evaluate both flags before moving anything: they depend on mode_
			const bool step1 = hasVal1();
			const bool step2 = hasVal2();
			if (step1) ++begin1_;
			if (step2) ++begin2_;
			detectMode();
		}
		/*
			All End iterators are equal to each other and differ from every
			live iterator; two live iterators are equal when both cursors match.
		*/
		bool operator==(const ConstIterator& rhs) const
		{
			if (mode_ == End || rhs.mode_ == End) return mode_ == rhs.mode_;
			return begin1_ == rhs.begin1_ && begin2_ == rhs.begin2_;
		}
		bool operator!=(const ConstIterator& rhs) const { return !operator==(rhs); }
	};
public:
	typedef ConstIterator<V1, V2> const_iterator;
	Union(const V1& v1, const V2& v2)
		: v1_(v1)
		, v2_(v2)
	{
	}
	const_iterator begin() const { return const_iterator(v1_.begin(), v1_.end(), v2_.begin(), v2_.end(), false); }
	const_iterator end() const { return const_iterator(v1_.begin(), v1_.end(), v2_.begin(), v2_.end(), true); }
};
501
+
502
+ /**
503
+ inner product of lhs and rhs
504
+ retval is the type of lhs::value_type or rhs::value_type
505
+ */
506
+ template<class Ret, class L, class Ltbl, class R, class Rtbl>
507
+ void InnerProduct(Ret *pret, const SparseVector<L, Ltbl>& lhs, const SparseVector<R, Rtbl>& rhs)
508
+ {
509
+ typedef SparseVector<L, Ltbl> Lvec;
510
+ typedef SparseVector<R, Rtbl> Rvec;
511
+
512
+ Ret ret = 0;
513
+ if ((const void*)&lhs == (const void*)&rhs) {
514
+ for (typename Lvec::const_iterator i = lhs.begin(), ie = lhs.end(); i != ie; ++i) {
515
+ ret += (Ret)i->val() * (Ret)i->val();
516
+ }
517
+ } else {
518
+ typedef Intersection<Lvec, Rvec> Inter;
519
+ Inter inter(lhs, rhs);
520
+ for (typename Inter::const_iterator i = inter.begin(), ie = inter.end(); i != ie; ++i) {
521
+ ret += (Ret)i->val1() * (Ret)i->val2();
522
+ }
523
+ }
524
+ *pret = ret;
525
+ }
526
+
527
+ } // cybozu::nlp
528
+
529
+ } // cybozu