ooxml_crypt 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (264) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +58 -0
  5. data/Rakefile +12 -0
  6. data/bin/console +15 -0
  7. data/bin/setup +8 -0
  8. data/ext/ooxml_crypt/extconf.rb +18 -0
  9. data/ext/ooxml_crypt/ooxml_crypt.c +27 -0
  10. data/ext/ooxml_crypt/ooxml_crypt.h +7 -0
  11. data/lib/ooxml_crypt/version.rb +5 -0
  12. data/lib/ooxml_crypt.rb +75 -0
  13. data/vendor/cybozulib/.github/workflows/main.yml +12 -0
  14. data/vendor/cybozulib/.gitignore +5 -0
  15. data/vendor/cybozulib/CMakeLists.txt +6 -0
  16. data/vendor/cybozulib/COPYRIGHT +27 -0
  17. data/vendor/cybozulib/Makefile +26 -0
  18. data/vendor/cybozulib/bin/libeay32.dll +0 -0
  19. data/vendor/cybozulib/bin/libmecab.dll +0 -0
  20. data/vendor/cybozulib/bin/ssleay32.dll +0 -0
  21. data/vendor/cybozulib/common.mk +116 -0
  22. data/vendor/cybozulib/common.props +25 -0
  23. data/vendor/cybozulib/cybozulib.sln +286 -0
  24. data/vendor/cybozulib/debug.props +14 -0
  25. data/vendor/cybozulib/include/cybozu/array.hpp +197 -0
  26. data/vendor/cybozulib/include/cybozu/atoi.hpp +238 -0
  27. data/vendor/cybozulib/include/cybozu/atomic.hpp +146 -0
  28. data/vendor/cybozulib/include/cybozu/base64.hpp +210 -0
  29. data/vendor/cybozulib/include/cybozu/benchmark.hpp +212 -0
  30. data/vendor/cybozulib/include/cybozu/bfd.hpp +105 -0
  31. data/vendor/cybozulib/include/cybozu/bit_operation.hpp +139 -0
  32. data/vendor/cybozulib/include/cybozu/bitvector.hpp +358 -0
  33. data/vendor/cybozulib/include/cybozu/condition_variable.hpp +113 -0
  34. data/vendor/cybozulib/include/cybozu/condition_variable_cs.hpp +74 -0
  35. data/vendor/cybozulib/include/cybozu/config.hpp +392 -0
  36. data/vendor/cybozulib/include/cybozu/critical_section.hpp +60 -0
  37. data/vendor/cybozulib/include/cybozu/crypto.hpp +321 -0
  38. data/vendor/cybozulib/include/cybozu/csucvector.hpp +624 -0
  39. data/vendor/cybozulib/include/cybozu/csv.hpp +294 -0
  40. data/vendor/cybozulib/include/cybozu/data_type.hpp +27 -0
  41. data/vendor/cybozulib/include/cybozu/endian.hpp +224 -0
  42. data/vendor/cybozulib/include/cybozu/env.hpp +63 -0
  43. data/vendor/cybozulib/include/cybozu/event.hpp +122 -0
  44. data/vendor/cybozulib/include/cybozu/exception.hpp +253 -0
  45. data/vendor/cybozulib/include/cybozu/file.hpp +626 -0
  46. data/vendor/cybozulib/include/cybozu/fmindex.hpp +291 -0
  47. data/vendor/cybozulib/include/cybozu/format.hpp +93 -0
  48. data/vendor/cybozulib/include/cybozu/frequency.hpp +264 -0
  49. data/vendor/cybozulib/include/cybozu/hash.hpp +67 -0
  50. data/vendor/cybozulib/include/cybozu/inttype.hpp +174 -0
  51. data/vendor/cybozulib/include/cybozu/itoa.hpp +336 -0
  52. data/vendor/cybozulib/include/cybozu/json.hpp +120 -0
  53. data/vendor/cybozulib/include/cybozu/line_stream.hpp +149 -0
  54. data/vendor/cybozulib/include/cybozu/link_libeay32.hpp +21 -0
  55. data/vendor/cybozulib/include/cybozu/link_mpir.hpp +18 -0
  56. data/vendor/cybozulib/include/cybozu/link_ssleay32.hpp +19 -0
  57. data/vendor/cybozulib/include/cybozu/log.hpp +237 -0
  58. data/vendor/cybozulib/include/cybozu/minixml.hpp +452 -0
  59. data/vendor/cybozulib/include/cybozu/mmap.hpp +143 -0
  60. data/vendor/cybozulib/include/cybozu/mutex.hpp +144 -0
  61. data/vendor/cybozulib/include/cybozu/nlp/mecab.hpp +96 -0
  62. data/vendor/cybozulib/include/cybozu/nlp/plsi.hpp +315 -0
  63. data/vendor/cybozulib/include/cybozu/nlp/random.hpp +74 -0
  64. data/vendor/cybozulib/include/cybozu/nlp/sparse.hpp +529 -0
  65. data/vendor/cybozulib/include/cybozu/nlp/svd.hpp +486 -0
  66. data/vendor/cybozulib/include/cybozu/nlp/tfidf.hpp +226 -0
  67. data/vendor/cybozulib/include/cybozu/nlp/top_score.hpp +75 -0
  68. data/vendor/cybozulib/include/cybozu/option.hpp +743 -0
  69. data/vendor/cybozulib/include/cybozu/parallel.hpp +88 -0
  70. data/vendor/cybozulib/include/cybozu/pcg.hpp +72 -0
  71. data/vendor/cybozulib/include/cybozu/process.hpp +324 -0
  72. data/vendor/cybozulib/include/cybozu/quit_signal_handler.hpp +66 -0
  73. data/vendor/cybozulib/include/cybozu/random_generator.hpp +144 -0
  74. data/vendor/cybozulib/include/cybozu/regex.hpp +463 -0
  75. data/vendor/cybozulib/include/cybozu/select8.hpp +279 -0
  76. data/vendor/cybozulib/include/cybozu/serializer.hpp +363 -0
  77. data/vendor/cybozulib/include/cybozu/sha1.hpp +209 -0
  78. data/vendor/cybozulib/include/cybozu/sha2.hpp +506 -0
  79. data/vendor/cybozulib/include/cybozu/siphash.hpp +105 -0
  80. data/vendor/cybozulib/include/cybozu/socket.hpp +785 -0
  81. data/vendor/cybozulib/include/cybozu/ssl.hpp +203 -0
  82. data/vendor/cybozulib/include/cybozu/stacktrace.hpp +291 -0
  83. data/vendor/cybozulib/include/cybozu/stream.hpp +269 -0
  84. data/vendor/cybozulib/include/cybozu/string.hpp +1746 -0
  85. data/vendor/cybozulib/include/cybozu/string_operation.hpp +365 -0
  86. data/vendor/cybozulib/include/cybozu/sucvector.hpp +378 -0
  87. data/vendor/cybozulib/include/cybozu/test.hpp +373 -0
  88. data/vendor/cybozulib/include/cybozu/thread.hpp +229 -0
  89. data/vendor/cybozulib/include/cybozu/time.hpp +281 -0
  90. data/vendor/cybozulib/include/cybozu/tls.hpp +115 -0
  91. data/vendor/cybozulib/include/cybozu/unordered_map.hpp +13 -0
  92. data/vendor/cybozulib/include/cybozu/unordered_set.hpp +13 -0
  93. data/vendor/cybozulib/include/cybozu/v128.hpp +376 -0
  94. data/vendor/cybozulib/include/cybozu/wavelet_matrix.hpp +345 -0
  95. data/vendor/cybozulib/include/cybozu/xorshift.hpp +189 -0
  96. data/vendor/cybozulib/include/cybozu/zlib.hpp +325 -0
  97. data/vendor/cybozulib/include/sais.hxx +364 -0
  98. data/vendor/cybozulib/misc/make_select8tbl.cpp +26 -0
  99. data/vendor/cybozulib/mk.bat +37 -0
  100. data/vendor/cybozulib/readme.md +29 -0
  101. data/vendor/cybozulib/release.props +12 -0
  102. data/vendor/cybozulib/sample/Makefile +30 -0
  103. data/vendor/cybozulib/sample/csucvector_smpl.cpp +42 -0
  104. data/vendor/cybozulib/sample/data/svd/org/test1.S +4 -0
  105. data/vendor/cybozulib/sample/data/svd/org/test1.U +4 -0
  106. data/vendor/cybozulib/sample/data/svd/org/test1.V +6 -0
  107. data/vendor/cybozulib/sample/data/svd/test1 +4 -0
  108. data/vendor/cybozulib/sample/data/svd/test2 +4 -0
  109. data/vendor/cybozulib/sample/desymbol.cpp +127 -0
  110. data/vendor/cybozulib/sample/exception_smpl.cpp +46 -0
  111. data/vendor/cybozulib/sample/fmindex_smpl.cpp +231 -0
  112. data/vendor/cybozulib/sample/log_smpl.cpp +19 -0
  113. data/vendor/cybozulib/sample/mecab_smpl.cpp +37 -0
  114. data/vendor/cybozulib/sample/option2_smpl.cpp +68 -0
  115. data/vendor/cybozulib/sample/option_smpl.cpp +42 -0
  116. data/vendor/cybozulib/sample/plsi_smpl.cpp +207 -0
  117. data/vendor/cybozulib/sample/proj/exception_smpl.vcproj +184 -0
  118. data/vendor/cybozulib/sample/proj/mecab_smpl.vcproj +184 -0
  119. data/vendor/cybozulib/sample/proj/ssl_smpl/ssl_smpl.vcxproj +85 -0
  120. data/vendor/cybozulib/sample/proj/ssl_smpl.vcproj +347 -0
  121. data/vendor/cybozulib/sample/proj/stacktrace_smpl/stacktrace_smpl.vcxproj +85 -0
  122. data/vendor/cybozulib/sample/proj/svd_smpl.vcproj +184 -0
  123. data/vendor/cybozulib/sample/quit_signal_handler.cpp +30 -0
  124. data/vendor/cybozulib/sample/serializer_smpl.cpp +196 -0
  125. data/vendor/cybozulib/sample/socket_smpl.cpp +82 -0
  126. data/vendor/cybozulib/sample/ssl_smpl.cpp +39 -0
  127. data/vendor/cybozulib/sample/stacktrace_smpl.cpp +52 -0
  128. data/vendor/cybozulib/sample/svd_bench_smpl.cpp +143 -0
  129. data/vendor/cybozulib/sample/svd_smpl.cpp +94 -0
  130. data/vendor/cybozulib/sample/wm_bench_smpl.cpp +182 -0
  131. data/vendor/cybozulib/sample/zlib_smpl.cpp +41 -0
  132. data/vendor/cybozulib/src/Makefile +8 -0
  133. data/vendor/cybozulib/src/base/Makefile +19 -0
  134. data/vendor/cybozulib/test/Makefile +12 -0
  135. data/vendor/cybozulib/test/base/Makefile +37 -0
  136. data/vendor/cybozulib/test/base/array_test.cpp +173 -0
  137. data/vendor/cybozulib/test/base/atoi_test.cpp +774 -0
  138. data/vendor/cybozulib/test/base/atomic_test.cpp +49 -0
  139. data/vendor/cybozulib/test/base/base64_test.cpp +113 -0
  140. data/vendor/cybozulib/test/base/bit_operation_test.cpp +134 -0
  141. data/vendor/cybozulib/test/base/bitvector_test.cpp +204 -0
  142. data/vendor/cybozulib/test/base/condition_variable_cs_test.cpp +92 -0
  143. data/vendor/cybozulib/test/base/condition_variable_test.cpp +88 -0
  144. data/vendor/cybozulib/test/base/config_test.cpp +236 -0
  145. data/vendor/cybozulib/test/base/crypto_test.cpp +122 -0
  146. data/vendor/cybozulib/test/base/csucvector_test.cpp +63 -0
  147. data/vendor/cybozulib/test/base/csv_test.cpp +182 -0
  148. data/vendor/cybozulib/test/base/data/a.xml +26 -0
  149. data/vendor/cybozulib/test/base/endian_test.cpp +56 -0
  150. data/vendor/cybozulib/test/base/env_test.cpp +22 -0
  151. data/vendor/cybozulib/test/base/event_test.cpp +41 -0
  152. data/vendor/cybozulib/test/base/file_test.cpp +233 -0
  153. data/vendor/cybozulib/test/base/fmindex_test.cpp +118 -0
  154. data/vendor/cybozulib/test/base/format_test.cpp +12 -0
  155. data/vendor/cybozulib/test/base/frequency_test.cpp +104 -0
  156. data/vendor/cybozulib/test/base/itoa_test.cpp +522 -0
  157. data/vendor/cybozulib/test/base/line_stream_test.cpp +208 -0
  158. data/vendor/cybozulib/test/base/mecab_test.cpp +41 -0
  159. data/vendor/cybozulib/test/base/minixml_test.cpp +103 -0
  160. data/vendor/cybozulib/test/base/mmap_test.cpp +15 -0
  161. data/vendor/cybozulib/test/base/option_test.cpp +487 -0
  162. data/vendor/cybozulib/test/base/parallel_test.cpp +48 -0
  163. data/vendor/cybozulib/test/base/proj/array_test/array_test.vcxproj +86 -0
  164. data/vendor/cybozulib/test/base/proj/atoi_test/atoi_test.vcxproj +86 -0
  165. data/vendor/cybozulib/test/base/proj/atomic_test/atomic_test.vcxproj +86 -0
  166. data/vendor/cybozulib/test/base/proj/base64_test/base64_test.vcxproj +86 -0
  167. data/vendor/cybozulib/test/base/proj/condition_variable_cs_test/condition_variable_cs_test.vcxproj +86 -0
  168. data/vendor/cybozulib/test/base/proj/condition_variable_test/condition_variable_test.vcxproj +86 -0
  169. data/vendor/cybozulib/test/base/proj/config_test/config_test.vcxproj +86 -0
  170. data/vendor/cybozulib/test/base/proj/csv_test/csv_test.vcxproj +86 -0
  171. data/vendor/cybozulib/test/base/proj/endian_test/endian_test.vcxproj +86 -0
  172. data/vendor/cybozulib/test/base/proj/env_test/env_test.vcxproj +86 -0
  173. data/vendor/cybozulib/test/base/proj/event_test/event_test.vcxproj +86 -0
  174. data/vendor/cybozulib/test/base/proj/file_test/file_test.vcxproj +86 -0
  175. data/vendor/cybozulib/test/base/proj/itoa_test/itoa_test.vcxproj +86 -0
  176. data/vendor/cybozulib/test/base/proj/mecab_test/mecab_test.vcxproj +88 -0
  177. data/vendor/cybozulib/test/base/proj/minixml_test/minixml_test.vcxproj +86 -0
  178. data/vendor/cybozulib/test/base/proj/mmap_test/mmap_test.vcxproj +86 -0
  179. data/vendor/cybozulib/test/base/proj/serializer_test/serializer_test.vcxproj +86 -0
  180. data/vendor/cybozulib/test/base/proj/sha1_test/sha1_test.vcxproj +86 -0
  181. data/vendor/cybozulib/test/base/proj/stream_test/stream_test.vcxproj +86 -0
  182. data/vendor/cybozulib/test/base/proj/string_operation_test/string_operation_test.vcxproj +86 -0
  183. data/vendor/cybozulib/test/base/proj/string_test/string_test.vcxproj +86 -0
  184. data/vendor/cybozulib/test/base/proj/thread_test/thread_test.vcxproj +86 -0
  185. data/vendor/cybozulib/test/base/proj/time_test/time_test.vcxproj +86 -0
  186. data/vendor/cybozulib/test/base/proj/tls_test/tls_test.vcxproj +86 -0
  187. data/vendor/cybozulib/test/base/proj/zlib_test/zlib_test.vcxproj +86 -0
  188. data/vendor/cybozulib/test/base/random_generator_test.cpp +28 -0
  189. data/vendor/cybozulib/test/base/regex_test.cpp +74 -0
  190. data/vendor/cybozulib/test/base/serializer_test.cpp +483 -0
  191. data/vendor/cybozulib/test/base/sha1_test.cpp +61 -0
  192. data/vendor/cybozulib/test/base/sha2_test.cpp +191 -0
  193. data/vendor/cybozulib/test/base/siphash_test.cpp +33 -0
  194. data/vendor/cybozulib/test/base/socket_test.cpp +76 -0
  195. data/vendor/cybozulib/test/base/stream_test.cpp +101 -0
  196. data/vendor/cybozulib/test/base/string_operation_test.cpp +340 -0
  197. data/vendor/cybozulib/test/base/string_test.cpp +1705 -0
  198. data/vendor/cybozulib/test/base/sucvector_test.cpp +312 -0
  199. data/vendor/cybozulib/test/base/thread_test.cpp +62 -0
  200. data/vendor/cybozulib/test/base/time_test.cpp +164 -0
  201. data/vendor/cybozulib/test/base/tls_test.cpp +50 -0
  202. data/vendor/cybozulib/test/base/wavelet_matrix_test.cpp +145 -0
  203. data/vendor/cybozulib/test/base/zlib_test.cpp +371 -0
  204. data/vendor/cybozulib/test/nlp/Makefile +27 -0
  205. data/vendor/cybozulib/test/nlp/proj/random_test.vcproj +184 -0
  206. data/vendor/cybozulib/test/nlp/proj/sparse_test.vcproj +184 -0
  207. data/vendor/cybozulib/test/nlp/proj/svd_test.vcproj +184 -0
  208. data/vendor/cybozulib/test/nlp/random_test.cpp +62 -0
  209. data/vendor/cybozulib/test/nlp/sparse_test.cpp +347 -0
  210. data/vendor/cybozulib/test/nlp/svd_test.cpp +234 -0
  211. data/vendor/cybozulib/test/nlp/top_score_test.cpp +40 -0
  212. data/vendor/cybozulib/tool/create_vcproj.py +186 -0
  213. data/vendor/cybozulib/tool/vcproj_tmpl.py +185 -0
  214. data/vendor/msoffice/COPYRIGHT +27 -0
  215. data/vendor/msoffice/Makefile +29 -0
  216. data/vendor/msoffice/bin/64/msoc.dll +0 -0
  217. data/vendor/msoffice/bin/64/msocsample.exe +0 -0
  218. data/vendor/msoffice/bin/64/msoffice-crypt.exe +0 -0
  219. data/vendor/msoffice/bin/msoc.dll +0 -0
  220. data/vendor/msoffice/bin/msocsample.exe +0 -0
  221. data/vendor/msoffice/bin/msoffice-crypt.exe +0 -0
  222. data/vendor/msoffice/common.mk +71 -0
  223. data/vendor/msoffice/common.props +26 -0
  224. data/vendor/msoffice/debug.props +14 -0
  225. data/vendor/msoffice/include/attack.hpp +211 -0
  226. data/vendor/msoffice/include/cfb.hpp +777 -0
  227. data/vendor/msoffice/include/crypto_util.hpp +450 -0
  228. data/vendor/msoffice/include/custom_sha1.hpp +342 -0
  229. data/vendor/msoffice/include/decode.hpp +240 -0
  230. data/vendor/msoffice/include/encode.hpp +221 -0
  231. data/vendor/msoffice/include/make_dataspace.hpp +316 -0
  232. data/vendor/msoffice/include/msoc.h +129 -0
  233. data/vendor/msoffice/include/resource.hpp +7 -0
  234. data/vendor/msoffice/include/standard_encryption.hpp +145 -0
  235. data/vendor/msoffice/include/uint32vec.hpp +179 -0
  236. data/vendor/msoffice/include/util.hpp +212 -0
  237. data/vendor/msoffice/lib/.emptydir +0 -0
  238. data/vendor/msoffice/misc/decrypt-xls.vbs +46 -0
  239. data/vendor/msoffice/mk.bat +1 -0
  240. data/vendor/msoffice/mkdll.bat +3 -0
  241. data/vendor/msoffice/msoc.def +13 -0
  242. data/vendor/msoffice/msocsample.py +178 -0
  243. data/vendor/msoffice/msoffice12.sln +31 -0
  244. data/vendor/msoffice/readme.md +110 -0
  245. data/vendor/msoffice/release.props +28 -0
  246. data/vendor/msoffice/src/Makefile +19 -0
  247. data/vendor/msoffice/src/attack.cpp +124 -0
  248. data/vendor/msoffice/src/cfb_test.cpp +77 -0
  249. data/vendor/msoffice/src/minisample.c +54 -0
  250. data/vendor/msoffice/src/msocdll.cpp +276 -0
  251. data/vendor/msoffice/src/msocsample.c +136 -0
  252. data/vendor/msoffice/src/msoffice-crypt.cpp +219 -0
  253. data/vendor/msoffice/src/proj/attack/attack.vcxproj +88 -0
  254. data/vendor/msoffice/src/proj/main/msoffice-crypt.vcxproj +88 -0
  255. data/vendor/msoffice/src/sha1.cpp +234 -0
  256. data/vendor/msoffice/test/Makefile +20 -0
  257. data/vendor/msoffice/test/cfb_test.cpp +74 -0
  258. data/vendor/msoffice/test/hash_test.cpp +59 -0
  259. data/vendor/msoffice/test/proj/cfb/cfb_test.vcxproj +90 -0
  260. data/vendor/msoffice/test/proj/hash/hash_test.vcxproj +90 -0
  261. data/vendor/msoffice/test/sampl.bat +8 -0
  262. data/vendor/msoffice/test_all.py +46 -0
  263. data/vendor/update +4 -0
  264. metadata +351 -0
@@ -0,0 +1,378 @@
1
+ #pragma once
2
+ /**
3
+ @file
4
+ @brief succinct vector
5
+ @author MITSUNARI Shigeo(@herumi)
6
+ @license modified new BSD license
7
+ http://opensource.org/licenses/BSD-3-Clause
8
+
9
+ @note use -msse4.2 option for popcnt
10
+ */
11
+ #include <assert.h>
12
+ #include <vector>
13
+ #include <cybozu/exception.hpp>
14
+ #include <cybozu/bit_operation.hpp>
15
+ #include <cybozu/select8.hpp>
16
+ #include <cybozu/serializer.hpp>
17
+ #include <iosfwd>
18
+
19
+ #ifdef _MSC_VER
20
+ #pragma warning(push)
21
+ #pragma warning(disable : 4127)
22
+ #endif
23
+
24
+ namespace cybozu {
25
+
26
/// sentinel returned by select when the requested bit does not exist (all 64 bits set)
const uint64_t NotFound = ~static_cast<uint64_t>(0);
27
+
28
+ namespace sucvector_util {
29
+
30
+ inline uint32_t rank64(uint64_t v, size_t i)
31
+ {
32
+ return cybozu::popcnt<uint64_t>(v & cybozu::makeBitMask64(i));
33
+ }
34
+
35
/**
	number of blocks of size `block` needed to hold x items, i.e. ceil(x / block)
	@param x [in] item count (expected to be non-negative)
	@param block [in] block size (must be positive)
	@return x / block rounded up
	@note computed as quotient plus remainder test instead of
	      (x + block - 1) / block so that x close to the maximum of T
	      does not wrap around and yield a too small count
*/
template<class T>
T getBlockNum(T x, T block)
{
	const T q = x / block;
	return (x % block) != 0 ? T(q + 1) : q;
}
40
+
41
+ inline uint32_t select64(uint64_t v, size_t r)
42
+ {
43
+ assert(r <= 64);
44
+ if (r > popcnt(v)) return 64;
45
+ uint32_t pos = 0;
46
+ uint32_t c = popcnt(uint32_t(v));
47
+ if (r > c) {
48
+ r -= c;
49
+ pos = 32;
50
+ v >>= 32;
51
+ }
52
+ c = popcnt<uint32_t>(uint16_t(v));
53
+ if (r > c) {
54
+ r -= c;
55
+ pos += 16;
56
+ v >>= 16;
57
+ }
58
+ c = popcnt<uint32_t>(uint8_t(v));
59
+ if (r > c) {
60
+ r -= c;
61
+ pos += 8;
62
+ v >>= 8;
63
+ }
64
+ if (r == 8 && uint8_t(v) == 0xff) return pos + 7;
65
+ assert(r <= 8);
66
+ c = cybozu::select8_util::select8(uint8_t(v), r);
67
+ return pos + c;
68
+ }
69
+
70
+ } // cybozu::sucvector_util
71
+
72
+ /*
73
+ extra memory
74
+ (32 + 8 * 4) / 256 = 1/4 bit per bit for rank
75
+ */
76
+ template<class type, bool withSelect = true>
77
+ class SucVectorT {
78
+ typedef type size_type;
79
+ static const bool support1TiB = sizeof(size_type) == 8;
80
+ static const int maxBitLen = support1TiB ? 32 + 8 : 32; // don't increase this value
81
+ static const uint64_t maxBitSize = uint64_t(1) << maxBitLen;
82
+ struct Block {
83
+ uint64_t org[4];
84
+ union {
85
+ uint64_t a64;
86
+ struct {
87
+ uint32_t a;
88
+ uint8_t b[4]; // b[0] is used for (b[0] << 32) | a
89
+ } ab;
90
+ };
91
+ void clear()
92
+ {
93
+ memset(this, 0, sizeof(Block));
94
+ }
95
+ };
96
+ uint64_t bitSize_;
97
+ uint64_t numTbl_[2];
98
+ bool freezed_;
99
+ std::vector<Block> blk_;
100
+ typedef std::vector<uint32_t> Uint32Vec;
101
+ static const uint64_t posUnit = 1024;
102
+ Uint32Vec selTbl_[2];
103
+
104
+ template<int b>
105
+ uint64_t rank_a(size_t i) const
106
+ {
107
+ assert(i < blk_.size());
108
+ uint64_t ret;
109
+ if (support1TiB) {
110
+ ret = blk_[i].a64 & makeBitMask64(maxBitLen);
111
+ } else {
112
+ ret = blk_[i].ab.a;
113
+ }
114
+ if (!b) ret = i * uint64_t(256) - ret;
115
+ return ret;
116
+ }
117
+ template<bool b>
118
+ size_t get_b(size_t L, size_t i) const
119
+ {
120
+ assert(L < blk_.size());
121
+ assert(0 < i && i < 4);
122
+ size_t r = blk_[L].ab.b[i];
123
+ if (!b) r = 64 * i - r;
124
+ return r;
125
+ }
126
+ // call after blk_, numTbl_ are initialized
127
+ void initSelTbl()
128
+ {
129
+ if (!withSelect) return;
130
+ initSelTblSub<false>(selTbl_[0]);
131
+ initSelTblSub<true>(selTbl_[1]);
132
+ }
133
+ template<bool b>
134
+ void initSelTblSub(Uint32Vec& tbl)
135
+ {
136
+ const int tablePos = b ? 1 : 0;
137
+ assert(numTbl_[tablePos] / posUnit < 0x7fffffff - 1);
138
+ const size_t size = size_t(sucvector_util::getBlockNum(numTbl_[tablePos], posUnit));
139
+ tbl.resize(size);
140
+ uint32_t pos = 0;
141
+ for (size_t i = 0; i < size; i++) {
142
+ uint64_t r = i * posUnit;
143
+ while (rank_a<b>(pos) < r) {
144
+ pos++;
145
+ if (pos == blk_.size()) break;
146
+ }
147
+ tbl[i] = pos;
148
+ }
149
+ }
150
+ void initBlock(const uint64_t *buf, size_t blkNum)
151
+ {
152
+ uint64_t num1 = 0;
153
+ size_t pos = 0;
154
+ for (size_t i = 0, n = blk_.size(); i < n; i++) {
155
+ Block& blk = blk_[i];
156
+ if (support1TiB) {
157
+ blk.a64 = num1 % maxBitSize;
158
+ } else {
159
+ if (num1 > 0xffffffff) throw cybozu::Exception("SucVectorT:too large num1") << num1;
160
+ blk.ab.a = (uint32_t)num1;
161
+ }
162
+ uint32_t subNum1 = 0;
163
+ for (size_t j = 0; j < 4; j++) {
164
+ uint64_t v;
165
+ if (buf) {
166
+ v = pos < blkNum ? buf[pos++] : 0;
167
+ blk.org[j] = v;
168
+ } else {
169
+ v = blk.org[j];
170
+ }
171
+ uint32_t c = cybozu::popcnt(v);
172
+ num1 += c;
173
+ if (j > 0) {
174
+ blk.ab.b[j] = (uint8_t)subNum1;
175
+ }
176
+ subNum1 += c;
177
+ }
178
+ }
179
+ numTbl_[0] = blkNum * 64 - num1;
180
+ numTbl_[1] = num1;
181
+ initSelTbl();
182
+ freezed_ = true;
183
+ }
184
+ public:
185
+ /*
186
+ data format(endian is depend on CPU:eg. little endian for x86/x64)
187
+ bitSize : 8
188
+ numTbl_[0] : 8
189
+ numTbl_[1] : 8
190
+ blkSize : 8
191
+ blk data : blkSize * sizeof(Block)
192
+ */
193
+ template<class OutputStream>
194
+ void save(OutputStream& os) const
195
+ {
196
+ cybozu::save(os, bitSize_);
197
+ cybozu::save(os, numTbl_[0]);
198
+ cybozu::save(os, numTbl_[1]);
199
+ cybozu::savePodVec(os, blk_);
200
+ if (withSelect) {
201
+ cybozu::savePodVec(os, selTbl_[0]);
202
+ cybozu::savePodVec(os, selTbl_[1]);
203
+ }
204
+ }
205
+ template<class InputStream>
206
+ void load(InputStream& is)
207
+ {
208
+ cybozu::load(bitSize_, is);
209
+ cybozu::load(numTbl_[0], is);
210
+ cybozu::load(numTbl_[1], is);
211
+ cybozu::loadPodVec(blk_, is);
212
+ if (withSelect) {
213
+ cybozu::loadPodVec(selTbl_[0], is);
214
+ cybozu::loadPodVec(selTbl_[1], is);
215
+ } else {
216
+ initSelTbl();
217
+ }
218
+ }
219
+ SucVectorT() : bitSize_(0), freezed_(false) { numTbl_[0] = numTbl_[1] = 0; }
220
+ SucVectorT(const uint64_t *buf, uint64_t bitSize)
221
+ {
222
+ init(buf, bitSize);
223
+ }
224
+ /*
225
+ initialize SucVector
226
+ @param buf [in] bit pattern buffer
227
+ @param bitSize [in] bitSize ; buf size = (bitSize + 63) / 64
228
+ */
229
+ void init(const uint64_t *buf, uint64_t bitSize)
230
+ {
231
+ const size_t blkNum = resize(bitSize, false);
232
+ initBlock(buf, blkNum);
233
+ }
234
+ /*
235
+ initialize SucVector after calling set without BitVector
236
+ 1. resize(bitSize)
237
+ 2. construct bit vector with set(pos)
238
+ 3. ready()
239
+ */
240
+ size_t resize(size_t bitSize, bool doClear = true)
241
+ {
242
+ if (bitSize > maxBitSize) throw cybozu::Exception("SucVectorT:resize:too large bitSize") << bitSize;
243
+ assert((bitSize + 63) / 64 <= ~size_t(0));
244
+ bitSize_ = bitSize;
245
+ const size_t blkNum = size_t((bitSize + 63) / 64);
246
+ const size_t tblNum = (blkNum + 3) / 4; // tblNum <= 2^32
247
+ blk_.resize(tblNum);
248
+ if (doClear) {
249
+ for (size_t i = 0; i < tblNum; i++) {
250
+ blk_[i].clear();
251
+ }
252
+ }
253
+ freezed_ = false;
254
+ return blkNum;
255
+ }
256
+ void set(size_t idx)
257
+ {
258
+ if (freezed_) throw cybozu::Exception("SucVector:set:freezed");
259
+ if (idx >= bitSize_) throw cybozu::Exception("SucVector:set:bad idx") << idx;
260
+ const size_t q = idx / 256;
261
+ const size_t r = idx % 256;
262
+
263
+ uint64_t& b = blk_[q].org[r / 64];
264
+ b |= uint64_t(1) << (r % 64);
265
+ }
266
+ void ready()
267
+ {
268
+ initBlock(0, blk_.size() * 4);
269
+ }
270
+ uint64_t rank1(uint64_t pos) const
271
+ {
272
+ if (pos >= bitSize_) return numTbl_[1];
273
+ assert(pos / 256 <= ~size_t(0));
274
+ size_t q = size_t(pos / 256);
275
+ size_t r = size_t((pos / 64) & 3);
276
+ assert(q < blk_.size());
277
+ const Block& blk = blk_[q];
278
+ uint64_t ret;
279
+ if (support1TiB) {
280
+ ret = blk.a64 % maxBitSize;
281
+ } else {
282
+ ret = blk.ab.a;
283
+ }
284
+ if (r > 0) {
285
+ ret += blk.ab.b[r]; // faster on sandy-bridge
286
+ // ret += uint8_t(blk.a64 >> (32 + r * 8));
287
+ }
288
+ ret += cybozu::popcnt<uint64_t>(blk.org[r] & cybozu::makeBitMask64(pos & 63));
289
+ return ret;
290
+ }
291
+ uint64_t size() const { return bitSize_; }
292
+ uint64_t size(bool b) const { return numTbl_[b ? 1 : 0]; }
293
+ uint64_t rank0(uint64_t pos) const
294
+ {
295
+ return pos - rank1(pos);
296
+ }
297
+ uint64_t rank(bool b, uint64_t pos) const
298
+ {
299
+ if (b) return rank1(pos);
300
+ return rank0(pos);
301
+ }
302
+ bool get(uint64_t pos) const
303
+ {
304
+ if (pos >= bitSize_) throw cybozu::Exception("SucVector:get") << pos << bitSize_;
305
+ size_t q = size_t(pos / 256);
306
+ size_t r = size_t((pos / 64) & 3);
307
+ assert(q < blk_.size());
308
+ const Block& blk = blk_[q];
309
+ return (blk.org[r] & (1ULL << (pos & 63))) != 0;
310
+ }
311
+ uint64_t select0(uint64_t rank) const { return selectSub<false>(rank); }
312
+ uint64_t select1(uint64_t rank) const { return selectSub<true>(rank); }
313
+ uint64_t select(bool b, uint64_t rank) const
314
+ {
315
+ if (b) return select1(rank);
316
+ return select0(rank);
317
+ }
318
+
319
+ /*
320
+ 0123456789
321
+ 0100101101
322
+ ^ ^ ^^
323
+ 0 1 23
324
+ select(v, r) = min { i - 1 | rank(v, i) = r + 1 }
325
+ select(3) = 7
326
+ */
327
+ template<bool b>
328
+ uint64_t selectSub(uint64_t rank) const
329
+ {
330
+ if (!withSelect) throw cybozu::Exception("SucVector:selectSub is not supported");
331
+ const int tablePos = b ? 1 : 0;
332
+ if (rank >= numTbl_[tablePos]) return NotFound;
333
+ const Uint32Vec& tbl = selTbl_[tablePos];
334
+ assert(rank / posUnit < tbl.size());
335
+ const size_t pos = size_t(rank / posUnit);
336
+ size_t L = tbl[pos];
337
+ size_t R = pos >= tbl.size() - 1 ? blk_.size() : tbl[pos + 1];
338
+ rank++;
339
+ while (L < R) {
340
+ size_t M = (L + R) / 2; // (R - L) / 2 + L;
341
+ if (rank_a<b>(M) < rank) {
342
+ L = M + 1;
343
+ } else {
344
+ R = M;
345
+ }
346
+ }
347
+ if (L > 0) L--;
348
+ rank -= rank_a<b>(L);
349
+
350
+ size_t i = 0;
351
+ while (i < 3) {
352
+ size_t r = get_b<b>(L, i + 1);
353
+ if (r >= rank) {
354
+ break;
355
+ }
356
+ i++;
357
+ }
358
+ if (i > 0) {
359
+ size_t r = get_b<b>(L, i);
360
+ rank -= r;
361
+ }
362
+ uint64_t v = blk_[L].org[i];
363
+ if (!b) v = ~v;
364
+ assert(rank <= 64);
365
+ uint64_t ret = cybozu::sucvector_util::select64(v, size_t(rank));
366
+ ret += L * 256 + i * 64;
367
+ return ret;
368
+ }
369
+ };
370
+
371
+ typedef cybozu::SucVectorT<uint32_t> SucVectorLt4G;
372
+ typedef cybozu::SucVectorT<uint64_t> SucVector;
373
+
374
+ } // cybozu
375
+
376
// must use the same condition as the "#pragma warning(push)" near the top of
// this file (_MSC_VER); with _WIN32 a non-MSVC Windows compiler would see a
// pop without the matching push
#ifdef _MSC_VER
	#pragma warning(pop)
#endif