ooxml_crypt 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (264) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +58 -0
  5. data/Rakefile +12 -0
  6. data/bin/console +15 -0
  7. data/bin/setup +8 -0
  8. data/ext/ooxml_crypt/extconf.rb +18 -0
  9. data/ext/ooxml_crypt/ooxml_crypt.c +27 -0
  10. data/ext/ooxml_crypt/ooxml_crypt.h +7 -0
  11. data/lib/ooxml_crypt/version.rb +5 -0
  12. data/lib/ooxml_crypt.rb +75 -0
  13. data/vendor/cybozulib/.github/workflows/main.yml +12 -0
  14. data/vendor/cybozulib/.gitignore +5 -0
  15. data/vendor/cybozulib/CMakeLists.txt +6 -0
  16. data/vendor/cybozulib/COPYRIGHT +27 -0
  17. data/vendor/cybozulib/Makefile +26 -0
  18. data/vendor/cybozulib/bin/libeay32.dll +0 -0
  19. data/vendor/cybozulib/bin/libmecab.dll +0 -0
  20. data/vendor/cybozulib/bin/ssleay32.dll +0 -0
  21. data/vendor/cybozulib/common.mk +116 -0
  22. data/vendor/cybozulib/common.props +25 -0
  23. data/vendor/cybozulib/cybozulib.sln +286 -0
  24. data/vendor/cybozulib/debug.props +14 -0
  25. data/vendor/cybozulib/include/cybozu/array.hpp +197 -0
  26. data/vendor/cybozulib/include/cybozu/atoi.hpp +238 -0
  27. data/vendor/cybozulib/include/cybozu/atomic.hpp +146 -0
  28. data/vendor/cybozulib/include/cybozu/base64.hpp +210 -0
  29. data/vendor/cybozulib/include/cybozu/benchmark.hpp +212 -0
  30. data/vendor/cybozulib/include/cybozu/bfd.hpp +105 -0
  31. data/vendor/cybozulib/include/cybozu/bit_operation.hpp +139 -0
  32. data/vendor/cybozulib/include/cybozu/bitvector.hpp +358 -0
  33. data/vendor/cybozulib/include/cybozu/condition_variable.hpp +113 -0
  34. data/vendor/cybozulib/include/cybozu/condition_variable_cs.hpp +74 -0
  35. data/vendor/cybozulib/include/cybozu/config.hpp +392 -0
  36. data/vendor/cybozulib/include/cybozu/critical_section.hpp +60 -0
  37. data/vendor/cybozulib/include/cybozu/crypto.hpp +321 -0
  38. data/vendor/cybozulib/include/cybozu/csucvector.hpp +624 -0
  39. data/vendor/cybozulib/include/cybozu/csv.hpp +294 -0
  40. data/vendor/cybozulib/include/cybozu/data_type.hpp +27 -0
  41. data/vendor/cybozulib/include/cybozu/endian.hpp +224 -0
  42. data/vendor/cybozulib/include/cybozu/env.hpp +63 -0
  43. data/vendor/cybozulib/include/cybozu/event.hpp +122 -0
  44. data/vendor/cybozulib/include/cybozu/exception.hpp +253 -0
  45. data/vendor/cybozulib/include/cybozu/file.hpp +626 -0
  46. data/vendor/cybozulib/include/cybozu/fmindex.hpp +291 -0
  47. data/vendor/cybozulib/include/cybozu/format.hpp +93 -0
  48. data/vendor/cybozulib/include/cybozu/frequency.hpp +264 -0
  49. data/vendor/cybozulib/include/cybozu/hash.hpp +67 -0
  50. data/vendor/cybozulib/include/cybozu/inttype.hpp +174 -0
  51. data/vendor/cybozulib/include/cybozu/itoa.hpp +336 -0
  52. data/vendor/cybozulib/include/cybozu/json.hpp +120 -0
  53. data/vendor/cybozulib/include/cybozu/line_stream.hpp +149 -0
  54. data/vendor/cybozulib/include/cybozu/link_libeay32.hpp +21 -0
  55. data/vendor/cybozulib/include/cybozu/link_mpir.hpp +18 -0
  56. data/vendor/cybozulib/include/cybozu/link_ssleay32.hpp +19 -0
  57. data/vendor/cybozulib/include/cybozu/log.hpp +237 -0
  58. data/vendor/cybozulib/include/cybozu/minixml.hpp +452 -0
  59. data/vendor/cybozulib/include/cybozu/mmap.hpp +143 -0
  60. data/vendor/cybozulib/include/cybozu/mutex.hpp +144 -0
  61. data/vendor/cybozulib/include/cybozu/nlp/mecab.hpp +96 -0
  62. data/vendor/cybozulib/include/cybozu/nlp/plsi.hpp +315 -0
  63. data/vendor/cybozulib/include/cybozu/nlp/random.hpp +74 -0
  64. data/vendor/cybozulib/include/cybozu/nlp/sparse.hpp +529 -0
  65. data/vendor/cybozulib/include/cybozu/nlp/svd.hpp +486 -0
  66. data/vendor/cybozulib/include/cybozu/nlp/tfidf.hpp +226 -0
  67. data/vendor/cybozulib/include/cybozu/nlp/top_score.hpp +75 -0
  68. data/vendor/cybozulib/include/cybozu/option.hpp +743 -0
  69. data/vendor/cybozulib/include/cybozu/parallel.hpp +88 -0
  70. data/vendor/cybozulib/include/cybozu/pcg.hpp +72 -0
  71. data/vendor/cybozulib/include/cybozu/process.hpp +324 -0
  72. data/vendor/cybozulib/include/cybozu/quit_signal_handler.hpp +66 -0
  73. data/vendor/cybozulib/include/cybozu/random_generator.hpp +144 -0
  74. data/vendor/cybozulib/include/cybozu/regex.hpp +463 -0
  75. data/vendor/cybozulib/include/cybozu/select8.hpp +279 -0
  76. data/vendor/cybozulib/include/cybozu/serializer.hpp +363 -0
  77. data/vendor/cybozulib/include/cybozu/sha1.hpp +209 -0
  78. data/vendor/cybozulib/include/cybozu/sha2.hpp +506 -0
  79. data/vendor/cybozulib/include/cybozu/siphash.hpp +105 -0
  80. data/vendor/cybozulib/include/cybozu/socket.hpp +785 -0
  81. data/vendor/cybozulib/include/cybozu/ssl.hpp +203 -0
  82. data/vendor/cybozulib/include/cybozu/stacktrace.hpp +291 -0
  83. data/vendor/cybozulib/include/cybozu/stream.hpp +269 -0
  84. data/vendor/cybozulib/include/cybozu/string.hpp +1746 -0
  85. data/vendor/cybozulib/include/cybozu/string_operation.hpp +365 -0
  86. data/vendor/cybozulib/include/cybozu/sucvector.hpp +378 -0
  87. data/vendor/cybozulib/include/cybozu/test.hpp +373 -0
  88. data/vendor/cybozulib/include/cybozu/thread.hpp +229 -0
  89. data/vendor/cybozulib/include/cybozu/time.hpp +281 -0
  90. data/vendor/cybozulib/include/cybozu/tls.hpp +115 -0
  91. data/vendor/cybozulib/include/cybozu/unordered_map.hpp +13 -0
  92. data/vendor/cybozulib/include/cybozu/unordered_set.hpp +13 -0
  93. data/vendor/cybozulib/include/cybozu/v128.hpp +376 -0
  94. data/vendor/cybozulib/include/cybozu/wavelet_matrix.hpp +345 -0
  95. data/vendor/cybozulib/include/cybozu/xorshift.hpp +189 -0
  96. data/vendor/cybozulib/include/cybozu/zlib.hpp +325 -0
  97. data/vendor/cybozulib/include/sais.hxx +364 -0
  98. data/vendor/cybozulib/misc/make_select8tbl.cpp +26 -0
  99. data/vendor/cybozulib/mk.bat +37 -0
  100. data/vendor/cybozulib/readme.md +29 -0
  101. data/vendor/cybozulib/release.props +12 -0
  102. data/vendor/cybozulib/sample/Makefile +30 -0
  103. data/vendor/cybozulib/sample/csucvector_smpl.cpp +42 -0
  104. data/vendor/cybozulib/sample/data/svd/org/test1.S +4 -0
  105. data/vendor/cybozulib/sample/data/svd/org/test1.U +4 -0
  106. data/vendor/cybozulib/sample/data/svd/org/test1.V +6 -0
  107. data/vendor/cybozulib/sample/data/svd/test1 +4 -0
  108. data/vendor/cybozulib/sample/data/svd/test2 +4 -0
  109. data/vendor/cybozulib/sample/desymbol.cpp +127 -0
  110. data/vendor/cybozulib/sample/exception_smpl.cpp +46 -0
  111. data/vendor/cybozulib/sample/fmindex_smpl.cpp +231 -0
  112. data/vendor/cybozulib/sample/log_smpl.cpp +19 -0
  113. data/vendor/cybozulib/sample/mecab_smpl.cpp +37 -0
  114. data/vendor/cybozulib/sample/option2_smpl.cpp +68 -0
  115. data/vendor/cybozulib/sample/option_smpl.cpp +42 -0
  116. data/vendor/cybozulib/sample/plsi_smpl.cpp +207 -0
  117. data/vendor/cybozulib/sample/proj/exception_smpl.vcproj +184 -0
  118. data/vendor/cybozulib/sample/proj/mecab_smpl.vcproj +184 -0
  119. data/vendor/cybozulib/sample/proj/ssl_smpl/ssl_smpl.vcxproj +85 -0
  120. data/vendor/cybozulib/sample/proj/ssl_smpl.vcproj +347 -0
  121. data/vendor/cybozulib/sample/proj/stacktrace_smpl/stacktrace_smpl.vcxproj +85 -0
  122. data/vendor/cybozulib/sample/proj/svd_smpl.vcproj +184 -0
  123. data/vendor/cybozulib/sample/quit_signal_handler.cpp +30 -0
  124. data/vendor/cybozulib/sample/serializer_smpl.cpp +196 -0
  125. data/vendor/cybozulib/sample/socket_smpl.cpp +82 -0
  126. data/vendor/cybozulib/sample/ssl_smpl.cpp +39 -0
  127. data/vendor/cybozulib/sample/stacktrace_smpl.cpp +52 -0
  128. data/vendor/cybozulib/sample/svd_bench_smpl.cpp +143 -0
  129. data/vendor/cybozulib/sample/svd_smpl.cpp +94 -0
  130. data/vendor/cybozulib/sample/wm_bench_smpl.cpp +182 -0
  131. data/vendor/cybozulib/sample/zlib_smpl.cpp +41 -0
  132. data/vendor/cybozulib/src/Makefile +8 -0
  133. data/vendor/cybozulib/src/base/Makefile +19 -0
  134. data/vendor/cybozulib/test/Makefile +12 -0
  135. data/vendor/cybozulib/test/base/Makefile +37 -0
  136. data/vendor/cybozulib/test/base/array_test.cpp +173 -0
  137. data/vendor/cybozulib/test/base/atoi_test.cpp +774 -0
  138. data/vendor/cybozulib/test/base/atomic_test.cpp +49 -0
  139. data/vendor/cybozulib/test/base/base64_test.cpp +113 -0
  140. data/vendor/cybozulib/test/base/bit_operation_test.cpp +134 -0
  141. data/vendor/cybozulib/test/base/bitvector_test.cpp +204 -0
  142. data/vendor/cybozulib/test/base/condition_variable_cs_test.cpp +92 -0
  143. data/vendor/cybozulib/test/base/condition_variable_test.cpp +88 -0
  144. data/vendor/cybozulib/test/base/config_test.cpp +236 -0
  145. data/vendor/cybozulib/test/base/crypto_test.cpp +122 -0
  146. data/vendor/cybozulib/test/base/csucvector_test.cpp +63 -0
  147. data/vendor/cybozulib/test/base/csv_test.cpp +182 -0
  148. data/vendor/cybozulib/test/base/data/a.xml +26 -0
  149. data/vendor/cybozulib/test/base/endian_test.cpp +56 -0
  150. data/vendor/cybozulib/test/base/env_test.cpp +22 -0
  151. data/vendor/cybozulib/test/base/event_test.cpp +41 -0
  152. data/vendor/cybozulib/test/base/file_test.cpp +233 -0
  153. data/vendor/cybozulib/test/base/fmindex_test.cpp +118 -0
  154. data/vendor/cybozulib/test/base/format_test.cpp +12 -0
  155. data/vendor/cybozulib/test/base/frequency_test.cpp +104 -0
  156. data/vendor/cybozulib/test/base/itoa_test.cpp +522 -0
  157. data/vendor/cybozulib/test/base/line_stream_test.cpp +208 -0
  158. data/vendor/cybozulib/test/base/mecab_test.cpp +41 -0
  159. data/vendor/cybozulib/test/base/minixml_test.cpp +103 -0
  160. data/vendor/cybozulib/test/base/mmap_test.cpp +15 -0
  161. data/vendor/cybozulib/test/base/option_test.cpp +487 -0
  162. data/vendor/cybozulib/test/base/parallel_test.cpp +48 -0
  163. data/vendor/cybozulib/test/base/proj/array_test/array_test.vcxproj +86 -0
  164. data/vendor/cybozulib/test/base/proj/atoi_test/atoi_test.vcxproj +86 -0
  165. data/vendor/cybozulib/test/base/proj/atomic_test/atomic_test.vcxproj +86 -0
  166. data/vendor/cybozulib/test/base/proj/base64_test/base64_test.vcxproj +86 -0
  167. data/vendor/cybozulib/test/base/proj/condition_variable_cs_test/condition_variable_cs_test.vcxproj +86 -0
  168. data/vendor/cybozulib/test/base/proj/condition_variable_test/condition_variable_test.vcxproj +86 -0
  169. data/vendor/cybozulib/test/base/proj/config_test/config_test.vcxproj +86 -0
  170. data/vendor/cybozulib/test/base/proj/csv_test/csv_test.vcxproj +86 -0
  171. data/vendor/cybozulib/test/base/proj/endian_test/endian_test.vcxproj +86 -0
  172. data/vendor/cybozulib/test/base/proj/env_test/env_test.vcxproj +86 -0
  173. data/vendor/cybozulib/test/base/proj/event_test/event_test.vcxproj +86 -0
  174. data/vendor/cybozulib/test/base/proj/file_test/file_test.vcxproj +86 -0
  175. data/vendor/cybozulib/test/base/proj/itoa_test/itoa_test.vcxproj +86 -0
  176. data/vendor/cybozulib/test/base/proj/mecab_test/mecab_test.vcxproj +88 -0
  177. data/vendor/cybozulib/test/base/proj/minixml_test/minixml_test.vcxproj +86 -0
  178. data/vendor/cybozulib/test/base/proj/mmap_test/mmap_test.vcxproj +86 -0
  179. data/vendor/cybozulib/test/base/proj/serializer_test/serializer_test.vcxproj +86 -0
  180. data/vendor/cybozulib/test/base/proj/sha1_test/sha1_test.vcxproj +86 -0
  181. data/vendor/cybozulib/test/base/proj/stream_test/stream_test.vcxproj +86 -0
  182. data/vendor/cybozulib/test/base/proj/string_operation_test/string_operation_test.vcxproj +86 -0
  183. data/vendor/cybozulib/test/base/proj/string_test/string_test.vcxproj +86 -0
  184. data/vendor/cybozulib/test/base/proj/thread_test/thread_test.vcxproj +86 -0
  185. data/vendor/cybozulib/test/base/proj/time_test/time_test.vcxproj +86 -0
  186. data/vendor/cybozulib/test/base/proj/tls_test/tls_test.vcxproj +86 -0
  187. data/vendor/cybozulib/test/base/proj/zlib_test/zlib_test.vcxproj +86 -0
  188. data/vendor/cybozulib/test/base/random_generator_test.cpp +28 -0
  189. data/vendor/cybozulib/test/base/regex_test.cpp +74 -0
  190. data/vendor/cybozulib/test/base/serializer_test.cpp +483 -0
  191. data/vendor/cybozulib/test/base/sha1_test.cpp +61 -0
  192. data/vendor/cybozulib/test/base/sha2_test.cpp +191 -0
  193. data/vendor/cybozulib/test/base/siphash_test.cpp +33 -0
  194. data/vendor/cybozulib/test/base/socket_test.cpp +76 -0
  195. data/vendor/cybozulib/test/base/stream_test.cpp +101 -0
  196. data/vendor/cybozulib/test/base/string_operation_test.cpp +340 -0
  197. data/vendor/cybozulib/test/base/string_test.cpp +1705 -0
  198. data/vendor/cybozulib/test/base/sucvector_test.cpp +312 -0
  199. data/vendor/cybozulib/test/base/thread_test.cpp +62 -0
  200. data/vendor/cybozulib/test/base/time_test.cpp +164 -0
  201. data/vendor/cybozulib/test/base/tls_test.cpp +50 -0
  202. data/vendor/cybozulib/test/base/wavelet_matrix_test.cpp +145 -0
  203. data/vendor/cybozulib/test/base/zlib_test.cpp +371 -0
  204. data/vendor/cybozulib/test/nlp/Makefile +27 -0
  205. data/vendor/cybozulib/test/nlp/proj/random_test.vcproj +184 -0
  206. data/vendor/cybozulib/test/nlp/proj/sparse_test.vcproj +184 -0
  207. data/vendor/cybozulib/test/nlp/proj/svd_test.vcproj +184 -0
  208. data/vendor/cybozulib/test/nlp/random_test.cpp +62 -0
  209. data/vendor/cybozulib/test/nlp/sparse_test.cpp +347 -0
  210. data/vendor/cybozulib/test/nlp/svd_test.cpp +234 -0
  211. data/vendor/cybozulib/test/nlp/top_score_test.cpp +40 -0
  212. data/vendor/cybozulib/tool/create_vcproj.py +186 -0
  213. data/vendor/cybozulib/tool/vcproj_tmpl.py +185 -0
  214. data/vendor/msoffice/COPYRIGHT +27 -0
  215. data/vendor/msoffice/Makefile +29 -0
  216. data/vendor/msoffice/bin/64/msoc.dll +0 -0
  217. data/vendor/msoffice/bin/64/msocsample.exe +0 -0
  218. data/vendor/msoffice/bin/64/msoffice-crypt.exe +0 -0
  219. data/vendor/msoffice/bin/msoc.dll +0 -0
  220. data/vendor/msoffice/bin/msocsample.exe +0 -0
  221. data/vendor/msoffice/bin/msoffice-crypt.exe +0 -0
  222. data/vendor/msoffice/common.mk +71 -0
  223. data/vendor/msoffice/common.props +26 -0
  224. data/vendor/msoffice/debug.props +14 -0
  225. data/vendor/msoffice/include/attack.hpp +211 -0
  226. data/vendor/msoffice/include/cfb.hpp +777 -0
  227. data/vendor/msoffice/include/crypto_util.hpp +450 -0
  228. data/vendor/msoffice/include/custom_sha1.hpp +342 -0
  229. data/vendor/msoffice/include/decode.hpp +240 -0
  230. data/vendor/msoffice/include/encode.hpp +221 -0
  231. data/vendor/msoffice/include/make_dataspace.hpp +316 -0
  232. data/vendor/msoffice/include/msoc.h +129 -0
  233. data/vendor/msoffice/include/resource.hpp +7 -0
  234. data/vendor/msoffice/include/standard_encryption.hpp +145 -0
  235. data/vendor/msoffice/include/uint32vec.hpp +179 -0
  236. data/vendor/msoffice/include/util.hpp +212 -0
  237. data/vendor/msoffice/lib/.emptydir +0 -0
  238. data/vendor/msoffice/misc/decrypt-xls.vbs +46 -0
  239. data/vendor/msoffice/mk.bat +1 -0
  240. data/vendor/msoffice/mkdll.bat +3 -0
  241. data/vendor/msoffice/msoc.def +13 -0
  242. data/vendor/msoffice/msocsample.py +178 -0
  243. data/vendor/msoffice/msoffice12.sln +31 -0
  244. data/vendor/msoffice/readme.md +110 -0
  245. data/vendor/msoffice/release.props +28 -0
  246. data/vendor/msoffice/src/Makefile +19 -0
  247. data/vendor/msoffice/src/attack.cpp +124 -0
  248. data/vendor/msoffice/src/cfb_test.cpp +77 -0
  249. data/vendor/msoffice/src/minisample.c +54 -0
  250. data/vendor/msoffice/src/msocdll.cpp +276 -0
  251. data/vendor/msoffice/src/msocsample.c +136 -0
  252. data/vendor/msoffice/src/msoffice-crypt.cpp +219 -0
  253. data/vendor/msoffice/src/proj/attack/attack.vcxproj +88 -0
  254. data/vendor/msoffice/src/proj/main/msoffice-crypt.vcxproj +88 -0
  255. data/vendor/msoffice/src/sha1.cpp +234 -0
  256. data/vendor/msoffice/test/Makefile +20 -0
  257. data/vendor/msoffice/test/cfb_test.cpp +74 -0
  258. data/vendor/msoffice/test/hash_test.cpp +59 -0
  259. data/vendor/msoffice/test/proj/cfb/cfb_test.vcxproj +90 -0
  260. data/vendor/msoffice/test/proj/hash/hash_test.vcxproj +90 -0
  261. data/vendor/msoffice/test/sampl.bat +8 -0
  262. data/vendor/msoffice/test_all.py +46 -0
  263. data/vendor/update +4 -0
  264. metadata +351 -0
@@ -0,0 +1,378 @@
1
+ #pragma once
2
+ /**
3
+ @file
4
+ @brief succinct vector
5
+ @author MITSUNARI Shigeo(@herumi)
6
+ @license modified new BSD license
7
+ http://opensource.org/licenses/BSD-3-Clause
8
+
9
+ @note use -msse4.2 option for popcnt
10
+ */
11
+ #include <assert.h>
12
+ #include <vector>
13
+ #include <cybozu/exception.hpp>
14
+ #include <cybozu/bit_operation.hpp>
15
+ #include <cybozu/select8.hpp>
16
+ #include <cybozu/serializer.hpp>
17
+ #include <iosfwd>
18
+
19
+ #ifdef _MSC_VER
20
+ #pragma warning(push)
21
+ #pragma warning(disable : 4127)
22
+ #endif
23
+
24
+ namespace cybozu {
25
+
26
+ const uint64_t NotFound = ~uint64_t(0);
27
+
28
+ namespace sucvector_util {
29
+
30
+ inline uint32_t rank64(uint64_t v, size_t i)
31
+ {
32
+ return cybozu::popcnt<uint64_t>(v & cybozu::makeBitMask64(i));
33
+ }
34
+
35
/*
	number of blocks of size `block` needed to hold x items, i.e. x / block rounded up
	@param x [in] item count (intended for non-negative values)
	@param block [in] block size; must not be zero
	@note computed as quotient plus a remainder test instead of (x + block - 1) / block,
	so that the result stays correct when x is close to the maximum value of T
*/
template<class T>
T getBlockNum(T x, T block)
{
	const T quotient = x / block;
	const T hasRemainder = (x % block != 0) ? 1 : 0;
	return T(quotient + hasRemainder);
}
40
+
41
+ inline uint32_t select64(uint64_t v, size_t r)
42
+ {
43
+ assert(r <= 64);
44
+ if (r > popcnt(v)) return 64;
45
+ uint32_t pos = 0;
46
+ uint32_t c = popcnt(uint32_t(v));
47
+ if (r > c) {
48
+ r -= c;
49
+ pos = 32;
50
+ v >>= 32;
51
+ }
52
+ c = popcnt<uint32_t>(uint16_t(v));
53
+ if (r > c) {
54
+ r -= c;
55
+ pos += 16;
56
+ v >>= 16;
57
+ }
58
+ c = popcnt<uint32_t>(uint8_t(v));
59
+ if (r > c) {
60
+ r -= c;
61
+ pos += 8;
62
+ v >>= 8;
63
+ }
64
+ if (r == 8 && uint8_t(v) == 0xff) return pos + 7;
65
+ assert(r <= 8);
66
+ c = cybozu::select8_util::select8(uint8_t(v), r);
67
+ return pos + c;
68
+ }
69
+
70
+ } // cybozu::sucvector_util
71
+
72
+ /*
73
+ extra memory
74
+ (32 + 8 * 4) / 256 = 1/4 bit per bit for rank
75
+ */
76
+ template<class type, bool withSelect = true>
77
+ class SucVectorT {
78
+ typedef type size_type;
79
+ static const bool support1TiB = sizeof(size_type) == 8;
80
+ static const int maxBitLen = support1TiB ? 32 + 8 : 32; // don't increase this value
81
+ static const uint64_t maxBitSize = uint64_t(1) << maxBitLen;
82
+ struct Block {
83
+ uint64_t org[4];
84
+ union {
85
+ uint64_t a64;
86
+ struct {
87
+ uint32_t a;
88
+ uint8_t b[4]; // b[0] is used for (b[0] << 32) | a
89
+ } ab;
90
+ };
91
+ void clear()
92
+ {
93
+ memset(this, 0, sizeof(Block));
94
+ }
95
+ };
96
+ uint64_t bitSize_;
97
+ uint64_t numTbl_[2];
98
+ bool freezed_;
99
+ std::vector<Block> blk_;
100
+ typedef std::vector<uint32_t> Uint32Vec;
101
+ static const uint64_t posUnit = 1024;
102
+ Uint32Vec selTbl_[2];
103
+
104
+ template<int b>
105
+ uint64_t rank_a(size_t i) const
106
+ {
107
+ assert(i < blk_.size());
108
+ uint64_t ret;
109
+ if (support1TiB) {
110
+ ret = blk_[i].a64 & makeBitMask64(maxBitLen);
111
+ } else {
112
+ ret = blk_[i].ab.a;
113
+ }
114
+ if (!b) ret = i * uint64_t(256) - ret;
115
+ return ret;
116
+ }
117
+ template<bool b>
118
+ size_t get_b(size_t L, size_t i) const
119
+ {
120
+ assert(L < blk_.size());
121
+ assert(0 < i && i < 4);
122
+ size_t r = blk_[L].ab.b[i];
123
+ if (!b) r = 64 * i - r;
124
+ return r;
125
+ }
126
+ // call after blk_, numTbl_ are initialized
127
+ void initSelTbl()
128
+ {
129
+ if (!withSelect) return;
130
+ initSelTblSub<false>(selTbl_[0]);
131
+ initSelTblSub<true>(selTbl_[1]);
132
+ }
133
+ template<bool b>
134
+ void initSelTblSub(Uint32Vec& tbl)
135
+ {
136
+ const int tablePos = b ? 1 : 0;
137
+ assert(numTbl_[tablePos] / posUnit < 0x7fffffff - 1);
138
+ const size_t size = size_t(sucvector_util::getBlockNum(numTbl_[tablePos], posUnit));
139
+ tbl.resize(size);
140
+ uint32_t pos = 0;
141
+ for (size_t i = 0; i < size; i++) {
142
+ uint64_t r = i * posUnit;
143
+ while (rank_a<b>(pos) < r) {
144
+ pos++;
145
+ if (pos == blk_.size()) break;
146
+ }
147
+ tbl[i] = pos;
148
+ }
149
+ }
150
+ void initBlock(const uint64_t *buf, size_t blkNum)
151
+ {
152
+ uint64_t num1 = 0;
153
+ size_t pos = 0;
154
+ for (size_t i = 0, n = blk_.size(); i < n; i++) {
155
+ Block& blk = blk_[i];
156
+ if (support1TiB) {
157
+ blk.a64 = num1 % maxBitSize;
158
+ } else {
159
+ if (num1 > 0xffffffff) throw cybozu::Exception("SucVectorT:too large num1") << num1;
160
+ blk.ab.a = (uint32_t)num1;
161
+ }
162
+ uint32_t subNum1 = 0;
163
+ for (size_t j = 0; j < 4; j++) {
164
+ uint64_t v;
165
+ if (buf) {
166
+ v = pos < blkNum ? buf[pos++] : 0;
167
+ blk.org[j] = v;
168
+ } else {
169
+ v = blk.org[j];
170
+ }
171
+ uint32_t c = cybozu::popcnt(v);
172
+ num1 += c;
173
+ if (j > 0) {
174
+ blk.ab.b[j] = (uint8_t)subNum1;
175
+ }
176
+ subNum1 += c;
177
+ }
178
+ }
179
+ numTbl_[0] = blkNum * 64 - num1;
180
+ numTbl_[1] = num1;
181
+ initSelTbl();
182
+ freezed_ = true;
183
+ }
184
+ public:
185
+ /*
186
+ data format(endian is depend on CPU:eg. little endian for x86/x64)
187
+ bitSize : 8
188
+ numTbl_[0] : 8
189
+ numTbl_[1] : 8
190
+ blkSize : 8
191
+ blk data : blkSize * sizeof(Block)
192
+ */
193
+ template<class OutputStream>
194
+ void save(OutputStream& os) const
195
+ {
196
+ cybozu::save(os, bitSize_);
197
+ cybozu::save(os, numTbl_[0]);
198
+ cybozu::save(os, numTbl_[1]);
199
+ cybozu::savePodVec(os, blk_);
200
+ if (withSelect) {
201
+ cybozu::savePodVec(os, selTbl_[0]);
202
+ cybozu::savePodVec(os, selTbl_[1]);
203
+ }
204
+ }
205
+ template<class InputStream>
206
+ void load(InputStream& is)
207
+ {
208
+ cybozu::load(bitSize_, is);
209
+ cybozu::load(numTbl_[0], is);
210
+ cybozu::load(numTbl_[1], is);
211
+ cybozu::loadPodVec(blk_, is);
212
+ if (withSelect) {
213
+ cybozu::loadPodVec(selTbl_[0], is);
214
+ cybozu::loadPodVec(selTbl_[1], is);
215
+ } else {
216
+ initSelTbl();
217
+ }
218
+ }
219
+ SucVectorT() : bitSize_(0), freezed_(false) { numTbl_[0] = numTbl_[1] = 0; }
220
+ SucVectorT(const uint64_t *buf, uint64_t bitSize)
221
+ {
222
+ init(buf, bitSize);
223
+ }
224
+ /*
225
+ initialize SucVector
226
+ @param buf [in] bit pattern buffer
227
+ @param bitSize [in] bitSize ; buf size = (bitSize + 63) / 64
228
+ */
229
+ void init(const uint64_t *buf, uint64_t bitSize)
230
+ {
231
+ const size_t blkNum = resize(bitSize, false);
232
+ initBlock(buf, blkNum);
233
+ }
234
+ /*
235
+ initialize SucVector after calling set without BitVector
236
+ 1. resize(bitSize)
237
+ 2. construct bit vector with set(pos)
238
+ 3. ready()
239
+ */
240
+ size_t resize(size_t bitSize, bool doClear = true)
241
+ {
242
+ if (bitSize > maxBitSize) throw cybozu::Exception("SucVectorT:resize:too large bitSize") << bitSize;
243
+ assert((bitSize + 63) / 64 <= ~size_t(0));
244
+ bitSize_ = bitSize;
245
+ const size_t blkNum = size_t((bitSize + 63) / 64);
246
+ const size_t tblNum = (blkNum + 3) / 4; // tblNum <= 2^32
247
+ blk_.resize(tblNum);
248
+ if (doClear) {
249
+ for (size_t i = 0; i < tblNum; i++) {
250
+ blk_[i].clear();
251
+ }
252
+ }
253
+ freezed_ = false;
254
+ return blkNum;
255
+ }
256
+ void set(size_t idx)
257
+ {
258
+ if (freezed_) throw cybozu::Exception("SucVector:set:freezed");
259
+ if (idx >= bitSize_) throw cybozu::Exception("SucVector:set:bad idx") << idx;
260
+ const size_t q = idx / 256;
261
+ const size_t r = idx % 256;
262
+
263
+ uint64_t& b = blk_[q].org[r / 64];
264
+ b |= uint64_t(1) << (r % 64);
265
+ }
266
+ void ready()
267
+ {
268
+ initBlock(0, blk_.size() * 4);
269
+ }
270
+ uint64_t rank1(uint64_t pos) const
271
+ {
272
+ if (pos >= bitSize_) return numTbl_[1];
273
+ assert(pos / 256 <= ~size_t(0));
274
+ size_t q = size_t(pos / 256);
275
+ size_t r = size_t((pos / 64) & 3);
276
+ assert(q < blk_.size());
277
+ const Block& blk = blk_[q];
278
+ uint64_t ret;
279
+ if (support1TiB) {
280
+ ret = blk.a64 % maxBitSize;
281
+ } else {
282
+ ret = blk.ab.a;
283
+ }
284
+ if (r > 0) {
285
+ ret += blk.ab.b[r]; // faster on sandy-bridge
286
+ // ret += uint8_t(blk.a64 >> (32 + r * 8));
287
+ }
288
+ ret += cybozu::popcnt<uint64_t>(blk.org[r] & cybozu::makeBitMask64(pos & 63));
289
+ return ret;
290
+ }
291
+ uint64_t size() const { return bitSize_; }
292
+ uint64_t size(bool b) const { return numTbl_[b ? 1 : 0]; }
293
+ uint64_t rank0(uint64_t pos) const
294
+ {
295
+ return pos - rank1(pos);
296
+ }
297
+ uint64_t rank(bool b, uint64_t pos) const
298
+ {
299
+ if (b) return rank1(pos);
300
+ return rank0(pos);
301
+ }
302
+ bool get(uint64_t pos) const
303
+ {
304
+ if (pos >= bitSize_) throw cybozu::Exception("SucVector:get") << pos << bitSize_;
305
+ size_t q = size_t(pos / 256);
306
+ size_t r = size_t((pos / 64) & 3);
307
+ assert(q < blk_.size());
308
+ const Block& blk = blk_[q];
309
+ return (blk.org[r] & (1ULL << (pos & 63))) != 0;
310
+ }
311
+ uint64_t select0(uint64_t rank) const { return selectSub<false>(rank); }
312
+ uint64_t select1(uint64_t rank) const { return selectSub<true>(rank); }
313
+ uint64_t select(bool b, uint64_t rank) const
314
+ {
315
+ if (b) return select1(rank);
316
+ return select0(rank);
317
+ }
318
+
319
+ /*
320
+ 0123456789
321
+ 0100101101
322
+ ^ ^ ^^
323
+ 0 1 23
324
+ select(v, r) = min { i - 1 | rank(v, i) = r + 1 }
325
+ select(3) = 7
326
+ */
327
+ template<bool b>
328
+ uint64_t selectSub(uint64_t rank) const
329
+ {
330
+ if (!withSelect) throw cybozu::Exception("SucVector:selectSub is not supported");
331
+ const int tablePos = b ? 1 : 0;
332
+ if (rank >= numTbl_[tablePos]) return NotFound;
333
+ const Uint32Vec& tbl = selTbl_[tablePos];
334
+ assert(rank / posUnit < tbl.size());
335
+ const size_t pos = size_t(rank / posUnit);
336
+ size_t L = tbl[pos];
337
+ size_t R = pos >= tbl.size() - 1 ? blk_.size() : tbl[pos + 1];
338
+ rank++;
339
+ while (L < R) {
340
+ size_t M = (L + R) / 2; // (R - L) / 2 + L;
341
+ if (rank_a<b>(M) < rank) {
342
+ L = M + 1;
343
+ } else {
344
+ R = M;
345
+ }
346
+ }
347
+ if (L > 0) L--;
348
+ rank -= rank_a<b>(L);
349
+
350
+ size_t i = 0;
351
+ while (i < 3) {
352
+ size_t r = get_b<b>(L, i + 1);
353
+ if (r >= rank) {
354
+ break;
355
+ }
356
+ i++;
357
+ }
358
+ if (i > 0) {
359
+ size_t r = get_b<b>(L, i);
360
+ rank -= r;
361
+ }
362
+ uint64_t v = blk_[L].org[i];
363
+ if (!b) v = ~v;
364
+ assert(rank <= 64);
365
+ uint64_t ret = cybozu::sucvector_util::select64(v, size_t(rank));
366
+ ret += L * 256 + i * 64;
367
+ return ret;
368
+ }
369
+ };
370
+
371
+ typedef cybozu::SucVectorT<uint32_t> SucVectorLt4G;
372
+ typedef cybozu::SucVectorT<uint64_t> SucVector;
373
+
374
+ } // cybozu
375
+
376
// Restore the warning state; guarded by _MSC_VER like the matching
// #pragma warning(push) near the top of this header, so that other
// compilers targeting Windows never see a pop without a push.
#ifdef _MSC_VER
	#pragma warning(pop)
#endif