ooxml_crypt 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +58 -0
- data/Rakefile +12 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/ext/ooxml_crypt/extconf.rb +18 -0
- data/ext/ooxml_crypt/ooxml_crypt.c +27 -0
- data/ext/ooxml_crypt/ooxml_crypt.h +7 -0
- data/lib/ooxml_crypt/version.rb +5 -0
- data/lib/ooxml_crypt.rb +75 -0
- data/vendor/cybozulib/.github/workflows/main.yml +12 -0
- data/vendor/cybozulib/.gitignore +5 -0
- data/vendor/cybozulib/CMakeLists.txt +6 -0
- data/vendor/cybozulib/COPYRIGHT +27 -0
- data/vendor/cybozulib/Makefile +26 -0
- data/vendor/cybozulib/bin/libeay32.dll +0 -0
- data/vendor/cybozulib/bin/libmecab.dll +0 -0
- data/vendor/cybozulib/bin/ssleay32.dll +0 -0
- data/vendor/cybozulib/common.mk +116 -0
- data/vendor/cybozulib/common.props +25 -0
- data/vendor/cybozulib/cybozulib.sln +286 -0
- data/vendor/cybozulib/debug.props +14 -0
- data/vendor/cybozulib/include/cybozu/array.hpp +197 -0
- data/vendor/cybozulib/include/cybozu/atoi.hpp +238 -0
- data/vendor/cybozulib/include/cybozu/atomic.hpp +146 -0
- data/vendor/cybozulib/include/cybozu/base64.hpp +210 -0
- data/vendor/cybozulib/include/cybozu/benchmark.hpp +212 -0
- data/vendor/cybozulib/include/cybozu/bfd.hpp +105 -0
- data/vendor/cybozulib/include/cybozu/bit_operation.hpp +139 -0
- data/vendor/cybozulib/include/cybozu/bitvector.hpp +358 -0
- data/vendor/cybozulib/include/cybozu/condition_variable.hpp +113 -0
- data/vendor/cybozulib/include/cybozu/condition_variable_cs.hpp +74 -0
- data/vendor/cybozulib/include/cybozu/config.hpp +392 -0
- data/vendor/cybozulib/include/cybozu/critical_section.hpp +60 -0
- data/vendor/cybozulib/include/cybozu/crypto.hpp +321 -0
- data/vendor/cybozulib/include/cybozu/csucvector.hpp +624 -0
- data/vendor/cybozulib/include/cybozu/csv.hpp +294 -0
- data/vendor/cybozulib/include/cybozu/data_type.hpp +27 -0
- data/vendor/cybozulib/include/cybozu/endian.hpp +224 -0
- data/vendor/cybozulib/include/cybozu/env.hpp +63 -0
- data/vendor/cybozulib/include/cybozu/event.hpp +122 -0
- data/vendor/cybozulib/include/cybozu/exception.hpp +253 -0
- data/vendor/cybozulib/include/cybozu/file.hpp +626 -0
- data/vendor/cybozulib/include/cybozu/fmindex.hpp +291 -0
- data/vendor/cybozulib/include/cybozu/format.hpp +93 -0
- data/vendor/cybozulib/include/cybozu/frequency.hpp +264 -0
- data/vendor/cybozulib/include/cybozu/hash.hpp +67 -0
- data/vendor/cybozulib/include/cybozu/inttype.hpp +174 -0
- data/vendor/cybozulib/include/cybozu/itoa.hpp +336 -0
- data/vendor/cybozulib/include/cybozu/json.hpp +120 -0
- data/vendor/cybozulib/include/cybozu/line_stream.hpp +149 -0
- data/vendor/cybozulib/include/cybozu/link_libeay32.hpp +21 -0
- data/vendor/cybozulib/include/cybozu/link_mpir.hpp +18 -0
- data/vendor/cybozulib/include/cybozu/link_ssleay32.hpp +19 -0
- data/vendor/cybozulib/include/cybozu/log.hpp +237 -0
- data/vendor/cybozulib/include/cybozu/minixml.hpp +452 -0
- data/vendor/cybozulib/include/cybozu/mmap.hpp +143 -0
- data/vendor/cybozulib/include/cybozu/mutex.hpp +144 -0
- data/vendor/cybozulib/include/cybozu/nlp/mecab.hpp +96 -0
- data/vendor/cybozulib/include/cybozu/nlp/plsi.hpp +315 -0
- data/vendor/cybozulib/include/cybozu/nlp/random.hpp +74 -0
- data/vendor/cybozulib/include/cybozu/nlp/sparse.hpp +529 -0
- data/vendor/cybozulib/include/cybozu/nlp/svd.hpp +486 -0
- data/vendor/cybozulib/include/cybozu/nlp/tfidf.hpp +226 -0
- data/vendor/cybozulib/include/cybozu/nlp/top_score.hpp +75 -0
- data/vendor/cybozulib/include/cybozu/option.hpp +743 -0
- data/vendor/cybozulib/include/cybozu/parallel.hpp +88 -0
- data/vendor/cybozulib/include/cybozu/pcg.hpp +72 -0
- data/vendor/cybozulib/include/cybozu/process.hpp +324 -0
- data/vendor/cybozulib/include/cybozu/quit_signal_handler.hpp +66 -0
- data/vendor/cybozulib/include/cybozu/random_generator.hpp +144 -0
- data/vendor/cybozulib/include/cybozu/regex.hpp +463 -0
- data/vendor/cybozulib/include/cybozu/select8.hpp +279 -0
- data/vendor/cybozulib/include/cybozu/serializer.hpp +363 -0
- data/vendor/cybozulib/include/cybozu/sha1.hpp +209 -0
- data/vendor/cybozulib/include/cybozu/sha2.hpp +506 -0
- data/vendor/cybozulib/include/cybozu/siphash.hpp +105 -0
- data/vendor/cybozulib/include/cybozu/socket.hpp +785 -0
- data/vendor/cybozulib/include/cybozu/ssl.hpp +203 -0
- data/vendor/cybozulib/include/cybozu/stacktrace.hpp +291 -0
- data/vendor/cybozulib/include/cybozu/stream.hpp +269 -0
- data/vendor/cybozulib/include/cybozu/string.hpp +1746 -0
- data/vendor/cybozulib/include/cybozu/string_operation.hpp +365 -0
- data/vendor/cybozulib/include/cybozu/sucvector.hpp +378 -0
- data/vendor/cybozulib/include/cybozu/test.hpp +373 -0
- data/vendor/cybozulib/include/cybozu/thread.hpp +229 -0
- data/vendor/cybozulib/include/cybozu/time.hpp +281 -0
- data/vendor/cybozulib/include/cybozu/tls.hpp +115 -0
- data/vendor/cybozulib/include/cybozu/unordered_map.hpp +13 -0
- data/vendor/cybozulib/include/cybozu/unordered_set.hpp +13 -0
- data/vendor/cybozulib/include/cybozu/v128.hpp +376 -0
- data/vendor/cybozulib/include/cybozu/wavelet_matrix.hpp +345 -0
- data/vendor/cybozulib/include/cybozu/xorshift.hpp +189 -0
- data/vendor/cybozulib/include/cybozu/zlib.hpp +325 -0
- data/vendor/cybozulib/include/sais.hxx +364 -0
- data/vendor/cybozulib/misc/make_select8tbl.cpp +26 -0
- data/vendor/cybozulib/mk.bat +37 -0
- data/vendor/cybozulib/readme.md +29 -0
- data/vendor/cybozulib/release.props +12 -0
- data/vendor/cybozulib/sample/Makefile +30 -0
- data/vendor/cybozulib/sample/csucvector_smpl.cpp +42 -0
- data/vendor/cybozulib/sample/data/svd/org/test1.S +4 -0
- data/vendor/cybozulib/sample/data/svd/org/test1.U +4 -0
- data/vendor/cybozulib/sample/data/svd/org/test1.V +6 -0
- data/vendor/cybozulib/sample/data/svd/test1 +4 -0
- data/vendor/cybozulib/sample/data/svd/test2 +4 -0
- data/vendor/cybozulib/sample/desymbol.cpp +127 -0
- data/vendor/cybozulib/sample/exception_smpl.cpp +46 -0
- data/vendor/cybozulib/sample/fmindex_smpl.cpp +231 -0
- data/vendor/cybozulib/sample/log_smpl.cpp +19 -0
- data/vendor/cybozulib/sample/mecab_smpl.cpp +37 -0
- data/vendor/cybozulib/sample/option2_smpl.cpp +68 -0
- data/vendor/cybozulib/sample/option_smpl.cpp +42 -0
- data/vendor/cybozulib/sample/plsi_smpl.cpp +207 -0
- data/vendor/cybozulib/sample/proj/exception_smpl.vcproj +184 -0
- data/vendor/cybozulib/sample/proj/mecab_smpl.vcproj +184 -0
- data/vendor/cybozulib/sample/proj/ssl_smpl/ssl_smpl.vcxproj +85 -0
- data/vendor/cybozulib/sample/proj/ssl_smpl.vcproj +347 -0
- data/vendor/cybozulib/sample/proj/stacktrace_smpl/stacktrace_smpl.vcxproj +85 -0
- data/vendor/cybozulib/sample/proj/svd_smpl.vcproj +184 -0
- data/vendor/cybozulib/sample/quit_signal_handler.cpp +30 -0
- data/vendor/cybozulib/sample/serializer_smpl.cpp +196 -0
- data/vendor/cybozulib/sample/socket_smpl.cpp +82 -0
- data/vendor/cybozulib/sample/ssl_smpl.cpp +39 -0
- data/vendor/cybozulib/sample/stacktrace_smpl.cpp +52 -0
- data/vendor/cybozulib/sample/svd_bench_smpl.cpp +143 -0
- data/vendor/cybozulib/sample/svd_smpl.cpp +94 -0
- data/vendor/cybozulib/sample/wm_bench_smpl.cpp +182 -0
- data/vendor/cybozulib/sample/zlib_smpl.cpp +41 -0
- data/vendor/cybozulib/src/Makefile +8 -0
- data/vendor/cybozulib/src/base/Makefile +19 -0
- data/vendor/cybozulib/test/Makefile +12 -0
- data/vendor/cybozulib/test/base/Makefile +37 -0
- data/vendor/cybozulib/test/base/array_test.cpp +173 -0
- data/vendor/cybozulib/test/base/atoi_test.cpp +774 -0
- data/vendor/cybozulib/test/base/atomic_test.cpp +49 -0
- data/vendor/cybozulib/test/base/base64_test.cpp +113 -0
- data/vendor/cybozulib/test/base/bit_operation_test.cpp +134 -0
- data/vendor/cybozulib/test/base/bitvector_test.cpp +204 -0
- data/vendor/cybozulib/test/base/condition_variable_cs_test.cpp +92 -0
- data/vendor/cybozulib/test/base/condition_variable_test.cpp +88 -0
- data/vendor/cybozulib/test/base/config_test.cpp +236 -0
- data/vendor/cybozulib/test/base/crypto_test.cpp +122 -0
- data/vendor/cybozulib/test/base/csucvector_test.cpp +63 -0
- data/vendor/cybozulib/test/base/csv_test.cpp +182 -0
- data/vendor/cybozulib/test/base/data/a.xml +26 -0
- data/vendor/cybozulib/test/base/endian_test.cpp +56 -0
- data/vendor/cybozulib/test/base/env_test.cpp +22 -0
- data/vendor/cybozulib/test/base/event_test.cpp +41 -0
- data/vendor/cybozulib/test/base/file_test.cpp +233 -0
- data/vendor/cybozulib/test/base/fmindex_test.cpp +118 -0
- data/vendor/cybozulib/test/base/format_test.cpp +12 -0
- data/vendor/cybozulib/test/base/frequency_test.cpp +104 -0
- data/vendor/cybozulib/test/base/itoa_test.cpp +522 -0
- data/vendor/cybozulib/test/base/line_stream_test.cpp +208 -0
- data/vendor/cybozulib/test/base/mecab_test.cpp +41 -0
- data/vendor/cybozulib/test/base/minixml_test.cpp +103 -0
- data/vendor/cybozulib/test/base/mmap_test.cpp +15 -0
- data/vendor/cybozulib/test/base/option_test.cpp +487 -0
- data/vendor/cybozulib/test/base/parallel_test.cpp +48 -0
- data/vendor/cybozulib/test/base/proj/array_test/array_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/atoi_test/atoi_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/atomic_test/atomic_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/base64_test/base64_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/condition_variable_cs_test/condition_variable_cs_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/condition_variable_test/condition_variable_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/config_test/config_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/csv_test/csv_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/endian_test/endian_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/env_test/env_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/event_test/event_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/file_test/file_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/itoa_test/itoa_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/mecab_test/mecab_test.vcxproj +88 -0
- data/vendor/cybozulib/test/base/proj/minixml_test/minixml_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/mmap_test/mmap_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/serializer_test/serializer_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/sha1_test/sha1_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/stream_test/stream_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/string_operation_test/string_operation_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/string_test/string_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/thread_test/thread_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/time_test/time_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/tls_test/tls_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/zlib_test/zlib_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/random_generator_test.cpp +28 -0
- data/vendor/cybozulib/test/base/regex_test.cpp +74 -0
- data/vendor/cybozulib/test/base/serializer_test.cpp +483 -0
- data/vendor/cybozulib/test/base/sha1_test.cpp +61 -0
- data/vendor/cybozulib/test/base/sha2_test.cpp +191 -0
- data/vendor/cybozulib/test/base/siphash_test.cpp +33 -0
- data/vendor/cybozulib/test/base/socket_test.cpp +76 -0
- data/vendor/cybozulib/test/base/stream_test.cpp +101 -0
- data/vendor/cybozulib/test/base/string_operation_test.cpp +340 -0
- data/vendor/cybozulib/test/base/string_test.cpp +1705 -0
- data/vendor/cybozulib/test/base/sucvector_test.cpp +312 -0
- data/vendor/cybozulib/test/base/thread_test.cpp +62 -0
- data/vendor/cybozulib/test/base/time_test.cpp +164 -0
- data/vendor/cybozulib/test/base/tls_test.cpp +50 -0
- data/vendor/cybozulib/test/base/wavelet_matrix_test.cpp +145 -0
- data/vendor/cybozulib/test/base/zlib_test.cpp +371 -0
- data/vendor/cybozulib/test/nlp/Makefile +27 -0
- data/vendor/cybozulib/test/nlp/proj/random_test.vcproj +184 -0
- data/vendor/cybozulib/test/nlp/proj/sparse_test.vcproj +184 -0
- data/vendor/cybozulib/test/nlp/proj/svd_test.vcproj +184 -0
- data/vendor/cybozulib/test/nlp/random_test.cpp +62 -0
- data/vendor/cybozulib/test/nlp/sparse_test.cpp +347 -0
- data/vendor/cybozulib/test/nlp/svd_test.cpp +234 -0
- data/vendor/cybozulib/test/nlp/top_score_test.cpp +40 -0
- data/vendor/cybozulib/tool/create_vcproj.py +186 -0
- data/vendor/cybozulib/tool/vcproj_tmpl.py +185 -0
- data/vendor/msoffice/COPYRIGHT +27 -0
- data/vendor/msoffice/Makefile +29 -0
- data/vendor/msoffice/bin/64/msoc.dll +0 -0
- data/vendor/msoffice/bin/64/msocsample.exe +0 -0
- data/vendor/msoffice/bin/64/msoffice-crypt.exe +0 -0
- data/vendor/msoffice/bin/msoc.dll +0 -0
- data/vendor/msoffice/bin/msocsample.exe +0 -0
- data/vendor/msoffice/bin/msoffice-crypt.exe +0 -0
- data/vendor/msoffice/common.mk +71 -0
- data/vendor/msoffice/common.props +26 -0
- data/vendor/msoffice/debug.props +14 -0
- data/vendor/msoffice/include/attack.hpp +211 -0
- data/vendor/msoffice/include/cfb.hpp +777 -0
- data/vendor/msoffice/include/crypto_util.hpp +450 -0
- data/vendor/msoffice/include/custom_sha1.hpp +342 -0
- data/vendor/msoffice/include/decode.hpp +240 -0
- data/vendor/msoffice/include/encode.hpp +221 -0
- data/vendor/msoffice/include/make_dataspace.hpp +316 -0
- data/vendor/msoffice/include/msoc.h +129 -0
- data/vendor/msoffice/include/resource.hpp +7 -0
- data/vendor/msoffice/include/standard_encryption.hpp +145 -0
- data/vendor/msoffice/include/uint32vec.hpp +179 -0
- data/vendor/msoffice/include/util.hpp +212 -0
- data/vendor/msoffice/lib/.emptydir +0 -0
- data/vendor/msoffice/misc/decrypt-xls.vbs +46 -0
- data/vendor/msoffice/mk.bat +1 -0
- data/vendor/msoffice/mkdll.bat +3 -0
- data/vendor/msoffice/msoc.def +13 -0
- data/vendor/msoffice/msocsample.py +178 -0
- data/vendor/msoffice/msoffice12.sln +31 -0
- data/vendor/msoffice/readme.md +110 -0
- data/vendor/msoffice/release.props +28 -0
- data/vendor/msoffice/src/Makefile +19 -0
- data/vendor/msoffice/src/attack.cpp +124 -0
- data/vendor/msoffice/src/cfb_test.cpp +77 -0
- data/vendor/msoffice/src/minisample.c +54 -0
- data/vendor/msoffice/src/msocdll.cpp +276 -0
- data/vendor/msoffice/src/msocsample.c +136 -0
- data/vendor/msoffice/src/msoffice-crypt.cpp +219 -0
- data/vendor/msoffice/src/proj/attack/attack.vcxproj +88 -0
- data/vendor/msoffice/src/proj/main/msoffice-crypt.vcxproj +88 -0
- data/vendor/msoffice/src/sha1.cpp +234 -0
- data/vendor/msoffice/test/Makefile +20 -0
- data/vendor/msoffice/test/cfb_test.cpp +74 -0
- data/vendor/msoffice/test/hash_test.cpp +59 -0
- data/vendor/msoffice/test/proj/cfb/cfb_test.vcxproj +90 -0
- data/vendor/msoffice/test/proj/hash/hash_test.vcxproj +90 -0
- data/vendor/msoffice/test/sampl.bat +8 -0
- data/vendor/msoffice/test_all.py +46 -0
- data/vendor/update +4 -0
- metadata +351 -0
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
/**
|
|
3
|
+
@file
|
|
4
|
+
@brief wrapper of __m128
|
|
5
|
+
@author MITSUNARI Shigeo(@herumi)
|
|
6
|
+
@license modified new BSD license
|
|
7
|
+
http://opensource.org/licenses/BSD-3-Clause
|
|
8
|
+
*/
|
|
9
|
+
#include <cybozu/inttype.hpp>
|
|
10
|
+
#include <stdio.h>
|
|
11
|
+
#include <assert.h>
|
|
12
|
+
#ifdef _WIN32
|
|
13
|
+
#include <intrin.h>
|
|
14
|
+
#else
|
|
15
|
+
#ifdef __linux__
|
|
16
|
+
#include <x86intrin.h>
|
|
17
|
+
#else
|
|
18
|
+
#include <emmintrin.h>
|
|
19
|
+
#endif
|
|
20
|
+
#endif
|
|
21
|
+
|
|
22
|
+
#ifndef MIE_PACK // for shufps
|
|
23
|
+
#define MIE_PACK(x, y, z, w) ((x) * 64 + (y) * 16 + (z) * 4 + (w))
|
|
24
|
+
#endif
|
|
25
|
+
|
|
26
|
+
namespace cybozu {
|
|
27
|
+
|
|
28
|
+
struct V128 {
|
|
29
|
+
__m128i x_;
|
|
30
|
+
V128()
|
|
31
|
+
{
|
|
32
|
+
}
|
|
33
|
+
V128(const uint32_t *p)
|
|
34
|
+
: x_(_mm_load_si128((const __m128i*)p))
|
|
35
|
+
{
|
|
36
|
+
}
|
|
37
|
+
V128(__m128i x)
|
|
38
|
+
: x_(x)
|
|
39
|
+
{
|
|
40
|
+
}
|
|
41
|
+
V128(__m128 x)
|
|
42
|
+
: x_(_mm_castps_si128(x))
|
|
43
|
+
{
|
|
44
|
+
}
|
|
45
|
+
__m128 to_ps() const { return _mm_castsi128_ps(x_); }
|
|
46
|
+
// m = [x3:x2:x1:x0]
|
|
47
|
+
V128(uint32_t x3, uint32_t x2, uint32_t x1, uint32_t x0)
|
|
48
|
+
: x_(_mm_set_epi32(x3, x2, x1, x0))
|
|
49
|
+
{
|
|
50
|
+
}
|
|
51
|
+
explicit V128(uint32_t x)
|
|
52
|
+
: x_(_mm_cvtsi32_si128(x))
|
|
53
|
+
{
|
|
54
|
+
}
|
|
55
|
+
#if defined(_M_X64) || defined(__x86_64__)
|
|
56
|
+
explicit V128(uint64_t x)
|
|
57
|
+
: x_(_mm_cvtsi64_si128(x))
|
|
58
|
+
{
|
|
59
|
+
}
|
|
60
|
+
#endif
|
|
61
|
+
V128(const V128& rhs)
|
|
62
|
+
: x_(rhs.x_)
|
|
63
|
+
{
|
|
64
|
+
}
|
|
65
|
+
void clear()
|
|
66
|
+
{
|
|
67
|
+
*this = _mm_setzero_si128();
|
|
68
|
+
}
|
|
69
|
+
void set(uint32_t x)
|
|
70
|
+
{
|
|
71
|
+
x_ = _mm_set1_epi32(x);
|
|
72
|
+
}
|
|
73
|
+
// m = [x3:x2:x1:x0]
|
|
74
|
+
void set(uint32_t x3, uint32_t x2, uint32_t x1, uint32_t x0)
|
|
75
|
+
{
|
|
76
|
+
x_ = _mm_set_epi32(x3, x2, x1, x0);
|
|
77
|
+
}
|
|
78
|
+
// aligned
|
|
79
|
+
void store(uint32_t *p) const
|
|
80
|
+
{
|
|
81
|
+
_mm_store_si128((__m128i*)p, x_);
|
|
82
|
+
}
|
|
83
|
+
// unaligned
|
|
84
|
+
void store_u(uint32_t *p) const
|
|
85
|
+
{
|
|
86
|
+
_mm_storeu_si128((__m128i*)p, x_);
|
|
87
|
+
}
|
|
88
|
+
// aligned
|
|
89
|
+
void load(const uint32_t *p)
|
|
90
|
+
{
|
|
91
|
+
x_ = _mm_load_si128((const __m128i*)p);
|
|
92
|
+
}
|
|
93
|
+
// unaligned
|
|
94
|
+
void load_u(const uint32_t *p)
|
|
95
|
+
{
|
|
96
|
+
x_ = _mm_loadu_si128((const __m128i*)p);
|
|
97
|
+
}
|
|
98
|
+
/*
|
|
99
|
+
*this >>= n
|
|
100
|
+
*/
|
|
101
|
+
template<int n>
|
|
102
|
+
void shrBit();
|
|
103
|
+
/*
|
|
104
|
+
*this <<= n
|
|
105
|
+
*/
|
|
106
|
+
template<int n>
|
|
107
|
+
void shlBit();
|
|
108
|
+
void put(const char *msg = 0) const
|
|
109
|
+
{
|
|
110
|
+
uint32_t v[4];
|
|
111
|
+
store_u(v);
|
|
112
|
+
if (msg) printf("%s", msg);
|
|
113
|
+
printf("%08x:%08x:%08x:%08x", v[3], v[2], v[1], v[0]);
|
|
114
|
+
if (msg) putchar('\n');
|
|
115
|
+
}
|
|
116
|
+
};
|
|
117
|
+
|
|
118
|
+
inline uint32_t movd(const V128& a)
|
|
119
|
+
{
|
|
120
|
+
return _mm_cvtsi128_si32(a.x_);
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
inline V128 Zero()
|
|
124
|
+
{
|
|
125
|
+
return _mm_setzero_si128();
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
template<int n>
|
|
129
|
+
inline V128 psrldq(const V128& a)
|
|
130
|
+
{
|
|
131
|
+
return _mm_srli_si128(a.x_, n);
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
template<int n>
|
|
135
|
+
inline V128 pslldq(const V128& a)
|
|
136
|
+
{
|
|
137
|
+
return _mm_slli_si128(a.x_, n);
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
template<int n>
|
|
141
|
+
inline V128 psrlq(const V128& a)
|
|
142
|
+
{
|
|
143
|
+
return _mm_srli_epi64(a.x_, n);
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
inline V128 psrlq(const V128& a, const V128& n)
|
|
147
|
+
{
|
|
148
|
+
return _mm_srl_epi64(a.x_, n.x_);
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
template<int n>
|
|
152
|
+
inline V128 psllq(const V128& a)
|
|
153
|
+
{
|
|
154
|
+
return _mm_slli_epi64(a.x_, n);
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
inline V128 psllq(const V128& a, const V128& n)
|
|
158
|
+
{
|
|
159
|
+
return _mm_sll_epi64(a.x_, n.x_);
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
template<int n>
|
|
163
|
+
inline V128 palignr(const V128& a, const V128& b)
|
|
164
|
+
{
|
|
165
|
+
return _mm_alignr_epi8(a.x_, b.x_, n);
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
inline V128 punpckhdq(const V128& a, const V128& b)
|
|
169
|
+
{
|
|
170
|
+
return _mm_unpackhi_epi32(a.x_, b.x_);
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
inline V128 punpckhqdq(const V128& a, const V128& b)
|
|
174
|
+
{
|
|
175
|
+
return _mm_unpackhi_epi64(a.x_, b.x_);
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
inline V128 punpckldq(const V128& a, const V128& b)
|
|
179
|
+
{
|
|
180
|
+
return _mm_unpacklo_epi32(a.x_, b.x_);
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
inline V128 punpcklqdq(const V128& a, const V128& b)
|
|
184
|
+
{
|
|
185
|
+
return _mm_unpacklo_epi64(a.x_, b.x_);
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
inline V128 unpcklps(const V128& a, const V128& b)
|
|
189
|
+
{
|
|
190
|
+
return _mm_unpacklo_ps(a.to_ps(), b.to_ps());
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
inline V128 unpckhps(const V128& a, const V128& b)
|
|
194
|
+
{
|
|
195
|
+
return _mm_unpackhi_ps(a.to_ps(), b.to_ps());
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
inline V128 paddd(const V128& a, const V128& b)
|
|
199
|
+
{
|
|
200
|
+
return _mm_add_epi32(a.x_, b.x_);
|
|
201
|
+
}
|
|
202
|
+
inline V128 psubd(const V128& a, const V128& b)
|
|
203
|
+
{
|
|
204
|
+
return _mm_sub_epi32(a.x_, b.x_);
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
inline V128 pandn(const V128& a, const V128& b)
|
|
208
|
+
{
|
|
209
|
+
return _mm_andnot_si128(a.x_, b.x_);
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
inline V128 por(const V128& a, const V128& b)
|
|
213
|
+
{
|
|
214
|
+
return _mm_or_si128(a.x_, b.x_);
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
inline V128 pand(const V128& a, const V128& b)
|
|
218
|
+
{
|
|
219
|
+
return _mm_and_si128(a.x_, b.x_);
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
inline V128 pxor(const V128& a, const V128& b)
|
|
223
|
+
{
|
|
224
|
+
return _mm_xor_si128(a.x_, b.x_);
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
inline V128 pmaxsd(const V128& a, const V128& b)
|
|
228
|
+
{
|
|
229
|
+
return _mm_max_epi32(a.x_, b.x_);
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
inline V128 pminsd(const V128& a, const V128& b)
|
|
233
|
+
{
|
|
234
|
+
return _mm_min_epi32(a.x_, b.x_);
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
inline V128 pmaxud(const V128& a, const V128& b)
|
|
238
|
+
{
|
|
239
|
+
return _mm_max_epu32(a.x_, b.x_);
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
inline V128 pminud(const V128& a, const V128& b)
|
|
243
|
+
{
|
|
244
|
+
return _mm_min_epu32(a.x_, b.x_);
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
inline V128 pcmpeqd(const V128& a, const V128& b)
|
|
248
|
+
{
|
|
249
|
+
return _mm_cmpeq_epi32(a.x_, b.x_);
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
inline V128 pcmpgtd(const V128& a, const V128& b)
|
|
253
|
+
{
|
|
254
|
+
return _mm_cmpgt_epi32(a.x_, b.x_);
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
inline uint32_t pmovmskb(const V128& a)
|
|
258
|
+
{
|
|
259
|
+
return _mm_movemask_epi8(a.x_);
|
|
260
|
+
}
|
|
261
|
+
inline V128 pshufb(const V128& a, const V128& b)
|
|
262
|
+
{
|
|
263
|
+
return _mm_shuffle_epi8(a.x_, b.x_);
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
template<int n>
|
|
267
|
+
inline V128 pshufd(const V128& a)
|
|
268
|
+
{
|
|
269
|
+
return _mm_shuffle_epi32(a.x_, n);
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
template<int idx>
|
|
273
|
+
inline uint32_t pextrd(const V128& a)
|
|
274
|
+
{
|
|
275
|
+
return _mm_extract_epi32(a.x_, idx);
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
template<int idx>
|
|
279
|
+
inline V128 pinsrd(const V128& a, uint32_t v)
|
|
280
|
+
{
|
|
281
|
+
return _mm_castsi128_ps(_mm_insert_epi32(a.x_, v, idx));
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
template<int idx>
|
|
285
|
+
inline V128 pinsrb(const V128& a, uint8_t v)
|
|
286
|
+
{
|
|
287
|
+
return _mm_castsi128_ps(_mm_insert_epi8(a.x_, v, idx));
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
inline int ptest_zf(const V128& a, const V128& b)
|
|
291
|
+
{
|
|
292
|
+
return _mm_testz_si128(a.x_, b.x_);
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
inline int ptest_cf(const V128& a, const V128& b)
|
|
296
|
+
{
|
|
297
|
+
return _mm_testc_si128(a.x_, b.x_);
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
inline V128 psadbw(const V128& a, const V128& b)
|
|
301
|
+
{
|
|
302
|
+
return _mm_sad_epu8(a.x_, b.x_);
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
inline void swap128(uint32_t *p, uint32_t *q)
|
|
306
|
+
{
|
|
307
|
+
V128 t(p);
|
|
308
|
+
V128(q).store(p);
|
|
309
|
+
t.store(q);
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
inline void copy128(uint32_t *dest, const uint32_t *src)
|
|
313
|
+
{
|
|
314
|
+
V128(src).store(dest);
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
template<int n>
|
|
318
|
+
inline void V128::shrBit()
|
|
319
|
+
{
|
|
320
|
+
assert(n < 64);
|
|
321
|
+
*this = psrlq<n>(*this) | psllq<64 - n>(psrldq<8>(*this));
|
|
322
|
+
#if 0
|
|
323
|
+
if (n == 64) {
|
|
324
|
+
*this = psrldq<8>(*this);
|
|
325
|
+
} else if (n > 64) {
|
|
326
|
+
*this = psrlq<n - 64>(psrldq<8>(*this));
|
|
327
|
+
}
|
|
328
|
+
#endif
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
template<int n>
|
|
332
|
+
inline void V128::shlBit()
|
|
333
|
+
{
|
|
334
|
+
assert(n < 64);
|
|
335
|
+
*this = psllq<n>(*this) | psrlq<64 - n>(pslldq<8>(*this));
|
|
336
|
+
#if 0
|
|
337
|
+
if (n == 64) {
|
|
338
|
+
*this = pslldq<8>(*this);
|
|
339
|
+
} else if (n > 64) {
|
|
340
|
+
*this = psllq<n - 64>(pslldq<8>(*this));
|
|
341
|
+
}
|
|
342
|
+
#endif
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
/*
|
|
346
|
+
byte rotr [x2:x1:x0]
|
|
347
|
+
*/
|
|
348
|
+
template<int n>
|
|
349
|
+
inline void rotrByte(V128& x0, V128& x1, V128& x2)
|
|
350
|
+
{
|
|
351
|
+
V128 t(x0);
|
|
352
|
+
x0 = palignr<n>(x1, x0);
|
|
353
|
+
x1 = palignr<n>(x2, x1);
|
|
354
|
+
x2 = palignr<n>(t, x2);
|
|
355
|
+
}
|
|
356
|
+
|
|
357
|
+
/*
|
|
358
|
+
byte rotl [x2:x1:x0]
|
|
359
|
+
*/
|
|
360
|
+
template<int n>
|
|
361
|
+
inline void rotlByte(V128& x0, V128& x1, V128& x2)
|
|
362
|
+
{
|
|
363
|
+
V128 t(x2);
|
|
364
|
+
x2 = palignr<16 - n>(x2, x1);
|
|
365
|
+
x1 = palignr<16 - n>(x1, x0);
|
|
366
|
+
x0 = palignr<16 - n>(x0, t);
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
#if defined(_M_X64) || defined(__x86_64__)
|
|
370
|
+
inline uint64_t movq(const V128& a)
|
|
371
|
+
{
|
|
372
|
+
return _mm_cvtsi128_si64(a.x_);
|
|
373
|
+
}
|
|
374
|
+
#endif
|
|
375
|
+
|
|
376
|
+
} // cybozu
|
|
@@ -0,0 +1,345 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
/**
|
|
3
|
+
@file
|
|
4
|
+
@brief Wavelet Matrix
|
|
5
|
+
@author MITSUNARI Shigeo(@herumi)
|
|
6
|
+
@license modified new BSD license
|
|
7
|
+
http://opensource.org/licenses/BSD-3-Clause
|
|
8
|
+
*/
|
|
9
|
+
#include <cybozu/sucvector.hpp>
|
|
10
|
+
#include <stdio.h>
|
|
11
|
+
#ifdef _MSC_VER
|
|
12
|
+
#pragma warning(push)
|
|
13
|
+
#pragma warning(disable:4127)
|
|
14
|
+
#endif
|
|
15
|
+
|
|
16
|
+
//#define CYBOZU_WAVELET_MATRIX_DIRECT_CONSTRUCT
|
|
17
|
+
|
|
18
|
+
#ifndef CYBOZU_WAVELET_MATRIX_DIRECT_CONSTRUCT
|
|
19
|
+
#include <cybozu/bitvector.hpp>
|
|
20
|
+
#endif
|
|
21
|
+
|
|
22
|
+
namespace cybozu {
|
|
23
|
+
/*
|
|
24
|
+
current version supports only max 32GiB
|
|
25
|
+
*/
|
|
26
|
+
template<bool withSelect = true, class SucVector = cybozu::SucVectorT<uint32_t, false> >
|
|
27
|
+
class WaveletMatrixT {
|
|
28
|
+
typedef uint32_t size_type;
|
|
29
|
+
bool getPos(uint64_t v, size_t pos) const
|
|
30
|
+
{
|
|
31
|
+
return (v & (uint64_t(1) << pos)) != 0;
|
|
32
|
+
}
|
|
33
|
+
template<class V>
|
|
34
|
+
size_t countZero(const V& in, size_t bitPos) const
|
|
35
|
+
{
|
|
36
|
+
size_t ret = 0;
|
|
37
|
+
const size_t mask = size_t(1) << bitPos;
|
|
38
|
+
for (size_t i =0, n = in.size(); i < n; i++) {
|
|
39
|
+
if (!(in[i] & mask)) ret++;
|
|
40
|
+
}
|
|
41
|
+
return ret;
|
|
42
|
+
}
|
|
43
|
+
void initFromTbl(std::vector<size_type>& tbl, size_t pos, size_t from, size_t i) const
|
|
44
|
+
{
|
|
45
|
+
if (i == valBitLen_) {
|
|
46
|
+
tbl[pos] = (size_type)from;
|
|
47
|
+
} else {
|
|
48
|
+
initFromTbl(tbl, pos, svv[i].rank(false, from), i + 1);
|
|
49
|
+
initFromTbl(tbl, pos + (size_t(1) << (valBitLen_ - 1 - i)), svv[i].rank(true, from) + offTbl[i], i + 1);
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
void initFromLtTbl(std::vector<size_type>& tbl, size_t pos, size_t from, size_t ret, size_t i) const
|
|
53
|
+
{
|
|
54
|
+
if (i == valBitLen_) {
|
|
55
|
+
tbl[pos] = (size_type)ret;
|
|
56
|
+
} else {
|
|
57
|
+
size_t end = svv[i].rank1(from);
|
|
58
|
+
initFromLtTbl(tbl, pos, from - end, ret, i + 1);
|
|
59
|
+
initFromLtTbl(tbl, pos + (size_t(1) << (valBitLen_ - 1 - i)), offTbl[i] + end, ret + from - end, i + 1);
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
typedef std::vector<SucVector> SucVecVec;
|
|
63
|
+
uint64_t maxVal_;
|
|
64
|
+
size_t valBitLen_;
|
|
65
|
+
size_t size_;
|
|
66
|
+
SucVecVec svv;
|
|
67
|
+
std::vector<size_type> offTbl;
|
|
68
|
+
std::vector<size_type> fromTbl;
|
|
69
|
+
std::vector<size_type> fromLtTbl;
|
|
70
|
+
typedef std::vector<uint32_t> Uint32Vec;
|
|
71
|
+
static const uint64_t posUnit = 256;
|
|
72
|
+
std::vector<Uint32Vec> selTbl_;
|
|
73
|
+
|
|
74
|
+
// call after initialized
|
|
75
|
+
template<class Vec>
|
|
76
|
+
void initSelTbl(std::vector<Uint32Vec>& tblVec, const Vec& vec) const
|
|
77
|
+
{
|
|
78
|
+
if (!withSelect) return;
|
|
79
|
+
tblVec.resize(maxVal_);
|
|
80
|
+
|
|
81
|
+
Uint32Vec iTbl(maxVal_);
|
|
82
|
+
Uint32Vec numTbl(maxVal_);
|
|
83
|
+
for (uint64_t v = 0; v < maxVal_; v++) {
|
|
84
|
+
const size_t size = sucvector_util::getBlockNum(this->size(v), posUnit);
|
|
85
|
+
tblVec[v].resize(size);
|
|
86
|
+
iTbl[v] = 1;
|
|
87
|
+
}
|
|
88
|
+
for (uint32_t pos = 0, n = (uint32_t)vec.size(); pos < n; pos++) {
|
|
89
|
+
uint64_t v = vec[pos];
|
|
90
|
+
uint32_t i = iTbl[v];
|
|
91
|
+
numTbl[v]++;
|
|
92
|
+
if (numTbl[v] >= i * posUnit) {
|
|
93
|
+
if (i < tblVec[v].size()) {
|
|
94
|
+
tblVec[v][i] = pos + 1;
|
|
95
|
+
iTbl[v]++;
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
public:
|
|
101
|
+
WaveletMatrixT()
|
|
102
|
+
: maxVal_(1)
|
|
103
|
+
, valBitLen_(0)
|
|
104
|
+
, size_(0)
|
|
105
|
+
{
|
|
106
|
+
}
|
|
107
|
+
/*
|
|
108
|
+
data format(endian is depend on CPU:eg. little endian for x86/x64)
|
|
109
|
+
valBitLen : 8
|
|
110
|
+
maxVal : 8
|
|
111
|
+
size : 8
|
|
112
|
+
svv
|
|
113
|
+
offTblSize : 8
|
|
114
|
+
offTbl
|
|
115
|
+
fromTblSize : 8
|
|
116
|
+
fromTbl
|
|
117
|
+
fromLtTblSize : 8
|
|
118
|
+
fromLtTbl
|
|
119
|
+
*/
|
|
120
|
+
template<class OutputStream>
|
|
121
|
+
void save(OutputStream& os) const
|
|
122
|
+
{
|
|
123
|
+
cybozu::save(os, maxVal_);
|
|
124
|
+
cybozu::save(os, valBitLen_);
|
|
125
|
+
cybozu::save(os, size_);
|
|
126
|
+
assert(valBitLen_ == svv.size());
|
|
127
|
+
for (size_t i = 0; i < valBitLen_; i++) {
|
|
128
|
+
svv[i].save(os);
|
|
129
|
+
}
|
|
130
|
+
cybozu::savePodVec(os, offTbl);
|
|
131
|
+
cybozu::savePodVec(os, fromTbl);
|
|
132
|
+
cybozu::savePodVec(os, fromLtTbl);
|
|
133
|
+
|
|
134
|
+
if (withSelect) {
|
|
135
|
+
for (uint64_t v = 0; v < maxVal_; v++) {
|
|
136
|
+
cybozu::savePodVec(os, selTbl_[v]);
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
template<class InputStream>
|
|
141
|
+
void load(InputStream& is)
|
|
142
|
+
{
|
|
143
|
+
cybozu::load(maxVal_, is);
|
|
144
|
+
cybozu::load(valBitLen_, is);
|
|
145
|
+
cybozu::load(size_, is);
|
|
146
|
+
svv.resize(valBitLen_);
|
|
147
|
+
for (size_t i = 0; i < valBitLen_; i++) {
|
|
148
|
+
svv[i].load(is);
|
|
149
|
+
}
|
|
150
|
+
cybozu::loadPodVec(offTbl, is);
|
|
151
|
+
cybozu::loadPodVec(fromTbl, is);
|
|
152
|
+
cybozu::loadPodVec(fromLtTbl, is);
|
|
153
|
+
|
|
154
|
+
if (withSelect) {
|
|
155
|
+
selTbl_.resize(maxVal_);
|
|
156
|
+
for (uint64_t v = 0; v < maxVal_; v++) {
|
|
157
|
+
cybozu::loadPodVec(selTbl_[v], is);
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
// number of elements stored
uint64_t size() const
{
	return size_;
}
|
|
162
|
+
uint64_t size(uint64_t val) const
|
|
163
|
+
{
|
|
164
|
+
assert(val < maxVal_);
|
|
165
|
+
return rank(val, size_);
|
|
166
|
+
}
|
|
167
|
+
template<class Vec>
|
|
168
|
+
void init(const Vec& vec, size_t valBitLen)
|
|
169
|
+
{
|
|
170
|
+
if (vec.size() > (uint64_t(1) << 32)) throw cybozu::Exception("WaveletMatrix:init:too large") << vec.size();
|
|
171
|
+
if (valBitLen > 16) throw cybozu::Exception("WaveletMatrix:init:too large valBitLen") << valBitLen;
|
|
172
|
+
valBitLen_ = valBitLen;
|
|
173
|
+
maxVal_ = uint64_t(1) << valBitLen_;
|
|
174
|
+
size_ = vec.size();
|
|
175
|
+
svv.resize(valBitLen_);
|
|
176
|
+
|
|
177
|
+
// count zero bit
|
|
178
|
+
offTbl.resize(valBitLen_);
|
|
179
|
+
for (size_t i = 0, n = offTbl.size(); i < n; i++) {
|
|
180
|
+
offTbl[i] = (size_type)countZero(vec, valBitLen - 1 - i);
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
// construct svv
|
|
184
|
+
Vec cur = vec, next;
|
|
185
|
+
next.resize(size_);
|
|
186
|
+
for (size_t i = 0; i < valBitLen; i++) {
|
|
187
|
+
#ifdef CYBOZU_WAVELET_MATRIX_DIRECT_CONSTRUCT
|
|
188
|
+
SucVector& sv = svv[i];
|
|
189
|
+
#else
|
|
190
|
+
cybozu::BitVector sv;
|
|
191
|
+
#endif
|
|
192
|
+
sv.resize(size_);
|
|
193
|
+
size_t zeroPos = 0;
|
|
194
|
+
size_t onePos = offTbl[i];
|
|
195
|
+
for (size_t j = 0; j < size_; j++) {
|
|
196
|
+
bool b = getPos(cur[j], valBitLen - 1 - i);
|
|
197
|
+
if (b) {
|
|
198
|
+
sv.set(j);
|
|
199
|
+
}
|
|
200
|
+
if (i == valBitLen - 1) continue;
|
|
201
|
+
if (b) {
|
|
202
|
+
next[onePos++] = cur[j];
|
|
203
|
+
} else {
|
|
204
|
+
next[zeroPos++] = cur[j];
|
|
205
|
+
}
|
|
206
|
+
};
|
|
207
|
+
#ifdef CYBOZU_WAVELET_MATRIX_DIRECT_CONSTRUCT
|
|
208
|
+
sv.ready();
|
|
209
|
+
#else
|
|
210
|
+
svv[i].init(sv.getBlock(), sv.size());
|
|
211
|
+
#endif
|
|
212
|
+
next.swap(cur);
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
// construct fromTbl
|
|
216
|
+
fromTbl.resize(maxVal_);
|
|
217
|
+
initFromTbl(fromTbl, 0, 0, 0);
|
|
218
|
+
|
|
219
|
+
fromLtTbl.resize(maxVal_);
|
|
220
|
+
initFromLtTbl(fromLtTbl, 0, 0, 0, 0);
|
|
221
|
+
|
|
222
|
+
initSelTbl(selTbl_, vec);
|
|
223
|
+
}
|
|
224
|
+
uint64_t get(uint64_t pos) const
|
|
225
|
+
{
|
|
226
|
+
assert(pos < size_);
|
|
227
|
+
uint64_t ret = 0;
|
|
228
|
+
size_t i = 0;
|
|
229
|
+
for (;;) {
|
|
230
|
+
bool b = svv[i].get(pos);
|
|
231
|
+
ret = (ret << 1) | uint32_t(b);
|
|
232
|
+
if (i == valBitLen_ - 1) return ret;
|
|
233
|
+
#if 0
|
|
234
|
+
pos = svv[i].rank(b, pos);
|
|
235
|
+
if (b) pos += offTbl[i];
|
|
236
|
+
#else
|
|
237
|
+
if (b) {
|
|
238
|
+
pos = offTbl[i] + svv[i].rank1(pos);
|
|
239
|
+
} else {
|
|
240
|
+
pos -= svv[i].rank1(pos);
|
|
241
|
+
}
|
|
242
|
+
#endif
|
|
243
|
+
i++;
|
|
244
|
+
}
|
|
245
|
+
}
|
|
246
|
+
/*
|
|
247
|
+
get number of val in [0, pos)
|
|
248
|
+
@note shotcut idea to reduce computing 'from' by @echizen_tm
|
|
249
|
+
see http://ja.scribd.com/doc/102636443/Wavelet-Matrix
|
|
250
|
+
*/
|
|
251
|
+
uint64_t rank(uint64_t val, uint64_t pos) const
|
|
252
|
+
{
|
|
253
|
+
assert(val < maxVal_);
|
|
254
|
+
if (pos > size_) pos = size_;
|
|
255
|
+
for (size_t i = 0; i < valBitLen_; i++) {
|
|
256
|
+
bool b = (val & (uint64_t(1) << (valBitLen_ - 1 - i))) != 0;
|
|
257
|
+
if (b) {
|
|
258
|
+
pos = offTbl[i] + svv[i].rank1(pos);
|
|
259
|
+
} else {
|
|
260
|
+
pos -= svv[i].rank1(pos);
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
return pos - fromTbl[val];
|
|
264
|
+
}
|
|
265
|
+
/*
|
|
266
|
+
get value and rank
|
|
267
|
+
val = get(pos);
|
|
268
|
+
return rank(val, pos);
|
|
269
|
+
*/
|
|
270
|
+
template<class T>
|
|
271
|
+
uint64_t get(T* pval, uint64_t pos) const
|
|
272
|
+
{
|
|
273
|
+
if (pos > size_) pos = size_;
|
|
274
|
+
uint64_t ret = 0;
|
|
275
|
+
for (size_t i = 0; i < valBitLen_; i++) {
|
|
276
|
+
bool b = svv[i].get(pos);
|
|
277
|
+
ret = (ret << 1) | uint32_t(b);
|
|
278
|
+
if (b) {
|
|
279
|
+
pos = offTbl[i] + svv[i].rank1(pos);
|
|
280
|
+
} else {
|
|
281
|
+
pos -= svv[i].rank1(pos);
|
|
282
|
+
}
|
|
283
|
+
}
|
|
284
|
+
*pval = (T)ret;
|
|
285
|
+
return pos - fromTbl[ret];
|
|
286
|
+
}
|
|
287
|
+
/*
|
|
288
|
+
get number of less than val in [0, pos)
|
|
289
|
+
*/
|
|
290
|
+
uint64_t rankLt(uint64_t val, uint64_t pos) const
|
|
291
|
+
{
|
|
292
|
+
assert(val < maxVal_);
|
|
293
|
+
if (pos > size_) pos = size_;
|
|
294
|
+
uint64_t ret = 0;
|
|
295
|
+
for (size_t i = 0; i < valBitLen_; i++) {
|
|
296
|
+
bool b = getPos(val, valBitLen_ - 1 - i);
|
|
297
|
+
uint64_t end = svv[i].rank1(pos);
|
|
298
|
+
if (b) {
|
|
299
|
+
ret += pos - end;
|
|
300
|
+
pos = offTbl[i] + end;
|
|
301
|
+
} else {
|
|
302
|
+
pos -= end;
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
return ret - fromLtTbl[val];
|
|
306
|
+
}
|
|
307
|
+
uint64_t select(uint64_t val, uint64_t rank) const
|
|
308
|
+
{
|
|
309
|
+
if (!withSelect) throw cybozu::Exception("WaveletMatrix:select:not support");
|
|
310
|
+
assert(val < maxVal_);
|
|
311
|
+
const Uint32Vec& tbl = selTbl_[val];
|
|
312
|
+
if (rank / posUnit >= tbl.size()) return cybozu::NotFound;
|
|
313
|
+
const size_t pos = size_t(rank / posUnit);
|
|
314
|
+
// size_t L = 0;
|
|
315
|
+
// size_t R = size_;
|
|
316
|
+
size_t L = tbl[pos];
|
|
317
|
+
size_t R = pos >= tbl.size() - 1 ? size_ : tbl[pos + 1];
|
|
318
|
+
//printf("val=%d, rank=%d, L=%d, R=%d, size=%d\n", (int)val, (int)rank, (int)L, (int)R, (int)size_);
|
|
319
|
+
rank++;
|
|
320
|
+
while (L < R) {
|
|
321
|
+
size_t M = (L + R) / 2;
|
|
322
|
+
if (this->rank(val, M) < rank) {
|
|
323
|
+
L = M + 1;
|
|
324
|
+
} else {
|
|
325
|
+
R = M;
|
|
326
|
+
}
|
|
327
|
+
}
|
|
328
|
+
if (L > 0) L--;
|
|
329
|
+
for (;;) {
|
|
330
|
+
if (this->rank(val, L) == rank) {
|
|
331
|
+
return L - 1;
|
|
332
|
+
}
|
|
333
|
+
L++;
|
|
334
|
+
if (L > size_) return cybozu::NotFound;
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
};
|
|
338
|
+
|
|
339
|
+
// WaveletMatrixT instantiated with its default template arguments
typedef WaveletMatrixT<> WaveletMatrix;
|
|
340
|
+
|
|
341
|
+
} // cybozu
|
|
342
|
+
|
|
343
|
+
#ifdef _MSC_VER
|
|
344
|
+
#pragma warning(pop)
|
|
345
|
+
#endif
|