ooxml_crypt 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +58 -0
- data/Rakefile +12 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/ext/ooxml_crypt/extconf.rb +18 -0
- data/ext/ooxml_crypt/ooxml_crypt.c +27 -0
- data/ext/ooxml_crypt/ooxml_crypt.h +7 -0
- data/lib/ooxml_crypt/version.rb +5 -0
- data/lib/ooxml_crypt.rb +75 -0
- data/vendor/cybozulib/.github/workflows/main.yml +12 -0
- data/vendor/cybozulib/.gitignore +5 -0
- data/vendor/cybozulib/CMakeLists.txt +6 -0
- data/vendor/cybozulib/COPYRIGHT +27 -0
- data/vendor/cybozulib/Makefile +26 -0
- data/vendor/cybozulib/bin/libeay32.dll +0 -0
- data/vendor/cybozulib/bin/libmecab.dll +0 -0
- data/vendor/cybozulib/bin/ssleay32.dll +0 -0
- data/vendor/cybozulib/common.mk +116 -0
- data/vendor/cybozulib/common.props +25 -0
- data/vendor/cybozulib/cybozulib.sln +286 -0
- data/vendor/cybozulib/debug.props +14 -0
- data/vendor/cybozulib/include/cybozu/array.hpp +197 -0
- data/vendor/cybozulib/include/cybozu/atoi.hpp +238 -0
- data/vendor/cybozulib/include/cybozu/atomic.hpp +146 -0
- data/vendor/cybozulib/include/cybozu/base64.hpp +210 -0
- data/vendor/cybozulib/include/cybozu/benchmark.hpp +212 -0
- data/vendor/cybozulib/include/cybozu/bfd.hpp +105 -0
- data/vendor/cybozulib/include/cybozu/bit_operation.hpp +139 -0
- data/vendor/cybozulib/include/cybozu/bitvector.hpp +358 -0
- data/vendor/cybozulib/include/cybozu/condition_variable.hpp +113 -0
- data/vendor/cybozulib/include/cybozu/condition_variable_cs.hpp +74 -0
- data/vendor/cybozulib/include/cybozu/config.hpp +392 -0
- data/vendor/cybozulib/include/cybozu/critical_section.hpp +60 -0
- data/vendor/cybozulib/include/cybozu/crypto.hpp +321 -0
- data/vendor/cybozulib/include/cybozu/csucvector.hpp +624 -0
- data/vendor/cybozulib/include/cybozu/csv.hpp +294 -0
- data/vendor/cybozulib/include/cybozu/data_type.hpp +27 -0
- data/vendor/cybozulib/include/cybozu/endian.hpp +224 -0
- data/vendor/cybozulib/include/cybozu/env.hpp +63 -0
- data/vendor/cybozulib/include/cybozu/event.hpp +122 -0
- data/vendor/cybozulib/include/cybozu/exception.hpp +253 -0
- data/vendor/cybozulib/include/cybozu/file.hpp +626 -0
- data/vendor/cybozulib/include/cybozu/fmindex.hpp +291 -0
- data/vendor/cybozulib/include/cybozu/format.hpp +93 -0
- data/vendor/cybozulib/include/cybozu/frequency.hpp +264 -0
- data/vendor/cybozulib/include/cybozu/hash.hpp +67 -0
- data/vendor/cybozulib/include/cybozu/inttype.hpp +174 -0
- data/vendor/cybozulib/include/cybozu/itoa.hpp +336 -0
- data/vendor/cybozulib/include/cybozu/json.hpp +120 -0
- data/vendor/cybozulib/include/cybozu/line_stream.hpp +149 -0
- data/vendor/cybozulib/include/cybozu/link_libeay32.hpp +21 -0
- data/vendor/cybozulib/include/cybozu/link_mpir.hpp +18 -0
- data/vendor/cybozulib/include/cybozu/link_ssleay32.hpp +19 -0
- data/vendor/cybozulib/include/cybozu/log.hpp +237 -0
- data/vendor/cybozulib/include/cybozu/minixml.hpp +452 -0
- data/vendor/cybozulib/include/cybozu/mmap.hpp +143 -0
- data/vendor/cybozulib/include/cybozu/mutex.hpp +144 -0
- data/vendor/cybozulib/include/cybozu/nlp/mecab.hpp +96 -0
- data/vendor/cybozulib/include/cybozu/nlp/plsi.hpp +315 -0
- data/vendor/cybozulib/include/cybozu/nlp/random.hpp +74 -0
- data/vendor/cybozulib/include/cybozu/nlp/sparse.hpp +529 -0
- data/vendor/cybozulib/include/cybozu/nlp/svd.hpp +486 -0
- data/vendor/cybozulib/include/cybozu/nlp/tfidf.hpp +226 -0
- data/vendor/cybozulib/include/cybozu/nlp/top_score.hpp +75 -0
- data/vendor/cybozulib/include/cybozu/option.hpp +743 -0
- data/vendor/cybozulib/include/cybozu/parallel.hpp +88 -0
- data/vendor/cybozulib/include/cybozu/pcg.hpp +72 -0
- data/vendor/cybozulib/include/cybozu/process.hpp +324 -0
- data/vendor/cybozulib/include/cybozu/quit_signal_handler.hpp +66 -0
- data/vendor/cybozulib/include/cybozu/random_generator.hpp +144 -0
- data/vendor/cybozulib/include/cybozu/regex.hpp +463 -0
- data/vendor/cybozulib/include/cybozu/select8.hpp +279 -0
- data/vendor/cybozulib/include/cybozu/serializer.hpp +363 -0
- data/vendor/cybozulib/include/cybozu/sha1.hpp +209 -0
- data/vendor/cybozulib/include/cybozu/sha2.hpp +506 -0
- data/vendor/cybozulib/include/cybozu/siphash.hpp +105 -0
- data/vendor/cybozulib/include/cybozu/socket.hpp +785 -0
- data/vendor/cybozulib/include/cybozu/ssl.hpp +203 -0
- data/vendor/cybozulib/include/cybozu/stacktrace.hpp +291 -0
- data/vendor/cybozulib/include/cybozu/stream.hpp +269 -0
- data/vendor/cybozulib/include/cybozu/string.hpp +1746 -0
- data/vendor/cybozulib/include/cybozu/string_operation.hpp +365 -0
- data/vendor/cybozulib/include/cybozu/sucvector.hpp +378 -0
- data/vendor/cybozulib/include/cybozu/test.hpp +373 -0
- data/vendor/cybozulib/include/cybozu/thread.hpp +229 -0
- data/vendor/cybozulib/include/cybozu/time.hpp +281 -0
- data/vendor/cybozulib/include/cybozu/tls.hpp +115 -0
- data/vendor/cybozulib/include/cybozu/unordered_map.hpp +13 -0
- data/vendor/cybozulib/include/cybozu/unordered_set.hpp +13 -0
- data/vendor/cybozulib/include/cybozu/v128.hpp +376 -0
- data/vendor/cybozulib/include/cybozu/wavelet_matrix.hpp +345 -0
- data/vendor/cybozulib/include/cybozu/xorshift.hpp +189 -0
- data/vendor/cybozulib/include/cybozu/zlib.hpp +325 -0
- data/vendor/cybozulib/include/sais.hxx +364 -0
- data/vendor/cybozulib/misc/make_select8tbl.cpp +26 -0
- data/vendor/cybozulib/mk.bat +37 -0
- data/vendor/cybozulib/readme.md +29 -0
- data/vendor/cybozulib/release.props +12 -0
- data/vendor/cybozulib/sample/Makefile +30 -0
- data/vendor/cybozulib/sample/csucvector_smpl.cpp +42 -0
- data/vendor/cybozulib/sample/data/svd/org/test1.S +4 -0
- data/vendor/cybozulib/sample/data/svd/org/test1.U +4 -0
- data/vendor/cybozulib/sample/data/svd/org/test1.V +6 -0
- data/vendor/cybozulib/sample/data/svd/test1 +4 -0
- data/vendor/cybozulib/sample/data/svd/test2 +4 -0
- data/vendor/cybozulib/sample/desymbol.cpp +127 -0
- data/vendor/cybozulib/sample/exception_smpl.cpp +46 -0
- data/vendor/cybozulib/sample/fmindex_smpl.cpp +231 -0
- data/vendor/cybozulib/sample/log_smpl.cpp +19 -0
- data/vendor/cybozulib/sample/mecab_smpl.cpp +37 -0
- data/vendor/cybozulib/sample/option2_smpl.cpp +68 -0
- data/vendor/cybozulib/sample/option_smpl.cpp +42 -0
- data/vendor/cybozulib/sample/plsi_smpl.cpp +207 -0
- data/vendor/cybozulib/sample/proj/exception_smpl.vcproj +184 -0
- data/vendor/cybozulib/sample/proj/mecab_smpl.vcproj +184 -0
- data/vendor/cybozulib/sample/proj/ssl_smpl/ssl_smpl.vcxproj +85 -0
- data/vendor/cybozulib/sample/proj/ssl_smpl.vcproj +347 -0
- data/vendor/cybozulib/sample/proj/stacktrace_smpl/stacktrace_smpl.vcxproj +85 -0
- data/vendor/cybozulib/sample/proj/svd_smpl.vcproj +184 -0
- data/vendor/cybozulib/sample/quit_signal_handler.cpp +30 -0
- data/vendor/cybozulib/sample/serializer_smpl.cpp +196 -0
- data/vendor/cybozulib/sample/socket_smpl.cpp +82 -0
- data/vendor/cybozulib/sample/ssl_smpl.cpp +39 -0
- data/vendor/cybozulib/sample/stacktrace_smpl.cpp +52 -0
- data/vendor/cybozulib/sample/svd_bench_smpl.cpp +143 -0
- data/vendor/cybozulib/sample/svd_smpl.cpp +94 -0
- data/vendor/cybozulib/sample/wm_bench_smpl.cpp +182 -0
- data/vendor/cybozulib/sample/zlib_smpl.cpp +41 -0
- data/vendor/cybozulib/src/Makefile +8 -0
- data/vendor/cybozulib/src/base/Makefile +19 -0
- data/vendor/cybozulib/test/Makefile +12 -0
- data/vendor/cybozulib/test/base/Makefile +37 -0
- data/vendor/cybozulib/test/base/array_test.cpp +173 -0
- data/vendor/cybozulib/test/base/atoi_test.cpp +774 -0
- data/vendor/cybozulib/test/base/atomic_test.cpp +49 -0
- data/vendor/cybozulib/test/base/base64_test.cpp +113 -0
- data/vendor/cybozulib/test/base/bit_operation_test.cpp +134 -0
- data/vendor/cybozulib/test/base/bitvector_test.cpp +204 -0
- data/vendor/cybozulib/test/base/condition_variable_cs_test.cpp +92 -0
- data/vendor/cybozulib/test/base/condition_variable_test.cpp +88 -0
- data/vendor/cybozulib/test/base/config_test.cpp +236 -0
- data/vendor/cybozulib/test/base/crypto_test.cpp +122 -0
- data/vendor/cybozulib/test/base/csucvector_test.cpp +63 -0
- data/vendor/cybozulib/test/base/csv_test.cpp +182 -0
- data/vendor/cybozulib/test/base/data/a.xml +26 -0
- data/vendor/cybozulib/test/base/endian_test.cpp +56 -0
- data/vendor/cybozulib/test/base/env_test.cpp +22 -0
- data/vendor/cybozulib/test/base/event_test.cpp +41 -0
- data/vendor/cybozulib/test/base/file_test.cpp +233 -0
- data/vendor/cybozulib/test/base/fmindex_test.cpp +118 -0
- data/vendor/cybozulib/test/base/format_test.cpp +12 -0
- data/vendor/cybozulib/test/base/frequency_test.cpp +104 -0
- data/vendor/cybozulib/test/base/itoa_test.cpp +522 -0
- data/vendor/cybozulib/test/base/line_stream_test.cpp +208 -0
- data/vendor/cybozulib/test/base/mecab_test.cpp +41 -0
- data/vendor/cybozulib/test/base/minixml_test.cpp +103 -0
- data/vendor/cybozulib/test/base/mmap_test.cpp +15 -0
- data/vendor/cybozulib/test/base/option_test.cpp +487 -0
- data/vendor/cybozulib/test/base/parallel_test.cpp +48 -0
- data/vendor/cybozulib/test/base/proj/array_test/array_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/atoi_test/atoi_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/atomic_test/atomic_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/base64_test/base64_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/condition_variable_cs_test/condition_variable_cs_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/condition_variable_test/condition_variable_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/config_test/config_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/csv_test/csv_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/endian_test/endian_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/env_test/env_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/event_test/event_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/file_test/file_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/itoa_test/itoa_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/mecab_test/mecab_test.vcxproj +88 -0
- data/vendor/cybozulib/test/base/proj/minixml_test/minixml_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/mmap_test/mmap_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/serializer_test/serializer_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/sha1_test/sha1_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/stream_test/stream_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/string_operation_test/string_operation_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/string_test/string_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/thread_test/thread_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/time_test/time_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/tls_test/tls_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/zlib_test/zlib_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/random_generator_test.cpp +28 -0
- data/vendor/cybozulib/test/base/regex_test.cpp +74 -0
- data/vendor/cybozulib/test/base/serializer_test.cpp +483 -0
- data/vendor/cybozulib/test/base/sha1_test.cpp +61 -0
- data/vendor/cybozulib/test/base/sha2_test.cpp +191 -0
- data/vendor/cybozulib/test/base/siphash_test.cpp +33 -0
- data/vendor/cybozulib/test/base/socket_test.cpp +76 -0
- data/vendor/cybozulib/test/base/stream_test.cpp +101 -0
- data/vendor/cybozulib/test/base/string_operation_test.cpp +340 -0
- data/vendor/cybozulib/test/base/string_test.cpp +1705 -0
- data/vendor/cybozulib/test/base/sucvector_test.cpp +312 -0
- data/vendor/cybozulib/test/base/thread_test.cpp +62 -0
- data/vendor/cybozulib/test/base/time_test.cpp +164 -0
- data/vendor/cybozulib/test/base/tls_test.cpp +50 -0
- data/vendor/cybozulib/test/base/wavelet_matrix_test.cpp +145 -0
- data/vendor/cybozulib/test/base/zlib_test.cpp +371 -0
- data/vendor/cybozulib/test/nlp/Makefile +27 -0
- data/vendor/cybozulib/test/nlp/proj/random_test.vcproj +184 -0
- data/vendor/cybozulib/test/nlp/proj/sparse_test.vcproj +184 -0
- data/vendor/cybozulib/test/nlp/proj/svd_test.vcproj +184 -0
- data/vendor/cybozulib/test/nlp/random_test.cpp +62 -0
- data/vendor/cybozulib/test/nlp/sparse_test.cpp +347 -0
- data/vendor/cybozulib/test/nlp/svd_test.cpp +234 -0
- data/vendor/cybozulib/test/nlp/top_score_test.cpp +40 -0
- data/vendor/cybozulib/tool/create_vcproj.py +186 -0
- data/vendor/cybozulib/tool/vcproj_tmpl.py +185 -0
- data/vendor/msoffice/COPYRIGHT +27 -0
- data/vendor/msoffice/Makefile +29 -0
- data/vendor/msoffice/bin/64/msoc.dll +0 -0
- data/vendor/msoffice/bin/64/msocsample.exe +0 -0
- data/vendor/msoffice/bin/64/msoffice-crypt.exe +0 -0
- data/vendor/msoffice/bin/msoc.dll +0 -0
- data/vendor/msoffice/bin/msocsample.exe +0 -0
- data/vendor/msoffice/bin/msoffice-crypt.exe +0 -0
- data/vendor/msoffice/common.mk +71 -0
- data/vendor/msoffice/common.props +26 -0
- data/vendor/msoffice/debug.props +14 -0
- data/vendor/msoffice/include/attack.hpp +211 -0
- data/vendor/msoffice/include/cfb.hpp +777 -0
- data/vendor/msoffice/include/crypto_util.hpp +450 -0
- data/vendor/msoffice/include/custom_sha1.hpp +342 -0
- data/vendor/msoffice/include/decode.hpp +240 -0
- data/vendor/msoffice/include/encode.hpp +221 -0
- data/vendor/msoffice/include/make_dataspace.hpp +316 -0
- data/vendor/msoffice/include/msoc.h +129 -0
- data/vendor/msoffice/include/resource.hpp +7 -0
- data/vendor/msoffice/include/standard_encryption.hpp +145 -0
- data/vendor/msoffice/include/uint32vec.hpp +179 -0
- data/vendor/msoffice/include/util.hpp +212 -0
- data/vendor/msoffice/lib/.emptydir +0 -0
- data/vendor/msoffice/misc/decrypt-xls.vbs +46 -0
- data/vendor/msoffice/mk.bat +1 -0
- data/vendor/msoffice/mkdll.bat +3 -0
- data/vendor/msoffice/msoc.def +13 -0
- data/vendor/msoffice/msocsample.py +178 -0
- data/vendor/msoffice/msoffice12.sln +31 -0
- data/vendor/msoffice/readme.md +110 -0
- data/vendor/msoffice/release.props +28 -0
- data/vendor/msoffice/src/Makefile +19 -0
- data/vendor/msoffice/src/attack.cpp +124 -0
- data/vendor/msoffice/src/cfb_test.cpp +77 -0
- data/vendor/msoffice/src/minisample.c +54 -0
- data/vendor/msoffice/src/msocdll.cpp +276 -0
- data/vendor/msoffice/src/msocsample.c +136 -0
- data/vendor/msoffice/src/msoffice-crypt.cpp +219 -0
- data/vendor/msoffice/src/proj/attack/attack.vcxproj +88 -0
- data/vendor/msoffice/src/proj/main/msoffice-crypt.vcxproj +88 -0
- data/vendor/msoffice/src/sha1.cpp +234 -0
- data/vendor/msoffice/test/Makefile +20 -0
- data/vendor/msoffice/test/cfb_test.cpp +74 -0
- data/vendor/msoffice/test/hash_test.cpp +59 -0
- data/vendor/msoffice/test/proj/cfb/cfb_test.vcxproj +90 -0
- data/vendor/msoffice/test/proj/hash/hash_test.vcxproj +90 -0
- data/vendor/msoffice/test/sampl.bat +8 -0
- data/vendor/msoffice/test_all.py +46 -0
- data/vendor/update +4 -0
- metadata +351 -0
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* sais.hxx for sais-lite
|
|
3
|
+
* Copyright (c) 2008-2009 Yuta Mori All Rights Reserved.
|
|
4
|
+
*
|
|
5
|
+
* Permission is hereby granted, free of charge, to any person
|
|
6
|
+
* obtaining a copy of this software and associated documentation
|
|
7
|
+
* files (the "Software"), to deal in the Software without
|
|
8
|
+
* restriction, including without limitation the rights to use,
|
|
9
|
+
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
* copies of the Software, and to permit persons to whom the
|
|
11
|
+
* Software is furnished to do so, subject to the following
|
|
12
|
+
* conditions:
|
|
13
|
+
*
|
|
14
|
+
* The above copyright notice and this permission notice shall be
|
|
15
|
+
* included in all copies or substantial portions of the Software.
|
|
16
|
+
*
|
|
17
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
18
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
19
|
+
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
20
|
+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
21
|
+
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
22
|
+
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
23
|
+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
24
|
+
* OTHER DEALINGS IN THE SOFTWARE.
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
#ifndef _SAIS_HXX
|
|
28
|
+
#define _SAIS_HXX 1
|
|
29
|
+
#ifdef __cplusplus
|
|
30
|
+
|
|
31
|
+
#ifdef __INTEL_COMPILER
|
|
32
|
+
#pragma warning(disable : 383 981 1418)
|
|
33
|
+
// for icc 64-bit
|
|
34
|
+
//#define __builtin_vsnprintf(a, b, c, d) __builtin_vsnprintf(a, b, c, (char *)d)
|
|
35
|
+
#endif
|
|
36
|
+
|
|
37
|
+
#include <iterator>
|
|
38
|
+
#ifdef _OPENMP
|
|
39
|
+
# include <omp.h>
|
|
40
|
+
#endif
|
|
41
|
+
|
|
42
|
+
namespace saisxx_private {
|
|
43
|
+
|
|
44
|
+
/* find the start or end of each bucket */
|
|
45
|
+
/*
 * Tally how often each symbol occurs in T[0..n-1] and store the totals in
 * C[0..k-1].
 *
 * T : input string (random access); every T[i] is used as an index into C
 * C : output counts; without OpenMP the first k entries are written
 * n : number of symbols in T
 * k : number of buckets (alphabet size)
 *
 * With OpenMP enabled, C is used as omp_get_max_threads() consecutive
 * stripes of k entries each: every thread counts its own slice of T into its
 * own stripe, and the stripes are then summed into the first one.
 */
template<typename string_type, typename bucket_type, typename index_type>
void
getCounts(const string_type T, bucket_type C, index_type n, index_type k) {
#ifdef _OPENMP
  const int maxthreads = omp_get_max_threads();
#pragma omp parallel default(shared)
  {
    /* per-thread state lives inside the region, so it is private */
    const int thnum = omp_get_thread_num();
    bucket_type D = C + thnum * k;
    const index_type first = n / maxthreads * thnum;
    /* the last thread also takes the remainder of the division */
    const index_type last =
        (thnum < (maxthreads - 1)) ? n / maxthreads * (thnum + 1) : n;
    index_type i;
    for(i = 0; i < k; ++i) {
      D[i] = 0;
    }
    for(i = first; i < last; ++i) {
      ++D[T[i]];
    }
  }
  if(1 < maxthreads) {
    /* fold the per-thread stripes into stripe 0 */
#pragma omp parallel for default(shared)
    for(index_type sym = 0; sym < k; ++sym) {
      index_type sum = C[sym];
      index_type p = sym + k;
      for(index_type j = 1; j < maxthreads; ++j) {
        sum += C[p];
        p += k;
      }
      C[sym] = sum;
    }
  }
#else
  for(index_type sym = 0; sym < k; ++sym) {
    C[sym] = 0;
  }
  for(index_type pos = 0; pos < n; ++pos) {
    ++C[T[pos]];
  }
#endif
}
|
|
76
|
+
/*
 * Turn per-symbol counts into bucket boundaries.
 *
 * C   : counts for symbols 0..k-1
 * B   : output; may be the same storage as C
 * k   : number of buckets
 * end : true  -> B[i] is the running total including C[i] (bucket end),
 *       false -> B[i] is the running total before C[i]    (bucket start)
 */
template<typename bucket_type, typename index_type>
void
getBuckets(const bucket_type C, bucket_type B, index_type k, bool end) {
  index_type total = 0;
  for(index_type sym = 0; sym < k; ++sym) {
    total += C[sym];
    if(end) {
      B[sym] = total;
    } else {
      /* C[sym] is read before B[sym] is written, so C == B is fine */
      B[sym] = total - C[sym];
    }
  }
}
|
|
83
|
+
|
|
84
|
+
/* compute SA and BWT */
|
|
85
|
+
/*
 * Induce the remaining suffix-array entries from the ones already stored in
 * SA, using two sweeps over SA[0..n-1].
 *
 * T    : input string
 * SA   : suffix array being built (read and written in place)
 * C, B : bucket counts and bucket boundaries; when they share storage the
 *        counts are recomputed before each sweep, because getBuckets
 *        overwrites them
 * n, k : string length and number of buckets
 *
 * Entries are written bit-complemented (~pos) or plain depending on how the
 * preceding character compares with the current bucket's character; the
 * sweeps use the sign of an entry to decide whether to process it.
 */
template<typename string_type, typename sarray_type,
         typename bucket_type, typename index_type>
void
induceSA(string_type T, sarray_type SA, bucket_type C, bucket_type B,
         index_type n, index_type k) {
  typedef typename std::iterator_traits<string_type>::value_type char_type;
  sarray_type slot;          /* next write position inside the current bucket */
  index_type scan, pos;
  char_type ch, bucketCh;    /* bucketCh: character whose bucket 'slot' is in */

  /* forward sweep (the original "compute SAl" step): fill from bucket starts */
  if(C == B) { getCounts(T, C, n, k); }
  getBuckets(C, B, k, false);
  pos = n - 1;
  bucketCh = T[pos];
  slot = SA + B[bucketCh];
  *slot++ = ((0 < pos) && (T[pos - 1] < bucketCh)) ? ~pos : pos;
  for(scan = 0; scan < n; ++scan) {
    pos = SA[scan];
    SA[scan] = ~pos;
    if(0 < pos) {
      --pos;
      ch = T[pos];
      if(ch != bucketCh) {
        /* remember where the old bucket stopped, then switch buckets */
        B[bucketCh] = slot - SA;
        bucketCh = ch;
        slot = SA + B[bucketCh];
      }
      *slot++ = ((0 < pos) && (T[pos - 1] < bucketCh)) ? ~pos : pos;
    }
  }

  /* backward sweep (the original "compute SAs" step): fill from bucket ends */
  if(C == B) { getCounts(T, C, n, k); }
  getBuckets(C, B, k, true);
  bucketCh = 0;
  slot = SA + B[bucketCh];
  for(scan = n - 1; 0 <= scan; --scan) {
    pos = SA[scan];
    if(0 < pos) {
      --pos;
      ch = T[pos];
      if(ch != bucketCh) {
        B[bucketCh] = slot - SA;
        bucketCh = ch;
        slot = SA + B[bucketCh];
      }
      *--slot = ((pos == 0) || (T[pos - 1] > bucketCh)) ? ~pos : pos;
    } else {
      /* non-positive entry: restore it by complementing */
      SA[scan] = ~pos;
    }
  }
}
|
|
118
|
+
/*
 * Variant of the induce step that leaves characters of T in SA instead of
 * suffix positions, and reports where the entry for position 0 ended up.
 *
 * T    : input string
 * SA   : work array (read and written in place)
 * C, B : bucket counts and bucket boundaries; when they share storage the
 *        counts are recomputed before each sweep
 * n, k : string length and number of buckets
 *
 * Returns the index in SA at which the backward sweep met the value 0, or -1
 * if it never did.
 */
template<typename string_type, typename sarray_type,
         typename bucket_type, typename index_type>
int
computeBWT(string_type T, sarray_type SA, bucket_type C, bucket_type B,
           index_type n, index_type k) {
  typedef typename std::iterator_traits<string_type>::value_type char_type;
  sarray_type slot;          /* next write position inside the current bucket */
  index_type scan, pos, pidx = -1;
  char_type ch, bucketCh;

  /* forward sweep (the original "compute SAl" step): fill from bucket starts */
  if(C == B) { getCounts(T, C, n, k); }
  getBuckets(C, B, k, false);
  pos = n - 1;
  bucketCh = T[pos];
  slot = SA + B[bucketCh];
  *slot++ = ((0 < pos) && (T[pos - 1] < bucketCh)) ? ~pos : pos;
  for(scan = 0; scan < n; ++scan) {
    pos = SA[scan];
    if(0 < pos) {
      --pos;
      ch = T[pos];
      /* replace the processed position by the complemented preceding char */
      SA[scan] = ~ch;
      if(ch != bucketCh) {
        B[bucketCh] = slot - SA;
        bucketCh = ch;
        slot = SA + B[bucketCh];
      }
      *slot++ = ((0 < pos) && (T[pos - 1] < bucketCh)) ? ~pos : pos;
    } else if(pos != 0) {
      SA[scan] = ~pos;
    }
  }

  /* backward sweep (the original "compute SAs" step): fill from bucket ends */
  if(C == B) { getCounts(T, C, n, k); }
  getBuckets(C, B, k, true);
  bucketCh = 0;
  slot = SA + B[bucketCh];
  for(scan = n - 1; 0 <= scan; --scan) {
    pos = SA[scan];
    if(0 < pos) {
      --pos;
      ch = T[pos];
      SA[scan] = ch;
      if(ch != bucketCh) {
        B[bucketCh] = slot - SA;
        bucketCh = ch;
        slot = SA + B[bucketCh];
      }
      *--slot = ((0 < pos) && (T[pos - 1] > bucketCh))
                    ? ~((index_type)T[pos - 1])
                    : pos;
    } else if(pos != 0) {
      SA[scan] = ~pos;
    } else {
      /* the entry holding 0 marks the primary index */
      pidx = scan;
    }
  }
  return pidx;
}
|
|
157
|
+
|
|
158
|
+
/*
 * Find the suffix array SA of T[0..n-1], whose symbols index buckets
 * 0..k-1, or its BWT when isbwt is true.
 *
 * T     : input string (random access)
 * SA    : output / work array; SA[n..n+fs-1] may be used as extra space
 * fs    : number of free entries available behind SA[n-1]
 * n     : length of T
 * k     : number of buckets (alphabet size)
 * isbwt : false -> finish with induceSA, true -> finish with computeBWT
 *
 * The bucket arrays are placed in the free space behind SA when fs is large
 * enough, otherwise they are allocated with new[] and released afterwards.
 *
 * Returns 0 in suffix-array mode, the value of computeBWT in BWT mode, and
 * -2 when an allocation yields a null pointer or the recursive call fails.
 */
template<typename string_type, typename sarray_type, typename index_type>
int
suffixsort(string_type T, sarray_type SA,
           index_type fs, index_type n, index_type k,
           bool isbwt) {
  typedef typename std::iterator_traits<string_type>::value_type char_type;
  sarray_type RA;
  index_type i, j, m, p, q, plen, qlen, name, pidx = 0;
  bool diff;
  int c;
  char_type c0, c1;
#ifdef _OPENMP
  const int maxthreads = omp_get_max_threads();
#else
  const int maxthreads = 1;
#endif

  /* stage 1: reduce the problem by at least 1/2
     sort all the S-substrings */
  if(fs < (maxthreads * k)) {
    /* not enough free space behind SA: use heap buckets */
    index_type *C, *B;
    C = new index_type[maxthreads * k];
    if(C == 0) { return -2; }
    B = (1 < maxthreads) ? C + k : C;
    getCounts(T, C, n, k);
    getBuckets(C, B, k, true); /* find ends of buckets */
#ifdef _OPENMP
#pragma omp parallel for default(shared) private(i)
#endif
    for(i = 0; i < n; ++i) { SA[i] = 0; }
    c = 0;
    c1 = T[n - 1];
    for(i = n - 2; 0 <= i; --i) {
      c0 = T[i];
      if(c0 < (c1 + c)) { c = 1; }
      else if(c != 0) { SA[--B[c1]] = i + 1; c = 0; }
      c1 = c0;
    }
    induceSA(T, SA, C, B, n, k);
    delete [] C;
  } else {
    /* buckets live in the free space directly behind SA[n-1] */
    sarray_type C, B;
    C = SA + n;
    B = ((1 < maxthreads) || (k <= (fs - k))) ? C + k : C;
    getCounts(T, C, n, k);
    getBuckets(C, B, k, true); /* find ends of buckets */
#ifdef _OPENMP
#pragma omp parallel for default(shared) private(i)
#endif
    for(i = 0; i < n; ++i) { SA[i] = 0; }
    c = 0;
    c1 = T[n - 1];
    for(i = n - 2; 0 <= i; --i) {
      c0 = T[i];
      if(c0 < (c1 + c)) { c = 1; }
      else if(c != 0) { SA[--B[c1]] = i + 1; c = 0; }
      c1 = c0;
    }
    induceSA(T, SA, C, B, n, k);
  }

  /* compact all the sorted substrings into the first m items of SA
     2*m must be not larger than n (provable) */
#ifdef _OPENMP
  /* first mark the qualifying entries in parallel, then compact serially */
#pragma omp parallel for default(shared) private(i, j, p, c0, c1)
  for(i = 0; i < n; ++i) {
    p = SA[i];
    if((0 < p) && (T[p - 1] > (c0 = T[p]))) {
      for(j = p + 1; (j < n) && (c0 == (c1 = T[j])); ++j) { }
      if((j < n) && (c0 < c1)) { SA[i] = ~p; }
    }
  }
  m = 0;
  for(i = 0; i < n; ++i) {
    p = SA[i];
    if(p < 0) { SA[m++] = ~p; }
  }
#else
  m = 0;
  for(i = 0; i < n; ++i) {
    p = SA[i];
    if(0 < p) {
      c0 = T[p];
      if(T[p - 1] > c0) {
        /* skip the run of characters equal to c0 */
        j = p + 1;
        while((j < n) && (c0 == (c1 = T[j]))) { ++j; }
        if((j < n) && (c0 < c1)) { SA[m++] = p; }
      }
    }
  }
#endif
  j = m + (n >> 1);
#ifdef _OPENMP
#pragma omp parallel for default(shared) private(i)
#endif
  for(i = m; i < j; ++i) { SA[i] = 0; } /* init the name array buffer */
  /* store the length of all substrings */
  j = n;
  c = 0;
  c1 = T[n - 1];
  for(i = n - 2; 0 <= i; --i) {
    c0 = T[i];
    if(c0 < (c1 + c)) { c = 1; }
    else if(c != 0) {
      SA[m + ((i + 1) >> 1)] = j - i - 1;
      j = i + 1;
      c = 0;
    }
    c1 = c0;
  }
  /* find the lexicographic names of all substrings */
  name = 0;
  q = n;
  qlen = 0;
  for(i = 0; i < m; ++i) {
    p = SA[i];
    plen = SA[m + (p >> 1)];
    diff = true;
    if(plen == qlen) {
      j = 0;
      while((j < plen) && (T[p + j] == T[q + j])) { ++j; }
      if(j == plen) { diff = false; }
    }
    if(diff) {
      ++name;
      q = p;
      qlen = plen;
    }
    SA[m + (p >> 1)] = name;
  }

  /* stage 2: solve the reduced problem
     recurse if names are not yet unique */
  if(name < m) {
    RA = SA + n + fs - m;
    j = m - 1;
    for(i = m + (n >> 1) - 1; m <= i; --i) {
      if(SA[i] != 0) { RA[j--] = SA[i] - 1; }
    }
    if(suffixsort(RA, SA, fs + n - m * 2, m, name, false) != 0) { return -2; }
    j = m - 1;
    c = 0;
    c1 = T[n - 1];
    for(i = n - 2; 0 <= i; --i) {
      c0 = T[i];
      if(c0 < (c1 + c)) { c = 1; }
      else if(c != 0) { RA[j--] = i + 1; c = 0; } /* get p1 */
      c1 = c0;
    }
#ifdef _OPENMP
#pragma omp parallel for default(shared) private(i)
#endif
    for(i = 0; i < m; ++i) { SA[i] = RA[SA[i]]; } /* get index in s */
  }

  /* stage 3: induce the result for the original problem */
  if(fs < (maxthreads * k)) {
    index_type *B, *C;
    C = new index_type[maxthreads * k];
    if(C == 0) { return -2; }
    B = (1 < maxthreads) ? C + k : C;
    /* put all left-most S characters into their buckets */
    getCounts(T, C, n, k);
    getBuckets(C, B, k, true); /* find ends of buckets */
#ifdef _OPENMP
#pragma omp parallel for default(shared) private(i)
#endif
    for(i = m; i < n; ++i) { SA[i] = 0; } /* init SA[m..n-1] */
    for(i = m - 1; 0 <= i; --i) {
      j = SA[i];
      SA[i] = 0;
      SA[--B[T[j]]] = j;
    }
    if(isbwt == false) { induceSA(T, SA, C, B, n, k); }
    else { pidx = computeBWT(T, SA, C, B, n, k); }
    delete [] C;
  } else {
    sarray_type C, B;
    C = SA + n;
    B = ((1 < maxthreads) || (k <= (fs - k))) ? C + k : C;
    /* put all left-most S characters into their buckets */
    getCounts(T, C, n, k);
    getBuckets(C, B, k, true); /* find ends of buckets */
#ifdef _OPENMP
#pragma omp parallel for default(shared) private(i)
#endif
    for(i = m; i < n; ++i) { SA[i] = 0; } /* init SA[m..n-1] */
    for(i = m - 1; 0 <= i; --i) {
      j = SA[i];
      SA[i] = 0;
      SA[--B[T[j]]] = j;
    }
    if(isbwt == false) { induceSA(T, SA, C, B, n, k); }
    else { pidx = computeBWT(T, SA, C, B, n, k); }
  }

  return pidx;
}
|
|
311
|
+
|
|
312
|
+
} /* namespace saisxx_private */
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
/**
|
|
316
|
+
* @brief Constructs the suffix array of a given string in linear time.
|
|
317
|
+
* @param T[0..n-1] The input string. (random access iterator)
|
|
318
|
+
* @param SA[0..n-1] The output array of suffixes. (random access iterator)
|
|
319
|
+
* @param n The length of the given string.
|
|
320
|
+
* @param k The alphabet size.
|
|
321
|
+
* @return 0 if no error occurred, -1 or -2 otherwise.
|
|
322
|
+
*/
|
|
323
|
+
template<typename string_type, typename sarray_type, typename index_type>
|
|
324
|
+
int
|
|
325
|
+
saisxx(string_type T, sarray_type SA, index_type n, index_type k = 256) {
|
|
326
|
+
int err;
|
|
327
|
+
if((n < 0) || (k <= 0)) { return -1; }
|
|
328
|
+
if(n <= 1) { if(n == 1) { SA[0] = 0; } return 0; }
|
|
329
|
+
try { err = saisxx_private::suffixsort(T, SA, 0, n, k, false); }
|
|
330
|
+
catch(...) { err = -2; }
|
|
331
|
+
return err;
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
/**
|
|
335
|
+
* @brief Constructs the burrows-wheeler transformed string of a given string in linear time.
|
|
336
|
+
* @param T[0..n-1] The input string. (random access iterator)
|
|
337
|
+
* @param U[0..n-1] The output string. (random access iterator)
|
|
338
|
+
* @param A[0..n-1] The temporary array. (random access iterator)
|
|
339
|
+
* @param n The length of the given string.
|
|
340
|
+
* @param k The alphabet size.
|
|
341
|
+
* @return The primary index if no error occurred, -1 or -2 otherwise.
|
|
342
|
+
*/
|
|
343
|
+
template<typename string_type, typename sarray_type, typename index_type>
|
|
344
|
+
index_type
|
|
345
|
+
saisxx_bwt(string_type T, string_type U, sarray_type A, index_type n, index_type k = 256) {
|
|
346
|
+
typedef typename std::iterator_traits<string_type>::value_type char_type;
|
|
347
|
+
index_type i, pidx;
|
|
348
|
+
if((n < 0) || (k <= 0)) { return -1; }
|
|
349
|
+
if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
|
|
350
|
+
try {
|
|
351
|
+
pidx = saisxx_private::suffixsort(T, A, 0, n, k, true);
|
|
352
|
+
if(0 <= pidx) {
|
|
353
|
+
U[0] = T[n - 1];
|
|
354
|
+
for(i = 0; i < pidx; ++i) { U[i + 1] = (char_type)A[i]; }
|
|
355
|
+
for(i += 1; i < n; ++i) { U[i] = (char_type)A[i]; }
|
|
356
|
+
pidx += 1;
|
|
357
|
+
}
|
|
358
|
+
} catch(...) { pidx = -2; }
|
|
359
|
+
return pidx;
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
#endif /* __cplusplus */
|
|
364
|
+
#endif /* _SAIS_HXX */
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
#include <stdio.h>
|
|
2
|
+
#include <stdlib.h>
|
|
3
|
+
#include <cybozu/inttype.hpp>
|
|
4
|
+
|
|
5
|
+
/*
 * Scan the bits of v from bit 0 upwards while keeping a running count of the
 * set bits seen so far, and return the first bit position at which that
 * count equals r. The comparison is made after every bit, set or not, so
 * for r == 0 the result is 0 when bit 0 is clear.
 * Returns 64 when the count never equals r.
 */
inline uint64_t select8(uint64_t v, size_t r)
{
	size_t seen = 0;
	int pos = 0;
	while (pos < 64) {
		if ((v >> pos) & uint64_t(1)) {
			++seen;
		}
		if (seen == r) {
			return pos;
		}
		++pos;
	}
	return 64;
}
|
|
16
|
+
|
|
17
|
+
int main()
|
|
18
|
+
{
|
|
19
|
+
for (int x = 0; x < 256; x++) {
|
|
20
|
+
printf("{");
|
|
21
|
+
for (int i = 0; i < 8; i++) {
|
|
22
|
+
printf("%d, ", select8(x, i));
|
|
23
|
+
}
|
|
24
|
+
printf("},\n");
|
|
25
|
+
}
|
|
26
|
+
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
@rem Build cybozulib.sln with devenv, run the unit-test executables in bin\,
@rem then run the exception sample. Pass "release" as the first argument for
@rem a Release build; anything else builds Debug.
@echo off
|
|
2
|
+
|
|
3
|
+
@rem Pick the configuration name and the executable suffix (Debug builds end in "d").
if /i "%1"=="release" (
|
|
4
|
+
set mode=Release
|
|
5
|
+
set suf=
|
|
6
|
+
) else (
|
|
7
|
+
set mode=Debug
|
|
8
|
+
set suf=d
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
@rem Append the platform to the configuration name, based on the host CPU.
if /i "%PROCESSOR_ARCHITECTURE%" == "AMD64" (
|
|
12
|
+
set mode="%mode%|x64"
|
|
13
|
+
) else (
|
|
14
|
+
set mode="%mode%|Win32"
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
@echo.
|
|
18
|
+
@echo ******** Building project(%mode%) ********
|
|
19
|
+
@echo.
|
|
20
|
+
mkdir bin lib include
|
|
21
|
+
devenv cybozulib.sln /Build %mode%
|
|
22
|
+
@rem for /F "usebackq" %%p in (`"dir /S /B *.vcproj"`) do devenv %%p /Build %mode%
|
|
23
|
+
@echo.
|
|
24
|
+
@echo ******** Unit test ********
|
|
25
|
+
@echo.
|
|
26
|
+
@rem NOTE(review): rm and grep are not cmd.exe built-ins; this presumably
@rem relies on Unix-style tools being on PATH - confirm.
rm -rf result.txt
|
|
27
|
+
@rem Run every test executable and keep only its "ctest:name" summary line.
for %%e in (bin\*_test%suf%.exe) do (%%e | grep "ctest:name") >> result.txt
|
|
28
|
+
@rem Show the summary lines that do not report "ng=0, exception=0".
grep -v "ng=0, exception=0" result.txt
|
|
29
|
+
@rem ERRORLEVEL 0 means grep printed at least one such line, so skip the
@rem "all ok" message.
if %ERRORLEVEL% == 0 goto sample
|
|
30
|
+
echo "all unit tests are ok"
|
|
31
|
+
:sample
|
|
32
|
+
@echo.
|
|
33
|
+
@echo ******** Run sample ********
|
|
34
|
+
@echo.
|
|
35
|
+
bin\exception_smpl%suf%.exe
|
|
36
|
+
|
|
37
|
+
:end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
[![Build Status](https://github.com/herumi/cybozulib/actions/workflows/main.yml/badge.svg)](https://github.com/herumi/cybozulib/actions/workflows/main.yml)
|
|
2
|
+
|
|
3
|
+
# cybozulib
|
|
4
|
+
|
|
5
|
+
# Abstract
|
|
6
|
+
This is a tiny C++ library for Windows and Linux.
|
|
7
|
+
|
|
8
|
+
# How to use
|
|
9
|
+
|
|
10
|
+
Directory layout:
|
|
11
|
+
|
|
12
|
+
```
|
|
13
|
+
<work dir>/cybozulib
|
|
14
|
+
/cybozulib_ext ; necessary for Windows if openssl, gmp are used
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
# License
|
|
18
|
+
[BSD 3-Clause License](http://opensource.org/licenses/BSD-3-Clause)
|
|
19
|
+
|
|
20
|
+
# Author
|
|
21
|
+
|
|
22
|
+
MITSUNARI Shigeo(herumi@nifty.com)
|
|
23
|
+
|
|
24
|
+
# Disclaimer
|
|
25
|
+
This OSS is my own personal work and does not have any relationship with Cybozu Labs, Inc.,
|
|
26
|
+
Cybozu Inc. or any other organization which I belong to.
|
|
27
|
+
|
|
28
|
+
# sais.hxx
|
|
29
|
+
sais.hxx is written by Yuta Mori.
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
|
2
|
+
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
3
|
+
<ImportGroup Label="PropertySheets" />
|
|
4
|
+
<PropertyGroup Label="UserMacros" />
|
|
5
|
+
<PropertyGroup />
|
|
6
|
+
<ItemDefinitionGroup>
|
|
7
|
+
<ClCompile>
|
|
8
|
+
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
|
9
|
+
</ClCompile>
|
|
10
|
+
</ItemDefinitionGroup>
|
|
11
|
+
<ItemGroup />
|
|
12
|
+
</Project>
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Build rules for the sample programs (*smpl.cpp) in this directory.
# TEST_FILE, SAMPLE_TEST, OBJDIR, MKDIR, CLEAN and DEPEND_FILE are not defined
# in this file; presumably they come from ../common.mk - confirm there.
include ../common.mk

TARGET=$(TEST_FILE)
# No libraries are listed, so the $(LIBS) rule further down has no targets.
LIBS=

# Every file ending in smpl.cpp is a sample source ...
SRC=$(wildcard *smpl.cpp)
# ... except those whose optional dependency is not enabled.
ifneq ($(HAS_EIGEN),1)
  SRC:=$(filter-out svd_bench_smpl.cpp,$(SRC))
  SRC:=$(filter-out svd_smpl.cpp,$(SRC))
endif
ifneq ($(HAS_MECAB),1)
  SRC:=$(filter-out mecab_smpl.cpp,$(SRC))
endif

all: $(TARGET)

# Build the samples, then run the SAMPLE_TEST command (echo suppressed by '@').
test: $(TARGET)
	@$(SAMPLE_TEST)

$(OBJDIR):
	@$(MKDIR) $(OBJDIR)

clean:
	$(CLEAN)

# Libraries, when any are listed in LIBS, are built by the makefile in ../src.
$(LIBS):
	$(MAKE) -C ../src

# Leading '-': a missing dependency file is not an error.
-include $(DEPEND_FILE)
|
|
30
|
+
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
#include <cybozu/csucvector.hpp>
|
|
2
|
+
#include <cybozu/mmap.hpp>
|
|
3
|
+
#include <sstream>
|
|
4
|
+
|
|
5
|
+
void test(const std::string& inName)
|
|
6
|
+
{
|
|
7
|
+
cybozu::Mmap m(inName);
|
|
8
|
+
const uint64_t *blk = reinterpret_cast<const uint64_t*>(m.get());
|
|
9
|
+
cybozu::CSucVector cv2(blk, m.size() * 8);
|
|
10
|
+
cybozu::CSucVector cv;
|
|
11
|
+
{
|
|
12
|
+
std::stringstream ss;
|
|
13
|
+
cv2.save(ss);
|
|
14
|
+
cv.load(ss);
|
|
15
|
+
const int inSize = (int)m.size();
|
|
16
|
+
const int outSize = (int)ss.str().size();
|
|
17
|
+
printf("rate = %.2f%% %d / %d\n", outSize * 100.0 / inSize, outSize, inSize);
|
|
18
|
+
}
|
|
19
|
+
cybozu::BitVector bv;
|
|
20
|
+
bv.resize(m.size() * 8);
|
|
21
|
+
for (size_t i = 0, n = bv.size(); i < n; i++) {
|
|
22
|
+
if (cv.get(i)) bv.set(i);
|
|
23
|
+
}
|
|
24
|
+
const uint64_t *p = bv.getBlock();
|
|
25
|
+
for (size_t i = 0, n = bv.getBlockSize(); i < n; i++) {
|
|
26
|
+
if (p[i] != blk[i]) {
|
|
27
|
+
printf("err i=%d %llx %llx\n", (int)i, (long long)blk[i], (long long)p[i]);
|
|
28
|
+
exit(1);
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
int main(int argc, char *argv[])
|
|
34
|
+
{
|
|
35
|
+
argc--, argv++;
|
|
36
|
+
if (argc != 1) {
|
|
37
|
+
fprintf(stderr, "csucvector_smp.exe\n");
|
|
38
|
+
return 1;
|
|
39
|
+
}
|
|
40
|
+
const std::string inName(argv[0]);
|
|
41
|
+
test(inName);
|
|
42
|
+
}
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/*
|
|
2
|
+
convert <addr> to <file:line addr>
|
|
3
|
+
input stdin
|
|
4
|
+
output stdout
|
|
5
|
+
sudo apt-get install libbfd-dev
|
|
6
|
+
build this with -lbfd
|
|
7
|
+
*/
|
|
8
|
+
#include <stdio.h>
|
|
9
|
+
#include <stdlib.h>
|
|
10
|
+
#include <iostream>
|
|
11
|
+
#include <fstream>
|
|
12
|
+
#include <cybozu/bfd.hpp>
|
|
13
|
+
#include <cybozu/stacktrace.hpp>
|
|
14
|
+
#include <cybozu/atoi.hpp>
|
|
15
|
+
#include <cybozu/option.hpp>
|
|
16
|
+
#include <cybozu/string_operation.hpp>
|
|
17
|
+
|
|
18
|
+
/*
|
|
19
|
+
find "[0x" ([0-9a-f]+) "]"
|
|
20
|
+
*/
|
|
21
|
+
const void* findAddr(const std::string& str)
|
|
22
|
+
{
|
|
23
|
+
size_t p = str.find("[0x");
|
|
24
|
+
if (p == std::string::npos) return 0;
|
|
25
|
+
size_t q = str.find("]", p + 3);
|
|
26
|
+
if (q == std::string::npos) return 0;
|
|
27
|
+
bool b;
|
|
28
|
+
size_t addr = cybozu::hextoi(&b, &str[p + 3], q - p - 3);
|
|
29
|
+
if (!b) return 0;
|
|
30
|
+
return (const void*)addr;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
void decode(std::string& str, cybozu::Bfd& bfd)
|
|
34
|
+
{
|
|
35
|
+
const void *addr = findAddr(str);
|
|
36
|
+
if (addr == 0) return;
|
|
37
|
+
std::string file;
|
|
38
|
+
std::string func;
|
|
39
|
+
int line;
|
|
40
|
+
if (!bfd.getInfo(&file, &func, &line, addr)) return;
|
|
41
|
+
cybozu::Demangle(func, func);
|
|
42
|
+
str = file + ':' + cybozu::itoa(line) + ' ' + func + ' ' + str;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
bool tryDecodeOnelineAndPut(const std::string& str, cybozu::Bfd& bfd)
|
|
46
|
+
{
|
|
47
|
+
const std::string oneline = "<<<STACKTRACE>>> ";
|
|
48
|
+
if (str.find(oneline) != 0) return false;
|
|
49
|
+
printf("<<<STACKTRACE\n");
|
|
50
|
+
std::istringstream iss(str.substr(oneline.size()));
|
|
51
|
+
std::string addrStr;
|
|
52
|
+
while (iss >> addrStr) {
|
|
53
|
+
if (addrStr.find("0x") != 0) break;
|
|
54
|
+
bool b;
|
|
55
|
+
size_t addr = cybozu::hextoi(&b, &addrStr[2], addrStr.size() - 2);
|
|
56
|
+
if (!b) break;
|
|
57
|
+
std::string file;
|
|
58
|
+
std::string func;
|
|
59
|
+
int line;
|
|
60
|
+
if (bfd.getInfo(&file, &func, &line, (const void*)addr)) {
|
|
61
|
+
cybozu::Demangle(func, func);
|
|
62
|
+
printf("%s:%d %s ", file.c_str(), line, func.c_str());
|
|
63
|
+
}
|
|
64
|
+
printf("%s\n", addrStr.c_str());
|
|
65
|
+
}
|
|
66
|
+
printf(">>>STACKTRACE\n");
|
|
67
|
+
return true;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
int main(int argc, char **argv)
|
|
71
|
+
try
|
|
72
|
+
{
|
|
73
|
+
cybozu::Option opt;
|
|
74
|
+
bool doCheckAll = false;
|
|
75
|
+
std::string exeName;
|
|
76
|
+
std::string textName;
|
|
77
|
+
opt.appendBoolOpt(&doCheckAll, "a", ": check all text");
|
|
78
|
+
opt.appendOpt(&textName, "-", "f", ": text file");
|
|
79
|
+
opt.appendParam(&exeName, "exe", ": exec file");
|
|
80
|
+
opt.appendHelp("h", ": put this message");
|
|
81
|
+
if (!opt.parse(argc, argv)) {
|
|
82
|
+
opt.usage();
|
|
83
|
+
return 1;
|
|
84
|
+
}
|
|
85
|
+
const std::string beginStackTrace = "<<<STACKTRACE";
|
|
86
|
+
const std::string endStackTrace = ">>>STACKTRACE";
|
|
87
|
+
bool inStackTrace = false;
|
|
88
|
+
cybozu::Bfd bfd(exeName);
|
|
89
|
+
std::string line;
|
|
90
|
+
std::istream *pis = 0;
|
|
91
|
+
std::ifstream ifs;
|
|
92
|
+
if (textName == "-") {
|
|
93
|
+
pis = &std::cin;
|
|
94
|
+
} else {
|
|
95
|
+
ifs.open(textName.c_str(), std::ios::binary);
|
|
96
|
+
pis = &ifs;
|
|
97
|
+
}
|
|
98
|
+
fprintf(stderr, "textName=%s, exeName=%s\n", textName.c_str(), exeName.c_str());
|
|
99
|
+
while (std::getline(*pis, line)) {
|
|
100
|
+
cybozu::Strip(line);
|
|
101
|
+
if (tryDecodeOnelineAndPut(line, bfd)) {
|
|
102
|
+
continue;
|
|
103
|
+
}
|
|
104
|
+
bool doDecode = false;
|
|
105
|
+
if (doCheckAll) {
|
|
106
|
+
doDecode = true;
|
|
107
|
+
} else {
|
|
108
|
+
if (inStackTrace) {
|
|
109
|
+
if (line.find(endStackTrace) == 0) {
|
|
110
|
+
inStackTrace = false;
|
|
111
|
+
} else {
|
|
112
|
+
doDecode = true;
|
|
113
|
+
}
|
|
114
|
+
} else {
|
|
115
|
+
if (line.find(beginStackTrace) == 0) {
|
|
116
|
+
inStackTrace = true;
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
if (doDecode) {
|
|
121
|
+
decode(line, bfd);
|
|
122
|
+
}
|
|
123
|
+
printf("%s\n", line.c_str());
|
|
124
|
+
}
|
|
125
|
+
} catch (std::exception& e) {
|
|
126
|
+
printf("ERR %s\n", e.what());
|
|
127
|
+
}
|