ooxml_crypt 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +58 -0
- data/Rakefile +12 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/ext/ooxml_crypt/extconf.rb +18 -0
- data/ext/ooxml_crypt/ooxml_crypt.c +27 -0
- data/ext/ooxml_crypt/ooxml_crypt.h +7 -0
- data/lib/ooxml_crypt/version.rb +5 -0
- data/lib/ooxml_crypt.rb +75 -0
- data/vendor/cybozulib/.github/workflows/main.yml +12 -0
- data/vendor/cybozulib/.gitignore +5 -0
- data/vendor/cybozulib/CMakeLists.txt +6 -0
- data/vendor/cybozulib/COPYRIGHT +27 -0
- data/vendor/cybozulib/Makefile +26 -0
- data/vendor/cybozulib/bin/libeay32.dll +0 -0
- data/vendor/cybozulib/bin/libmecab.dll +0 -0
- data/vendor/cybozulib/bin/ssleay32.dll +0 -0
- data/vendor/cybozulib/common.mk +116 -0
- data/vendor/cybozulib/common.props +25 -0
- data/vendor/cybozulib/cybozulib.sln +286 -0
- data/vendor/cybozulib/debug.props +14 -0
- data/vendor/cybozulib/include/cybozu/array.hpp +197 -0
- data/vendor/cybozulib/include/cybozu/atoi.hpp +238 -0
- data/vendor/cybozulib/include/cybozu/atomic.hpp +146 -0
- data/vendor/cybozulib/include/cybozu/base64.hpp +210 -0
- data/vendor/cybozulib/include/cybozu/benchmark.hpp +212 -0
- data/vendor/cybozulib/include/cybozu/bfd.hpp +105 -0
- data/vendor/cybozulib/include/cybozu/bit_operation.hpp +139 -0
- data/vendor/cybozulib/include/cybozu/bitvector.hpp +358 -0
- data/vendor/cybozulib/include/cybozu/condition_variable.hpp +113 -0
- data/vendor/cybozulib/include/cybozu/condition_variable_cs.hpp +74 -0
- data/vendor/cybozulib/include/cybozu/config.hpp +392 -0
- data/vendor/cybozulib/include/cybozu/critical_section.hpp +60 -0
- data/vendor/cybozulib/include/cybozu/crypto.hpp +321 -0
- data/vendor/cybozulib/include/cybozu/csucvector.hpp +624 -0
- data/vendor/cybozulib/include/cybozu/csv.hpp +294 -0
- data/vendor/cybozulib/include/cybozu/data_type.hpp +27 -0
- data/vendor/cybozulib/include/cybozu/endian.hpp +224 -0
- data/vendor/cybozulib/include/cybozu/env.hpp +63 -0
- data/vendor/cybozulib/include/cybozu/event.hpp +122 -0
- data/vendor/cybozulib/include/cybozu/exception.hpp +253 -0
- data/vendor/cybozulib/include/cybozu/file.hpp +626 -0
- data/vendor/cybozulib/include/cybozu/fmindex.hpp +291 -0
- data/vendor/cybozulib/include/cybozu/format.hpp +93 -0
- data/vendor/cybozulib/include/cybozu/frequency.hpp +264 -0
- data/vendor/cybozulib/include/cybozu/hash.hpp +67 -0
- data/vendor/cybozulib/include/cybozu/inttype.hpp +174 -0
- data/vendor/cybozulib/include/cybozu/itoa.hpp +336 -0
- data/vendor/cybozulib/include/cybozu/json.hpp +120 -0
- data/vendor/cybozulib/include/cybozu/line_stream.hpp +149 -0
- data/vendor/cybozulib/include/cybozu/link_libeay32.hpp +21 -0
- data/vendor/cybozulib/include/cybozu/link_mpir.hpp +18 -0
- data/vendor/cybozulib/include/cybozu/link_ssleay32.hpp +19 -0
- data/vendor/cybozulib/include/cybozu/log.hpp +237 -0
- data/vendor/cybozulib/include/cybozu/minixml.hpp +452 -0
- data/vendor/cybozulib/include/cybozu/mmap.hpp +143 -0
- data/vendor/cybozulib/include/cybozu/mutex.hpp +144 -0
- data/vendor/cybozulib/include/cybozu/nlp/mecab.hpp +96 -0
- data/vendor/cybozulib/include/cybozu/nlp/plsi.hpp +315 -0
- data/vendor/cybozulib/include/cybozu/nlp/random.hpp +74 -0
- data/vendor/cybozulib/include/cybozu/nlp/sparse.hpp +529 -0
- data/vendor/cybozulib/include/cybozu/nlp/svd.hpp +486 -0
- data/vendor/cybozulib/include/cybozu/nlp/tfidf.hpp +226 -0
- data/vendor/cybozulib/include/cybozu/nlp/top_score.hpp +75 -0
- data/vendor/cybozulib/include/cybozu/option.hpp +743 -0
- data/vendor/cybozulib/include/cybozu/parallel.hpp +88 -0
- data/vendor/cybozulib/include/cybozu/pcg.hpp +72 -0
- data/vendor/cybozulib/include/cybozu/process.hpp +324 -0
- data/vendor/cybozulib/include/cybozu/quit_signal_handler.hpp +66 -0
- data/vendor/cybozulib/include/cybozu/random_generator.hpp +144 -0
- data/vendor/cybozulib/include/cybozu/regex.hpp +463 -0
- data/vendor/cybozulib/include/cybozu/select8.hpp +279 -0
- data/vendor/cybozulib/include/cybozu/serializer.hpp +363 -0
- data/vendor/cybozulib/include/cybozu/sha1.hpp +209 -0
- data/vendor/cybozulib/include/cybozu/sha2.hpp +506 -0
- data/vendor/cybozulib/include/cybozu/siphash.hpp +105 -0
- data/vendor/cybozulib/include/cybozu/socket.hpp +785 -0
- data/vendor/cybozulib/include/cybozu/ssl.hpp +203 -0
- data/vendor/cybozulib/include/cybozu/stacktrace.hpp +291 -0
- data/vendor/cybozulib/include/cybozu/stream.hpp +269 -0
- data/vendor/cybozulib/include/cybozu/string.hpp +1746 -0
- data/vendor/cybozulib/include/cybozu/string_operation.hpp +365 -0
- data/vendor/cybozulib/include/cybozu/sucvector.hpp +378 -0
- data/vendor/cybozulib/include/cybozu/test.hpp +373 -0
- data/vendor/cybozulib/include/cybozu/thread.hpp +229 -0
- data/vendor/cybozulib/include/cybozu/time.hpp +281 -0
- data/vendor/cybozulib/include/cybozu/tls.hpp +115 -0
- data/vendor/cybozulib/include/cybozu/unordered_map.hpp +13 -0
- data/vendor/cybozulib/include/cybozu/unordered_set.hpp +13 -0
- data/vendor/cybozulib/include/cybozu/v128.hpp +376 -0
- data/vendor/cybozulib/include/cybozu/wavelet_matrix.hpp +345 -0
- data/vendor/cybozulib/include/cybozu/xorshift.hpp +189 -0
- data/vendor/cybozulib/include/cybozu/zlib.hpp +325 -0
- data/vendor/cybozulib/include/sais.hxx +364 -0
- data/vendor/cybozulib/misc/make_select8tbl.cpp +26 -0
- data/vendor/cybozulib/mk.bat +37 -0
- data/vendor/cybozulib/readme.md +29 -0
- data/vendor/cybozulib/release.props +12 -0
- data/vendor/cybozulib/sample/Makefile +30 -0
- data/vendor/cybozulib/sample/csucvector_smpl.cpp +42 -0
- data/vendor/cybozulib/sample/data/svd/org/test1.S +4 -0
- data/vendor/cybozulib/sample/data/svd/org/test1.U +4 -0
- data/vendor/cybozulib/sample/data/svd/org/test1.V +6 -0
- data/vendor/cybozulib/sample/data/svd/test1 +4 -0
- data/vendor/cybozulib/sample/data/svd/test2 +4 -0
- data/vendor/cybozulib/sample/desymbol.cpp +127 -0
- data/vendor/cybozulib/sample/exception_smpl.cpp +46 -0
- data/vendor/cybozulib/sample/fmindex_smpl.cpp +231 -0
- data/vendor/cybozulib/sample/log_smpl.cpp +19 -0
- data/vendor/cybozulib/sample/mecab_smpl.cpp +37 -0
- data/vendor/cybozulib/sample/option2_smpl.cpp +68 -0
- data/vendor/cybozulib/sample/option_smpl.cpp +42 -0
- data/vendor/cybozulib/sample/plsi_smpl.cpp +207 -0
- data/vendor/cybozulib/sample/proj/exception_smpl.vcproj +184 -0
- data/vendor/cybozulib/sample/proj/mecab_smpl.vcproj +184 -0
- data/vendor/cybozulib/sample/proj/ssl_smpl/ssl_smpl.vcxproj +85 -0
- data/vendor/cybozulib/sample/proj/ssl_smpl.vcproj +347 -0
- data/vendor/cybozulib/sample/proj/stacktrace_smpl/stacktrace_smpl.vcxproj +85 -0
- data/vendor/cybozulib/sample/proj/svd_smpl.vcproj +184 -0
- data/vendor/cybozulib/sample/quit_signal_handler.cpp +30 -0
- data/vendor/cybozulib/sample/serializer_smpl.cpp +196 -0
- data/vendor/cybozulib/sample/socket_smpl.cpp +82 -0
- data/vendor/cybozulib/sample/ssl_smpl.cpp +39 -0
- data/vendor/cybozulib/sample/stacktrace_smpl.cpp +52 -0
- data/vendor/cybozulib/sample/svd_bench_smpl.cpp +143 -0
- data/vendor/cybozulib/sample/svd_smpl.cpp +94 -0
- data/vendor/cybozulib/sample/wm_bench_smpl.cpp +182 -0
- data/vendor/cybozulib/sample/zlib_smpl.cpp +41 -0
- data/vendor/cybozulib/src/Makefile +8 -0
- data/vendor/cybozulib/src/base/Makefile +19 -0
- data/vendor/cybozulib/test/Makefile +12 -0
- data/vendor/cybozulib/test/base/Makefile +37 -0
- data/vendor/cybozulib/test/base/array_test.cpp +173 -0
- data/vendor/cybozulib/test/base/atoi_test.cpp +774 -0
- data/vendor/cybozulib/test/base/atomic_test.cpp +49 -0
- data/vendor/cybozulib/test/base/base64_test.cpp +113 -0
- data/vendor/cybozulib/test/base/bit_operation_test.cpp +134 -0
- data/vendor/cybozulib/test/base/bitvector_test.cpp +204 -0
- data/vendor/cybozulib/test/base/condition_variable_cs_test.cpp +92 -0
- data/vendor/cybozulib/test/base/condition_variable_test.cpp +88 -0
- data/vendor/cybozulib/test/base/config_test.cpp +236 -0
- data/vendor/cybozulib/test/base/crypto_test.cpp +122 -0
- data/vendor/cybozulib/test/base/csucvector_test.cpp +63 -0
- data/vendor/cybozulib/test/base/csv_test.cpp +182 -0
- data/vendor/cybozulib/test/base/data/a.xml +26 -0
- data/vendor/cybozulib/test/base/endian_test.cpp +56 -0
- data/vendor/cybozulib/test/base/env_test.cpp +22 -0
- data/vendor/cybozulib/test/base/event_test.cpp +41 -0
- data/vendor/cybozulib/test/base/file_test.cpp +233 -0
- data/vendor/cybozulib/test/base/fmindex_test.cpp +118 -0
- data/vendor/cybozulib/test/base/format_test.cpp +12 -0
- data/vendor/cybozulib/test/base/frequency_test.cpp +104 -0
- data/vendor/cybozulib/test/base/itoa_test.cpp +522 -0
- data/vendor/cybozulib/test/base/line_stream_test.cpp +208 -0
- data/vendor/cybozulib/test/base/mecab_test.cpp +41 -0
- data/vendor/cybozulib/test/base/minixml_test.cpp +103 -0
- data/vendor/cybozulib/test/base/mmap_test.cpp +15 -0
- data/vendor/cybozulib/test/base/option_test.cpp +487 -0
- data/vendor/cybozulib/test/base/parallel_test.cpp +48 -0
- data/vendor/cybozulib/test/base/proj/array_test/array_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/atoi_test/atoi_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/atomic_test/atomic_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/base64_test/base64_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/condition_variable_cs_test/condition_variable_cs_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/condition_variable_test/condition_variable_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/config_test/config_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/csv_test/csv_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/endian_test/endian_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/env_test/env_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/event_test/event_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/file_test/file_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/itoa_test/itoa_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/mecab_test/mecab_test.vcxproj +88 -0
- data/vendor/cybozulib/test/base/proj/minixml_test/minixml_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/mmap_test/mmap_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/serializer_test/serializer_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/sha1_test/sha1_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/stream_test/stream_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/string_operation_test/string_operation_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/string_test/string_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/thread_test/thread_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/time_test/time_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/tls_test/tls_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/zlib_test/zlib_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/random_generator_test.cpp +28 -0
- data/vendor/cybozulib/test/base/regex_test.cpp +74 -0
- data/vendor/cybozulib/test/base/serializer_test.cpp +483 -0
- data/vendor/cybozulib/test/base/sha1_test.cpp +61 -0
- data/vendor/cybozulib/test/base/sha2_test.cpp +191 -0
- data/vendor/cybozulib/test/base/siphash_test.cpp +33 -0
- data/vendor/cybozulib/test/base/socket_test.cpp +76 -0
- data/vendor/cybozulib/test/base/stream_test.cpp +101 -0
- data/vendor/cybozulib/test/base/string_operation_test.cpp +340 -0
- data/vendor/cybozulib/test/base/string_test.cpp +1705 -0
- data/vendor/cybozulib/test/base/sucvector_test.cpp +312 -0
- data/vendor/cybozulib/test/base/thread_test.cpp +62 -0
- data/vendor/cybozulib/test/base/time_test.cpp +164 -0
- data/vendor/cybozulib/test/base/tls_test.cpp +50 -0
- data/vendor/cybozulib/test/base/wavelet_matrix_test.cpp +145 -0
- data/vendor/cybozulib/test/base/zlib_test.cpp +371 -0
- data/vendor/cybozulib/test/nlp/Makefile +27 -0
- data/vendor/cybozulib/test/nlp/proj/random_test.vcproj +184 -0
- data/vendor/cybozulib/test/nlp/proj/sparse_test.vcproj +184 -0
- data/vendor/cybozulib/test/nlp/proj/svd_test.vcproj +184 -0
- data/vendor/cybozulib/test/nlp/random_test.cpp +62 -0
- data/vendor/cybozulib/test/nlp/sparse_test.cpp +347 -0
- data/vendor/cybozulib/test/nlp/svd_test.cpp +234 -0
- data/vendor/cybozulib/test/nlp/top_score_test.cpp +40 -0
- data/vendor/cybozulib/tool/create_vcproj.py +186 -0
- data/vendor/cybozulib/tool/vcproj_tmpl.py +185 -0
- data/vendor/msoffice/COPYRIGHT +27 -0
- data/vendor/msoffice/Makefile +29 -0
- data/vendor/msoffice/bin/64/msoc.dll +0 -0
- data/vendor/msoffice/bin/64/msocsample.exe +0 -0
- data/vendor/msoffice/bin/64/msoffice-crypt.exe +0 -0
- data/vendor/msoffice/bin/msoc.dll +0 -0
- data/vendor/msoffice/bin/msocsample.exe +0 -0
- data/vendor/msoffice/bin/msoffice-crypt.exe +0 -0
- data/vendor/msoffice/common.mk +71 -0
- data/vendor/msoffice/common.props +26 -0
- data/vendor/msoffice/debug.props +14 -0
- data/vendor/msoffice/include/attack.hpp +211 -0
- data/vendor/msoffice/include/cfb.hpp +777 -0
- data/vendor/msoffice/include/crypto_util.hpp +450 -0
- data/vendor/msoffice/include/custom_sha1.hpp +342 -0
- data/vendor/msoffice/include/decode.hpp +240 -0
- data/vendor/msoffice/include/encode.hpp +221 -0
- data/vendor/msoffice/include/make_dataspace.hpp +316 -0
- data/vendor/msoffice/include/msoc.h +129 -0
- data/vendor/msoffice/include/resource.hpp +7 -0
- data/vendor/msoffice/include/standard_encryption.hpp +145 -0
- data/vendor/msoffice/include/uint32vec.hpp +179 -0
- data/vendor/msoffice/include/util.hpp +212 -0
- data/vendor/msoffice/lib/.emptydir +0 -0
- data/vendor/msoffice/misc/decrypt-xls.vbs +46 -0
- data/vendor/msoffice/mk.bat +1 -0
- data/vendor/msoffice/mkdll.bat +3 -0
- data/vendor/msoffice/msoc.def +13 -0
- data/vendor/msoffice/msocsample.py +178 -0
- data/vendor/msoffice/msoffice12.sln +31 -0
- data/vendor/msoffice/readme.md +110 -0
- data/vendor/msoffice/release.props +28 -0
- data/vendor/msoffice/src/Makefile +19 -0
- data/vendor/msoffice/src/attack.cpp +124 -0
- data/vendor/msoffice/src/cfb_test.cpp +77 -0
- data/vendor/msoffice/src/minisample.c +54 -0
- data/vendor/msoffice/src/msocdll.cpp +276 -0
- data/vendor/msoffice/src/msocsample.c +136 -0
- data/vendor/msoffice/src/msoffice-crypt.cpp +219 -0
- data/vendor/msoffice/src/proj/attack/attack.vcxproj +88 -0
- data/vendor/msoffice/src/proj/main/msoffice-crypt.vcxproj +88 -0
- data/vendor/msoffice/src/sha1.cpp +234 -0
- data/vendor/msoffice/test/Makefile +20 -0
- data/vendor/msoffice/test/cfb_test.cpp +74 -0
- data/vendor/msoffice/test/hash_test.cpp +59 -0
- data/vendor/msoffice/test/proj/cfb/cfb_test.vcxproj +90 -0
- data/vendor/msoffice/test/proj/hash/hash_test.vcxproj +90 -0
- data/vendor/msoffice/test/sampl.bat +8 -0
- data/vendor/msoffice/test_all.py +46 -0
- data/vendor/update +4 -0
- metadata +351 -0
@@ -0,0 +1,291 @@
|
|
1
|
+
#pragma once
|
2
|
+
/**
|
3
|
+
@file
|
4
|
+
@brief FM-index
|
5
|
+
@author MITSUNARI Shigeo(@herumi)
|
6
|
+
@license modified new BSD license
|
7
|
+
http://opensource.org/licenses/BSD-3-Clause
|
8
|
+
*/
|
9
|
+
#include <map>
|
10
|
+
#include <vector>
|
11
|
+
#include <fstream>
|
12
|
+
#include <stdio.h>
|
13
|
+
#ifdef CYBOZU_FMINDEX_USE_CSUCVECTOR
|
14
|
+
#include <cybozu/csucvector.hpp>
|
15
|
+
#endif
|
16
|
+
#include <cybozu/wavelet_matrix.hpp>
|
17
|
+
#include <cybozu/bitvector.hpp>
|
18
|
+
#include <cybozu/frequency.hpp>
|
19
|
+
|
20
|
+
#ifdef _MSC_VER
|
21
|
+
#pragma warning(push)
|
22
|
+
#pragma warning(disable:4244)
|
23
|
+
#pragma warning(disable:4389)
|
24
|
+
#pragma warning(disable:4018)
|
25
|
+
#endif
|
26
|
+
#include "sais.hxx"
|
27
|
+
#ifdef _MSC_VER
|
28
|
+
#pragma warning(pop)
|
29
|
+
#endif
|
30
|
+
|
31
|
+
#ifdef _MSC_VER
|
32
|
+
#pragma warning(push)
|
33
|
+
#pragma warning(disable:4127) // constant condition
|
34
|
+
#endif
|
35
|
+
|
36
|
+
namespace cybozu {
|
37
|
+
/*
|
38
|
+
T : type of alphabet
|
39
|
+
isRawData : deal with input data as is
|
40
|
+
T must be uint8_t or uint16_t if isRawData
|
41
|
+
*/
|
42
|
+
template<class T, bool isRawData = false>
|
43
|
+
class FMindexT {
|
44
|
+
public:
|
45
|
+
static const size_t maxCharNum = size_t(1) << (sizeof(T) * 8);
|
46
|
+
typedef std::vector<uint32_t> Vec32;
|
47
|
+
typedef std::vector<T> Vec;
|
48
|
+
#ifdef CYBOZU_FMINDEX_USE_CSUCVECTOR
|
49
|
+
typedef cybozu::CSucVector SucVector;
|
50
|
+
#else
|
51
|
+
typedef cybozu::SucVectorT<uint32_t, false> SucVector;
|
52
|
+
#endif
|
53
|
+
typedef cybozu::WaveletMatrixT<false, SucVector> WaveletMatrix;
|
54
|
+
Vec32 cf;
|
55
|
+
WaveletMatrix wm;
|
56
|
+
Vec32 alignedSa;
|
57
|
+
SucVector alignedPos;
|
58
|
+
cybozu::Frequency<T, uint32_t> freq;
|
59
|
+
int skip_;
|
60
|
+
size_t charNum_;
|
61
|
+
|
62
|
+
/*
|
63
|
+
setup freq, cf by [begin, end)
|
64
|
+
*/
|
65
|
+
template<class Iter>
|
66
|
+
void initCf(Vec& v, Iter begin, Iter end)
|
67
|
+
{
|
68
|
+
const size_t size = std::distance(begin, end);
|
69
|
+
if (size >= (uint64_t(1) << 32) - 1) {
|
70
|
+
throw cybozu::Exception("FMindexT:initCf:too large dataSize") << size;
|
71
|
+
}
|
72
|
+
v.resize(size + 1); // add NUL at the end of data
|
73
|
+
if (isRawData) {
|
74
|
+
assert(sizeof(T) <= 16);
|
75
|
+
charNum_ = size_t(1) << (sizeof(T) * 8);
|
76
|
+
std::vector<uint32_t> charNumTbl(charNum_);
|
77
|
+
charNumTbl[0] = 1;
|
78
|
+
for (size_t i = 0; i < size; i++) {
|
79
|
+
T c = *begin++;
|
80
|
+
if (c <= 0) throw cybozu::Exception("FMindext:initCf:zero alphabet") << c;
|
81
|
+
v[i] = c;
|
82
|
+
charNumTbl[c]++;
|
83
|
+
}
|
84
|
+
cf.resize(charNum_);
|
85
|
+
uint32_t sum = 0;
|
86
|
+
for (size_t i = 0; i < charNum_; i++) {
|
87
|
+
cf[i] = sum;
|
88
|
+
sum += charNumTbl[i];
|
89
|
+
}
|
90
|
+
} else {
|
91
|
+
freq.init(begin, end);
|
92
|
+
charNum_ = freq.size() + 1; // +1 means last zero
|
93
|
+
if (charNum_ > maxCharNum) throw cybozu::Exception("FMindexT:initCf:too many alphabet");
|
94
|
+
for (size_t i = 0; i < size; i++) {
|
95
|
+
v[i] = static_cast<T>(freq.getIndex(*begin++) + 1);
|
96
|
+
}
|
97
|
+
cf.resize(charNum_);
|
98
|
+
cf[0] = 0;
|
99
|
+
uint32_t sum = 1;
|
100
|
+
for (size_t i = 1; i < charNum_; i++) {
|
101
|
+
cf[i] = sum;
|
102
|
+
sum += freq.getFrequency(freq.getElement(i - 1));
|
103
|
+
}
|
104
|
+
}
|
105
|
+
}
|
106
|
+
void initBwt(Vec& bwt, const Vec& s, const Vec32& sa) const
|
107
|
+
{
|
108
|
+
const size_t size = sa.size();
|
109
|
+
bwt.resize(size);
|
110
|
+
for (size_t i = 0; i < size; i++) {
|
111
|
+
if (sa[i] > 0) {
|
112
|
+
bwt[i] = s[sa[i] - 1];
|
113
|
+
} else {
|
114
|
+
bwt[i] = s[size - 1];
|
115
|
+
}
|
116
|
+
}
|
117
|
+
}
|
118
|
+
size_t getBitLen(size_t x) const
|
119
|
+
{
|
120
|
+
if (x == 0) return 1;
|
121
|
+
size_t ret = 0;
|
122
|
+
while (x > 0) {
|
123
|
+
x >>= 1;
|
124
|
+
ret++;
|
125
|
+
}
|
126
|
+
return ret;
|
127
|
+
}
|
128
|
+
public:
|
129
|
+
FMindexT()
|
130
|
+
: skip_(8)
|
131
|
+
, charNum_(0)
|
132
|
+
{
|
133
|
+
}
|
134
|
+
|
135
|
+
/*
|
136
|
+
[begin, end)
|
137
|
+
replace '\0' in [begin, end) with space
|
138
|
+
append '\0' at the end of [begin, end)
|
139
|
+
*/
|
140
|
+
template<class Iter>
|
141
|
+
void init(Iter begin, Iter end, int skip = 8)
|
142
|
+
{
|
143
|
+
if (skip <= 0) {
|
144
|
+
throw cybozu::Exception("FMindexT:buildFMindex:skip is positive") << skip;
|
145
|
+
}
|
146
|
+
skip_ = skip;
|
147
|
+
Vec v;
|
148
|
+
initCf(v, begin, end);
|
149
|
+
const size_t dataSize = v.size();
|
150
|
+
|
151
|
+
Vec32 sa;
|
152
|
+
sa.resize(dataSize);
|
153
|
+
if (saisxx(&v[0], &sa[0], (int)dataSize, (int)charNum_) == -1) {
|
154
|
+
throw cybozu::Exception("FMindexT:init:saisxx");
|
155
|
+
}
|
156
|
+
Vec bwt;
|
157
|
+
initBwt(bwt, v, sa);
|
158
|
+
wm.init(bwt, getBitLen(charNum_));
|
159
|
+
|
160
|
+
#if 1
|
161
|
+
cybozu::BitVector bv;
|
162
|
+
bv.resize(dataSize);
|
163
|
+
for (size_t i = 0; i < dataSize; i++) {
|
164
|
+
if ((sa[i] % skip) == 0) {
|
165
|
+
bv.set(i);
|
166
|
+
alignedSa.push_back(sa[i]);
|
167
|
+
}
|
168
|
+
}
|
169
|
+
alignedPos.init(bv.getBlock(), bv.size());
|
170
|
+
#else
|
171
|
+
alignedPos.resize(dataSize);
|
172
|
+
for (size_t i = 0; i < dataSize; i++) {
|
173
|
+
if ((sa[i] % skip) == 0) {
|
174
|
+
alignedPos.set(i);
|
175
|
+
alignedSa.push_back(sa[i]);
|
176
|
+
}
|
177
|
+
}
|
178
|
+
alignedPos.ready();
|
179
|
+
#endif
|
180
|
+
}
|
181
|
+
|
182
|
+
/*
|
183
|
+
get range of bwt for key
|
184
|
+
*/
|
185
|
+
template<class Int, class Key>
|
186
|
+
bool getRange(Int* pbegin, Int* pend, const Key& _key) const
|
187
|
+
{
|
188
|
+
if (_key.empty()) return false;
|
189
|
+
const size_t keySize = _key.size();
|
190
|
+
const typename Key::value_type *key;
|
191
|
+
Key cvtKey;
|
192
|
+
if (isRawData) {
|
193
|
+
key = &_key[0];
|
194
|
+
} else {
|
195
|
+
cvtKey.resize(keySize);
|
196
|
+
for (size_t i = 0; i < keySize; i++) {
|
197
|
+
if (freq.getFrequency(_key[i]) == 0) return false;
|
198
|
+
cvtKey[i] = typename Key::value_type(freq.getIndex(_key[i]) + 1);
|
199
|
+
}
|
200
|
+
key = &cvtKey[0];
|
201
|
+
}
|
202
|
+
size_t i = keySize - 1;
|
203
|
+
size_t begin = 0;
|
204
|
+
size_t end = wm.size();
|
205
|
+
while (begin < end) {
|
206
|
+
const T c = key[i];
|
207
|
+
const uint32_t cfc = cf[c];
|
208
|
+
begin = cfc + wm.rank(c, begin);
|
209
|
+
end = cfc + wm.rank(c, end);
|
210
|
+
if (i == 0) break;
|
211
|
+
i--;
|
212
|
+
}
|
213
|
+
|
214
|
+
if (begin < end) {
|
215
|
+
*pbegin = Int(begin);
|
216
|
+
*pend = Int(end);
|
217
|
+
return true;
|
218
|
+
}
|
219
|
+
return false;
|
220
|
+
}
|
221
|
+
template<class Int>
|
222
|
+
bool getRange(Int* pbegin, Int* pend, const char *key) const
|
223
|
+
{
|
224
|
+
return getRange(pbegin, pend, std::string(key));
|
225
|
+
}
|
226
|
+
size_t convertPosition(size_t bwtPos) const
|
227
|
+
{
|
228
|
+
size_t t = 0;
|
229
|
+
while (!alignedPos.get(bwtPos)) {
|
230
|
+
T c;
|
231
|
+
bwtPos = wm.get(&c, bwtPos);
|
232
|
+
bwtPos += cf[c];
|
233
|
+
t++;
|
234
|
+
}
|
235
|
+
return t + alignedSa[alignedPos.rank1(bwtPos)];
|
236
|
+
}
|
237
|
+
/*
|
238
|
+
get previous string at pos
|
239
|
+
@note assume T is vector or std::string
|
240
|
+
*/
|
241
|
+
template<class Str>
|
242
|
+
void getPrevString(Str& str, size_t bwtPos, size_t len) const
|
243
|
+
{
|
244
|
+
str.resize(len);
|
245
|
+
T c;
|
246
|
+
while (len > 0) {
|
247
|
+
bwtPos = wm.get(&c, bwtPos);
|
248
|
+
bwtPos += cf[c];
|
249
|
+
if (c == 0) {
|
250
|
+
str.erase(str.begin(), str.begin() + len);
|
251
|
+
return;
|
252
|
+
}
|
253
|
+
len--;
|
254
|
+
str[len] = isRawData ? c : freq.getElement(c - 1);
|
255
|
+
}
|
256
|
+
}
|
257
|
+
|
258
|
+
template<class OutputStream>
|
259
|
+
void save(OutputStream& os) const
|
260
|
+
{
|
261
|
+
cybozu::save(os, skip_);
|
262
|
+
cybozu::savePodVec(os, cf);
|
263
|
+
wm.save(os);
|
264
|
+
cybozu::savePodVec(os, alignedSa);
|
265
|
+
alignedPos.save(os);
|
266
|
+
if (!isRawData) freq.save(os);
|
267
|
+
}
|
268
|
+
template<class InputStream>
|
269
|
+
void load(InputStream& is)
|
270
|
+
{
|
271
|
+
cybozu::load(skip_, is);
|
272
|
+
cybozu::loadPodVec(cf, is);
|
273
|
+
wm.load(is);
|
274
|
+
cybozu::loadPodVec(alignedSa, is);
|
275
|
+
alignedPos.load(is);
|
276
|
+
if (isRawData) {
|
277
|
+
charNum_ = size_t(1) << (sizeof(T) * 8);
|
278
|
+
} else {
|
279
|
+
freq.load(is);
|
280
|
+
charNum_ = freq.size();
|
281
|
+
}
|
282
|
+
}
|
283
|
+
};
|
284
|
+
|
285
|
+
typedef FMindexT<uint8_t> FMindex;
|
286
|
+
|
287
|
+
} // cybozu
|
288
|
+
|
289
|
+
#ifdef _MSC_VER
|
290
|
+
#pragma warning(pop)
|
291
|
+
#endif
|
@@ -0,0 +1,93 @@
|
|
1
|
+
#pragma once
|
2
|
+
/**
|
3
|
+
@file
|
4
|
+
@brief format string
|
5
|
+
@author MITSUNARI Shigeo(@herumi)
|
6
|
+
*/
|
7
|
+
#include <string>
|
8
|
+
#include <stdio.h>
|
9
|
+
#include <stdarg.h>
|
10
|
+
#include <stdlib.h>
|
11
|
+
#include <cybozu/exception.hpp>
|
12
|
+
|
13
|
+
#if defined(__clang__) || (defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 4)
|
14
|
+
#define CYBOZU_FORMAT_DISABLE_WARNING
|
15
|
+
#pragma GCC diagnostic push
|
16
|
+
#pragma GCC diagnostic ignored "-Wformat-nonliteral"
|
17
|
+
#endif
|
18
|
+
|
19
|
+
namespace cybozu {
|
20
|
+
|
21
|
+
inline void vformat(std::string& str, const char *format, va_list args)
|
22
|
+
{
|
23
|
+
#ifdef _MSC_VER
|
24
|
+
_locale_t curLoc = _get_current_locale();
|
25
|
+
int size = _vscprintf_l(format, curLoc, args);
|
26
|
+
if (size < 0 || size >= INT_MAX) throw cybozu::Exception("vformat:_vscprintf_l");
|
27
|
+
|
28
|
+
str.resize(size + 1);
|
29
|
+
|
30
|
+
int ret = _vsprintf_s_l(&str[0], size + 1, format, curLoc, args);
|
31
|
+
if (ret < 0) throw cybozu::Exception("vformat:_vsprintf_s_l");
|
32
|
+
str.resize(size);
|
33
|
+
#else
|
34
|
+
#if 1
|
35
|
+
char *p;
|
36
|
+
int ret = vasprintf(&p, format, args);
|
37
|
+
if (ret < 0) throw cybozu::Exception("vformat:vasnprintf");
|
38
|
+
try {
|
39
|
+
str.assign(p, ret);
|
40
|
+
free(p);
|
41
|
+
} catch (...) {
|
42
|
+
free(p);
|
43
|
+
throw std::bad_alloc();
|
44
|
+
}
|
45
|
+
#else
|
46
|
+
// slow
|
47
|
+
va_list keep;
|
48
|
+
va_copy(keep, args);
|
49
|
+
int len = vsnprintf(0, 0, format, args); // len excludes the null byte
|
50
|
+
if (len < 0) throw cybozu::Exception("vformat:vasnprintf err1");
|
51
|
+
str.resize(len + 1);
|
52
|
+
len = vsnprintf(&str[0], str.size(), format, keep); // len incluedes the null byte
|
53
|
+
if (len < 0) throw cybozu::Exception("vformat:vasnprintf err2");
|
54
|
+
str.resize(len);
|
55
|
+
#endif
|
56
|
+
#endif
|
57
|
+
}
|
58
|
+
|
59
|
+
#ifdef _MSC_VER
|
60
|
+
#define CYBOZU_FORMAT_PRINTF _Printf_format_string_
|
61
|
+
#else
|
62
|
+
#define CYBOZU_FORMAT_PRINTF
|
63
|
+
#endif
|
64
|
+
|
65
|
+
#ifdef __GNUC__
|
66
|
+
__attribute__((format(printf, 2, 3)))
|
67
|
+
#endif
|
68
|
+
inline void format(std::string& str, CYBOZU_FORMAT_PRINTF const char *format, ...)
|
69
|
+
{
|
70
|
+
va_list args;
|
71
|
+
va_start(args, format);
|
72
|
+
cybozu::vformat(str, format, args);
|
73
|
+
va_end(args);
|
74
|
+
}
|
75
|
+
|
76
|
+
#ifdef __GNUC__
|
77
|
+
__attribute__((format(printf, 1, 2)))
|
78
|
+
#endif
|
79
|
+
inline std::string format(CYBOZU_FORMAT_PRINTF const char *format, ...)
|
80
|
+
{
|
81
|
+
std::string str;
|
82
|
+
va_list args;
|
83
|
+
va_start(args, format);
|
84
|
+
cybozu::vformat(str, format, args);
|
85
|
+
va_end(args);
|
86
|
+
return str;
|
87
|
+
}
|
88
|
+
|
89
|
+
} // cybozu
|
90
|
+
|
91
|
+
#ifdef CYBOZU_FORMAT_DISABLE_WARNING
|
92
|
+
#pragma GCC diagnostic push
|
93
|
+
#endif
|
@@ -0,0 +1,264 @@
|
|
1
|
+
#pragma once
|
2
|
+
/**
|
3
|
+
@file
|
4
|
+
@brief frequency of elements in a sequence
|
5
|
+
@author MITSUNARI Shigeo(@herumi)
|
6
|
+
@license modified new BSD license
|
7
|
+
http://opensource.org/licenses/BSD-3-Clause
|
8
|
+
*/
|
9
|
+
#include <assert.h>
|
10
|
+
#include <vector>
|
11
|
+
#include <algorithm>
|
12
|
+
#include <functional>
|
13
|
+
#include <iostream>
|
14
|
+
#include <cybozu/exception.hpp>
|
15
|
+
#include <cybozu/unordered_map.hpp>
|
16
|
+
#include <cybozu/serializer.hpp>
|
17
|
+
|
18
|
+
namespace cybozu {
|
19
|
+
|
20
|
+
namespace freq_local {
|
21
|
+
|
22
|
+
template<class Element, class Int = size_t>
|
23
|
+
class FrequencyVec {
|
24
|
+
static const size_t N = size_t(1) << (sizeof(Element) * 8);
|
25
|
+
size_t size_;
|
26
|
+
Int freqTbl_[N];
|
27
|
+
uint8_t char2idx_[N];
|
28
|
+
uint8_t idx2char_[N];
|
29
|
+
struct Greater {
|
30
|
+
const Int *p_;
|
31
|
+
explicit Greater(const Int *p) : p_(p) {}
|
32
|
+
bool operator()(uint8_t lhs, uint8_t rhs) const
|
33
|
+
{
|
34
|
+
Int a = p_[lhs];
|
35
|
+
Int b = p_[rhs];
|
36
|
+
if (a > b) return true;
|
37
|
+
if (a < b) return false;
|
38
|
+
return a > b;
|
39
|
+
}
|
40
|
+
};
|
41
|
+
public:
|
42
|
+
typedef Element value_type;
|
43
|
+
typedef Int size_type;
|
44
|
+
|
45
|
+
FrequencyVec() { clear(); }
|
46
|
+
template<class Iter>
|
47
|
+
FrequencyVec(Iter begin, Iter end)
|
48
|
+
{
|
49
|
+
clear();
|
50
|
+
init(begin, end);
|
51
|
+
}
|
52
|
+
void clear()
|
53
|
+
{
|
54
|
+
size_ = 0;
|
55
|
+
memset(freqTbl_, 0, sizeof(freqTbl_));
|
56
|
+
}
|
57
|
+
template<class Iter>
|
58
|
+
void init(Iter begin, Iter end)
|
59
|
+
{
|
60
|
+
while (begin != end) {
|
61
|
+
append(*begin);
|
62
|
+
++begin;
|
63
|
+
}
|
64
|
+
ready();
|
65
|
+
}
|
66
|
+
void append(const Element e)
|
67
|
+
{
|
68
|
+
freqTbl_[uint8_t(e)]++;
|
69
|
+
}
|
70
|
+
void ready()
|
71
|
+
{
|
72
|
+
for (size_t i = 0; i < N; i++) idx2char_[i] = uint8_t(i);
|
73
|
+
Greater greater(freqTbl_);
|
74
|
+
std::sort(idx2char_, idx2char_ + N, greater);
|
75
|
+
size_ = 0;
|
76
|
+
for (size_t i = 0; i < N; i++) {
|
77
|
+
uint8_t c = idx2char_[i];
|
78
|
+
char2idx_[c] = (uint8_t)i;
|
79
|
+
if (freqTbl_[c]) size_++;
|
80
|
+
}
|
81
|
+
}
|
82
|
+
/*
|
83
|
+
element -> freq
|
84
|
+
*/
|
85
|
+
Int getFrequency(Element e) const { return freqTbl_[uint8_t(e)]; }
|
86
|
+
/*
|
87
|
+
element -> idx
|
88
|
+
*/
|
89
|
+
Int getIndex(Element e) const { return char2idx_[uint8_t(e)]; }
|
90
|
+
/*
|
91
|
+
idx -> element
|
92
|
+
*/
|
93
|
+
Element getElement(size_t idx) const
|
94
|
+
{
|
95
|
+
// if (idx >= N) throw cybozu::Exception("Frequency:getElement:bad idx") << idx;
|
96
|
+
assert(idx < N);
|
97
|
+
return Element(idx2char_[idx]);
|
98
|
+
}
|
99
|
+
size_t size() const { return size_; }
|
100
|
+
template<class InputStream>
|
101
|
+
void load(InputStream& is)
|
102
|
+
{
|
103
|
+
cybozu::load(size_, is);
|
104
|
+
cybozu::loadRange(freqTbl_, N, is);
|
105
|
+
cybozu::loadRange(char2idx_, N, is);
|
106
|
+
cybozu::loadRange(idx2char_, N, is);
|
107
|
+
}
|
108
|
+
void save(std::ostream& os) const
|
109
|
+
{
|
110
|
+
cybozu::save(os, size_);
|
111
|
+
cybozu::saveRange(os, freqTbl_, N);
|
112
|
+
cybozu::saveRange(os, char2idx_, N);
|
113
|
+
cybozu::saveRange(os, idx2char_, N);
|
114
|
+
}
|
115
|
+
void put() const
|
116
|
+
{
|
117
|
+
for (size_t i = 0; i < size_; i++) {
|
118
|
+
uint8_t c = idx2char_[i];
|
119
|
+
printf("%d %d %d\n", (int)i, c, freqTbl_[c]);
|
120
|
+
}
|
121
|
+
}
|
122
|
+
};
|
123
|
+
|
124
|
+
} // cybozu::freq_local
|
125
|
+
|
126
|
+
/*
|
127
|
+
count Element
|
128
|
+
Element : type of element
|
129
|
+
Int : type of counter
|
130
|
+
*/
|
131
|
+
template<class Element, class Int = size_t>
|
132
|
+
class Frequency {
|
133
|
+
struct FreqIdx {
|
134
|
+
Int freq;
|
135
|
+
mutable Int idx;
|
136
|
+
template<class InputStream>
|
137
|
+
void load(InputStream& is)
|
138
|
+
{
|
139
|
+
cybozu::load(freq, is);
|
140
|
+
cybozu::load(idx, is);
|
141
|
+
}
|
142
|
+
template<class OutputStream>
|
143
|
+
void save(OutputStream& os) const
|
144
|
+
{
|
145
|
+
cybozu::save(os, freq);
|
146
|
+
cybozu::save(os, idx);
|
147
|
+
}
|
148
|
+
};
|
149
|
+
typedef CYBOZU_NAMESPACE_STD::unordered_map<Element, FreqIdx> Map;
|
150
|
+
typedef Element value_type;
|
151
|
+
typedef Int size_type;
|
152
|
+
typedef std::vector<typename Map::const_iterator> Idx2Ref;
|
153
|
+
/*
	order for std::sort : descending frequency, then descending element
*/
static inline bool greater(typename Map::const_iterator i, typename Map::const_iterator j)
{
	const Int lhsFreq = i->second.freq;
	const Int rhsFreq = j->second.freq;
	return lhsFreq > rhsFreq || (!(lhsFreq < rhsFreq) && i->first > j->first);
}
|
161
|
+
Map m_;
|
162
|
+
Idx2Ref idx2ref_;
|
163
|
+
void initIdx2Ref()
|
164
|
+
{
|
165
|
+
idx2ref_.resize(m_.size());
|
166
|
+
size_t pos = 0;
|
167
|
+
for (typename Map::const_iterator i = m_.begin(), ie = m_.end(); i != ie; ++i) {
|
168
|
+
idx2ref_[pos++] = i;
|
169
|
+
}
|
170
|
+
std::sort(idx2ref_.begin(), idx2ref_.end(), greater);
|
171
|
+
}
|
172
|
+
public:
|
173
|
+
/*
	make an empty counter
*/
Frequency()
{
	clear();
}
|
174
|
+
template<class Iter>
|
175
|
+
Frequency(Iter begin, Iter end)
|
176
|
+
{
|
177
|
+
clear();
|
178
|
+
init(begin, end);
|
179
|
+
}
|
180
|
+
void clear()
|
181
|
+
{
|
182
|
+
m_.clear();
|
183
|
+
idx2ref_.clear();
|
184
|
+
}
|
185
|
+
template<class Iter>
|
186
|
+
void init(Iter begin, Iter end)
|
187
|
+
{
|
188
|
+
while (begin != end) {
|
189
|
+
append(*begin);
|
190
|
+
++begin;
|
191
|
+
}
|
192
|
+
ready();
|
193
|
+
}
|
194
|
+
void append(const Element& e)
|
195
|
+
{
|
196
|
+
m_[e].freq++;
|
197
|
+
}
|
198
|
+
void ready()
|
199
|
+
{
|
200
|
+
initIdx2Ref();
|
201
|
+
for (size_t i = 0, ie = idx2ref_.size(); i < ie; i++) {
|
202
|
+
idx2ref_[i]->second.idx = (Int)i;
|
203
|
+
}
|
204
|
+
}
|
205
|
+
/*
|
206
|
+
element -> freq
|
207
|
+
*/
|
208
|
+
Int getFrequency(const Element& e) const
|
209
|
+
{
|
210
|
+
typename Map::const_iterator i = m_.find(e);
|
211
|
+
return (i != m_.end()) ? i->second.freq : 0;
|
212
|
+
}
|
213
|
+
/*
|
214
|
+
element -> idx
|
215
|
+
*/
|
216
|
+
Int getIndex(const Element& e) const
|
217
|
+
{
|
218
|
+
typename Map::const_iterator i = m_.find(e);
|
219
|
+
if (i == m_.end()) throw cybozu::Exception("Frequency:getIndex:not found") << e;
|
220
|
+
return i->second.idx;
|
221
|
+
}
|
222
|
+
/*
|
223
|
+
idx -> element
|
224
|
+
*/
|
225
|
+
const Element& getElement(size_t idx) const
|
226
|
+
{
|
227
|
+
if (idx >= idx2ref_.size()) throw cybozu::Exception("Frequency:getElement:bad idx") << idx;
|
228
|
+
return idx2ref_[idx]->first;
|
229
|
+
}
|
230
|
+
size_t size() const { return idx2ref_.size(); }
|
231
|
+
template<class InputStream>
|
232
|
+
void load(InputStream& is)
|
233
|
+
{
|
234
|
+
cybozu::load(m_, is);
|
235
|
+
initIdx2Ref();
|
236
|
+
}
|
237
|
+
template<class OutputStream>
|
238
|
+
void save(OutputStream& os) const
|
239
|
+
{
|
240
|
+
cybozu::save(os, m_);
|
241
|
+
}
|
242
|
+
void put() const
|
243
|
+
{
|
244
|
+
for (size_t i = 0, n = idx2ref_.size(); i < n; i++) {
|
245
|
+
typename Map::const_iterator j = idx2ref_[i];
|
246
|
+
std::cout << i << ' ' << j->first << ' ' << j->second.freq << std::endl;
|
247
|
+
}
|
248
|
+
}
|
249
|
+
};
|
250
|
+
|
251
|
+
template<class Int>
|
252
|
+
struct Frequency<uint8_t, Int> : freq_local::FrequencyVec<uint8_t, Int> {
|
253
|
+
Frequency() {}
|
254
|
+
template<class Iterator>
|
255
|
+
Frequency(Iterator begin, Iterator end) : freq_local::FrequencyVec<uint8_t, Int>(begin, end) {}
|
256
|
+
};
|
257
|
+
template<class Int>
|
258
|
+
struct Frequency<char, Int> : freq_local::FrequencyVec<char, Int> {
|
259
|
+
Frequency() {}
|
260
|
+
template<class Iterator>
|
261
|
+
Frequency(Iterator begin, Iterator end) : freq_local::FrequencyVec<char, Int>(begin, end) {}
|
262
|
+
};
|
263
|
+
|
264
|
+
} // cybozu
|
@@ -0,0 +1,67 @@
|
|
1
|
+
#pragma once
|
2
|
+
#include <cybozu/inttype.hpp>
|
3
|
+
|
4
|
+
namespace cybozu {
|
5
|
+
|
6
|
+
/*
    32-bit FNV-1a style hash of [begin, end) : for each element, xor then multiply by 16777619
    v : running hash value; 0 (the default) selects the initial value 2166136261,
        so an explicit seed of 0 cannot be distinguished from "no seed"
    the result can be passed back as v to continue hashing a following range
    each element is xor-ed after conversion to the arithmetic type of the expression,
    so a negative element (e.g. plain char >= 0x80 where char is signed) is sign-extended
*/
template<class Iter>
uint32_t hash32(Iter begin, Iter end, uint32_t v = 0)
{
    const uint32_t offsetBasis = 2166136261U;
    const uint32_t prime = 16777619;
    uint32_t h = (v == 0) ? offsetBasis : v;
    for (; begin != end; ++begin) {
        h ^= *begin;
        h *= prime;
    }
    return h;
}
|
16
|
+
/*
    64-bit FNV-1a style hash of [begin, end) : for each element, xor then multiply by 1099511628211
    after the loop the upper 32 bits are xor-ed into the lower 32 bits (v ^= v >> 32),
    so the result is not the plain FNV-1a value
    v : running hash value; 0 (the default) selects the initial value 14695981039346656037,
        so an explicit seed of 0 cannot be distinguished from "no seed"
    each element is xor-ed after conversion to the arithmetic type of the expression,
    so a negative element (e.g. plain char >= 0x80 where char is signed) is sign-extended
*/
template<class Iter>
uint64_t hash64(Iter begin, Iter end, uint64_t v = 0)
{
    const uint64_t offsetBasis = 14695981039346656037ULL;
    const uint64_t prime = 1099511628211ULL;
    uint64_t h = (v == 0) ? offsetBasis : v;
    for (; begin != end; ++begin) {
        h ^= *begin;
        h *= prime;
    }
    // fold the high half into the low half
    return h ^ (h >> 32);
}
|
27
|
+
/*
    hash32 of the n elements x[0], ..., x[n - 1]
    forwards to the iterator-range overload with the range [x, x + n)
    v : running hash value, passed through unchanged
*/
template<class T>
uint32_t hash32(const T *x, size_t n, uint32_t v = 0)
{
    const T *const last = x + n;
    return hash32(x, last, v);
}
|
32
|
+
/*
    hash64 of the n elements x[0], ..., x[n - 1]
    forwards to the iterator-range overload with the range [x, x + n)
    v : running hash value, passed through unchanged
*/
template<class T>
uint64_t hash64(const T *x, size_t n, uint64_t v = 0)
{
    const T *const last = x + n;
    return hash64(x, last, v);
}
|
37
|
+
|
38
|
+
} // cybozu
|
39
|
+
|
40
|
+
/*
    forward declaration of the boost::hash class template
    only the name is introduced here; no Boost header is included
    (presumably so that boost::hash can be specialized without depending on Boost)
*/
namespace boost {

template<class Key>
struct hash;

} // boost
|
46
|
+
|
47
|
+
/*
    make the hash class template visible in namespace std
    before C++11 : forward-declare it inside std (wrapped by the CYBOZU_NAMESPACE_TR1_* macros)
    C++11 or later : take it from <functional>
*/
#if CYBOZU_CPP_VERSION < CYBOZU_CPP_VERSION_CPP11

namespace std { CYBOZU_NAMESPACE_TR1_BEGIN

#if defined(_MSC_VER)
    #pragma warning(push)
    #pragma warning(disable : 4099) // mismatch between class and struct
#endif
// the forward declaration is skipped on Apple clang
#if !defined(__APPLE__) || !defined(__clang__)
template<class T>
struct hash;
#endif
#if defined(_MSC_VER)
    #pragma warning(pop)
#endif

CYBOZU_NAMESPACE_TR1_END } // std

#else
#include <functional>
#endif
|