ooxml_crypt 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +58 -0
- data/Rakefile +12 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/ext/ooxml_crypt/extconf.rb +18 -0
- data/ext/ooxml_crypt/ooxml_crypt.c +27 -0
- data/ext/ooxml_crypt/ooxml_crypt.h +7 -0
- data/lib/ooxml_crypt/version.rb +5 -0
- data/lib/ooxml_crypt.rb +75 -0
- data/vendor/cybozulib/.github/workflows/main.yml +12 -0
- data/vendor/cybozulib/.gitignore +5 -0
- data/vendor/cybozulib/CMakeLists.txt +6 -0
- data/vendor/cybozulib/COPYRIGHT +27 -0
- data/vendor/cybozulib/Makefile +26 -0
- data/vendor/cybozulib/bin/libeay32.dll +0 -0
- data/vendor/cybozulib/bin/libmecab.dll +0 -0
- data/vendor/cybozulib/bin/ssleay32.dll +0 -0
- data/vendor/cybozulib/common.mk +116 -0
- data/vendor/cybozulib/common.props +25 -0
- data/vendor/cybozulib/cybozulib.sln +286 -0
- data/vendor/cybozulib/debug.props +14 -0
- data/vendor/cybozulib/include/cybozu/array.hpp +197 -0
- data/vendor/cybozulib/include/cybozu/atoi.hpp +238 -0
- data/vendor/cybozulib/include/cybozu/atomic.hpp +146 -0
- data/vendor/cybozulib/include/cybozu/base64.hpp +210 -0
- data/vendor/cybozulib/include/cybozu/benchmark.hpp +212 -0
- data/vendor/cybozulib/include/cybozu/bfd.hpp +105 -0
- data/vendor/cybozulib/include/cybozu/bit_operation.hpp +139 -0
- data/vendor/cybozulib/include/cybozu/bitvector.hpp +358 -0
- data/vendor/cybozulib/include/cybozu/condition_variable.hpp +113 -0
- data/vendor/cybozulib/include/cybozu/condition_variable_cs.hpp +74 -0
- data/vendor/cybozulib/include/cybozu/config.hpp +392 -0
- data/vendor/cybozulib/include/cybozu/critical_section.hpp +60 -0
- data/vendor/cybozulib/include/cybozu/crypto.hpp +321 -0
- data/vendor/cybozulib/include/cybozu/csucvector.hpp +624 -0
- data/vendor/cybozulib/include/cybozu/csv.hpp +294 -0
- data/vendor/cybozulib/include/cybozu/data_type.hpp +27 -0
- data/vendor/cybozulib/include/cybozu/endian.hpp +224 -0
- data/vendor/cybozulib/include/cybozu/env.hpp +63 -0
- data/vendor/cybozulib/include/cybozu/event.hpp +122 -0
- data/vendor/cybozulib/include/cybozu/exception.hpp +253 -0
- data/vendor/cybozulib/include/cybozu/file.hpp +626 -0
- data/vendor/cybozulib/include/cybozu/fmindex.hpp +291 -0
- data/vendor/cybozulib/include/cybozu/format.hpp +93 -0
- data/vendor/cybozulib/include/cybozu/frequency.hpp +264 -0
- data/vendor/cybozulib/include/cybozu/hash.hpp +67 -0
- data/vendor/cybozulib/include/cybozu/inttype.hpp +174 -0
- data/vendor/cybozulib/include/cybozu/itoa.hpp +336 -0
- data/vendor/cybozulib/include/cybozu/json.hpp +120 -0
- data/vendor/cybozulib/include/cybozu/line_stream.hpp +149 -0
- data/vendor/cybozulib/include/cybozu/link_libeay32.hpp +21 -0
- data/vendor/cybozulib/include/cybozu/link_mpir.hpp +18 -0
- data/vendor/cybozulib/include/cybozu/link_ssleay32.hpp +19 -0
- data/vendor/cybozulib/include/cybozu/log.hpp +237 -0
- data/vendor/cybozulib/include/cybozu/minixml.hpp +452 -0
- data/vendor/cybozulib/include/cybozu/mmap.hpp +143 -0
- data/vendor/cybozulib/include/cybozu/mutex.hpp +144 -0
- data/vendor/cybozulib/include/cybozu/nlp/mecab.hpp +96 -0
- data/vendor/cybozulib/include/cybozu/nlp/plsi.hpp +315 -0
- data/vendor/cybozulib/include/cybozu/nlp/random.hpp +74 -0
- data/vendor/cybozulib/include/cybozu/nlp/sparse.hpp +529 -0
- data/vendor/cybozulib/include/cybozu/nlp/svd.hpp +486 -0
- data/vendor/cybozulib/include/cybozu/nlp/tfidf.hpp +226 -0
- data/vendor/cybozulib/include/cybozu/nlp/top_score.hpp +75 -0
- data/vendor/cybozulib/include/cybozu/option.hpp +743 -0
- data/vendor/cybozulib/include/cybozu/parallel.hpp +88 -0
- data/vendor/cybozulib/include/cybozu/pcg.hpp +72 -0
- data/vendor/cybozulib/include/cybozu/process.hpp +324 -0
- data/vendor/cybozulib/include/cybozu/quit_signal_handler.hpp +66 -0
- data/vendor/cybozulib/include/cybozu/random_generator.hpp +144 -0
- data/vendor/cybozulib/include/cybozu/regex.hpp +463 -0
- data/vendor/cybozulib/include/cybozu/select8.hpp +279 -0
- data/vendor/cybozulib/include/cybozu/serializer.hpp +363 -0
- data/vendor/cybozulib/include/cybozu/sha1.hpp +209 -0
- data/vendor/cybozulib/include/cybozu/sha2.hpp +506 -0
- data/vendor/cybozulib/include/cybozu/siphash.hpp +105 -0
- data/vendor/cybozulib/include/cybozu/socket.hpp +785 -0
- data/vendor/cybozulib/include/cybozu/ssl.hpp +203 -0
- data/vendor/cybozulib/include/cybozu/stacktrace.hpp +291 -0
- data/vendor/cybozulib/include/cybozu/stream.hpp +269 -0
- data/vendor/cybozulib/include/cybozu/string.hpp +1746 -0
- data/vendor/cybozulib/include/cybozu/string_operation.hpp +365 -0
- data/vendor/cybozulib/include/cybozu/sucvector.hpp +378 -0
- data/vendor/cybozulib/include/cybozu/test.hpp +373 -0
- data/vendor/cybozulib/include/cybozu/thread.hpp +229 -0
- data/vendor/cybozulib/include/cybozu/time.hpp +281 -0
- data/vendor/cybozulib/include/cybozu/tls.hpp +115 -0
- data/vendor/cybozulib/include/cybozu/unordered_map.hpp +13 -0
- data/vendor/cybozulib/include/cybozu/unordered_set.hpp +13 -0
- data/vendor/cybozulib/include/cybozu/v128.hpp +376 -0
- data/vendor/cybozulib/include/cybozu/wavelet_matrix.hpp +345 -0
- data/vendor/cybozulib/include/cybozu/xorshift.hpp +189 -0
- data/vendor/cybozulib/include/cybozu/zlib.hpp +325 -0
- data/vendor/cybozulib/include/sais.hxx +364 -0
- data/vendor/cybozulib/misc/make_select8tbl.cpp +26 -0
- data/vendor/cybozulib/mk.bat +37 -0
- data/vendor/cybozulib/readme.md +29 -0
- data/vendor/cybozulib/release.props +12 -0
- data/vendor/cybozulib/sample/Makefile +30 -0
- data/vendor/cybozulib/sample/csucvector_smpl.cpp +42 -0
- data/vendor/cybozulib/sample/data/svd/org/test1.S +4 -0
- data/vendor/cybozulib/sample/data/svd/org/test1.U +4 -0
- data/vendor/cybozulib/sample/data/svd/org/test1.V +6 -0
- data/vendor/cybozulib/sample/data/svd/test1 +4 -0
- data/vendor/cybozulib/sample/data/svd/test2 +4 -0
- data/vendor/cybozulib/sample/desymbol.cpp +127 -0
- data/vendor/cybozulib/sample/exception_smpl.cpp +46 -0
- data/vendor/cybozulib/sample/fmindex_smpl.cpp +231 -0
- data/vendor/cybozulib/sample/log_smpl.cpp +19 -0
- data/vendor/cybozulib/sample/mecab_smpl.cpp +37 -0
- data/vendor/cybozulib/sample/option2_smpl.cpp +68 -0
- data/vendor/cybozulib/sample/option_smpl.cpp +42 -0
- data/vendor/cybozulib/sample/plsi_smpl.cpp +207 -0
- data/vendor/cybozulib/sample/proj/exception_smpl.vcproj +184 -0
- data/vendor/cybozulib/sample/proj/mecab_smpl.vcproj +184 -0
- data/vendor/cybozulib/sample/proj/ssl_smpl/ssl_smpl.vcxproj +85 -0
- data/vendor/cybozulib/sample/proj/ssl_smpl.vcproj +347 -0
- data/vendor/cybozulib/sample/proj/stacktrace_smpl/stacktrace_smpl.vcxproj +85 -0
- data/vendor/cybozulib/sample/proj/svd_smpl.vcproj +184 -0
- data/vendor/cybozulib/sample/quit_signal_handler.cpp +30 -0
- data/vendor/cybozulib/sample/serializer_smpl.cpp +196 -0
- data/vendor/cybozulib/sample/socket_smpl.cpp +82 -0
- data/vendor/cybozulib/sample/ssl_smpl.cpp +39 -0
- data/vendor/cybozulib/sample/stacktrace_smpl.cpp +52 -0
- data/vendor/cybozulib/sample/svd_bench_smpl.cpp +143 -0
- data/vendor/cybozulib/sample/svd_smpl.cpp +94 -0
- data/vendor/cybozulib/sample/wm_bench_smpl.cpp +182 -0
- data/vendor/cybozulib/sample/zlib_smpl.cpp +41 -0
- data/vendor/cybozulib/src/Makefile +8 -0
- data/vendor/cybozulib/src/base/Makefile +19 -0
- data/vendor/cybozulib/test/Makefile +12 -0
- data/vendor/cybozulib/test/base/Makefile +37 -0
- data/vendor/cybozulib/test/base/array_test.cpp +173 -0
- data/vendor/cybozulib/test/base/atoi_test.cpp +774 -0
- data/vendor/cybozulib/test/base/atomic_test.cpp +49 -0
- data/vendor/cybozulib/test/base/base64_test.cpp +113 -0
- data/vendor/cybozulib/test/base/bit_operation_test.cpp +134 -0
- data/vendor/cybozulib/test/base/bitvector_test.cpp +204 -0
- data/vendor/cybozulib/test/base/condition_variable_cs_test.cpp +92 -0
- data/vendor/cybozulib/test/base/condition_variable_test.cpp +88 -0
- data/vendor/cybozulib/test/base/config_test.cpp +236 -0
- data/vendor/cybozulib/test/base/crypto_test.cpp +122 -0
- data/vendor/cybozulib/test/base/csucvector_test.cpp +63 -0
- data/vendor/cybozulib/test/base/csv_test.cpp +182 -0
- data/vendor/cybozulib/test/base/data/a.xml +26 -0
- data/vendor/cybozulib/test/base/endian_test.cpp +56 -0
- data/vendor/cybozulib/test/base/env_test.cpp +22 -0
- data/vendor/cybozulib/test/base/event_test.cpp +41 -0
- data/vendor/cybozulib/test/base/file_test.cpp +233 -0
- data/vendor/cybozulib/test/base/fmindex_test.cpp +118 -0
- data/vendor/cybozulib/test/base/format_test.cpp +12 -0
- data/vendor/cybozulib/test/base/frequency_test.cpp +104 -0
- data/vendor/cybozulib/test/base/itoa_test.cpp +522 -0
- data/vendor/cybozulib/test/base/line_stream_test.cpp +208 -0
- data/vendor/cybozulib/test/base/mecab_test.cpp +41 -0
- data/vendor/cybozulib/test/base/minixml_test.cpp +103 -0
- data/vendor/cybozulib/test/base/mmap_test.cpp +15 -0
- data/vendor/cybozulib/test/base/option_test.cpp +487 -0
- data/vendor/cybozulib/test/base/parallel_test.cpp +48 -0
- data/vendor/cybozulib/test/base/proj/array_test/array_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/atoi_test/atoi_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/atomic_test/atomic_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/base64_test/base64_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/condition_variable_cs_test/condition_variable_cs_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/condition_variable_test/condition_variable_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/config_test/config_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/csv_test/csv_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/endian_test/endian_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/env_test/env_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/event_test/event_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/file_test/file_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/itoa_test/itoa_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/mecab_test/mecab_test.vcxproj +88 -0
- data/vendor/cybozulib/test/base/proj/minixml_test/minixml_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/mmap_test/mmap_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/serializer_test/serializer_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/sha1_test/sha1_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/stream_test/stream_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/string_operation_test/string_operation_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/string_test/string_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/thread_test/thread_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/time_test/time_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/tls_test/tls_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/zlib_test/zlib_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/random_generator_test.cpp +28 -0
- data/vendor/cybozulib/test/base/regex_test.cpp +74 -0
- data/vendor/cybozulib/test/base/serializer_test.cpp +483 -0
- data/vendor/cybozulib/test/base/sha1_test.cpp +61 -0
- data/vendor/cybozulib/test/base/sha2_test.cpp +191 -0
- data/vendor/cybozulib/test/base/siphash_test.cpp +33 -0
- data/vendor/cybozulib/test/base/socket_test.cpp +76 -0
- data/vendor/cybozulib/test/base/stream_test.cpp +101 -0
- data/vendor/cybozulib/test/base/string_operation_test.cpp +340 -0
- data/vendor/cybozulib/test/base/string_test.cpp +1705 -0
- data/vendor/cybozulib/test/base/sucvector_test.cpp +312 -0
- data/vendor/cybozulib/test/base/thread_test.cpp +62 -0
- data/vendor/cybozulib/test/base/time_test.cpp +164 -0
- data/vendor/cybozulib/test/base/tls_test.cpp +50 -0
- data/vendor/cybozulib/test/base/wavelet_matrix_test.cpp +145 -0
- data/vendor/cybozulib/test/base/zlib_test.cpp +371 -0
- data/vendor/cybozulib/test/nlp/Makefile +27 -0
- data/vendor/cybozulib/test/nlp/proj/random_test.vcproj +184 -0
- data/vendor/cybozulib/test/nlp/proj/sparse_test.vcproj +184 -0
- data/vendor/cybozulib/test/nlp/proj/svd_test.vcproj +184 -0
- data/vendor/cybozulib/test/nlp/random_test.cpp +62 -0
- data/vendor/cybozulib/test/nlp/sparse_test.cpp +347 -0
- data/vendor/cybozulib/test/nlp/svd_test.cpp +234 -0
- data/vendor/cybozulib/test/nlp/top_score_test.cpp +40 -0
- data/vendor/cybozulib/tool/create_vcproj.py +186 -0
- data/vendor/cybozulib/tool/vcproj_tmpl.py +185 -0
- data/vendor/msoffice/COPYRIGHT +27 -0
- data/vendor/msoffice/Makefile +29 -0
- data/vendor/msoffice/bin/64/msoc.dll +0 -0
- data/vendor/msoffice/bin/64/msocsample.exe +0 -0
- data/vendor/msoffice/bin/64/msoffice-crypt.exe +0 -0
- data/vendor/msoffice/bin/msoc.dll +0 -0
- data/vendor/msoffice/bin/msocsample.exe +0 -0
- data/vendor/msoffice/bin/msoffice-crypt.exe +0 -0
- data/vendor/msoffice/common.mk +71 -0
- data/vendor/msoffice/common.props +26 -0
- data/vendor/msoffice/debug.props +14 -0
- data/vendor/msoffice/include/attack.hpp +211 -0
- data/vendor/msoffice/include/cfb.hpp +777 -0
- data/vendor/msoffice/include/crypto_util.hpp +450 -0
- data/vendor/msoffice/include/custom_sha1.hpp +342 -0
- data/vendor/msoffice/include/decode.hpp +240 -0
- data/vendor/msoffice/include/encode.hpp +221 -0
- data/vendor/msoffice/include/make_dataspace.hpp +316 -0
- data/vendor/msoffice/include/msoc.h +129 -0
- data/vendor/msoffice/include/resource.hpp +7 -0
- data/vendor/msoffice/include/standard_encryption.hpp +145 -0
- data/vendor/msoffice/include/uint32vec.hpp +179 -0
- data/vendor/msoffice/include/util.hpp +212 -0
- data/vendor/msoffice/lib/.emptydir +0 -0
- data/vendor/msoffice/misc/decrypt-xls.vbs +46 -0
- data/vendor/msoffice/mk.bat +1 -0
- data/vendor/msoffice/mkdll.bat +3 -0
- data/vendor/msoffice/msoc.def +13 -0
- data/vendor/msoffice/msocsample.py +178 -0
- data/vendor/msoffice/msoffice12.sln +31 -0
- data/vendor/msoffice/readme.md +110 -0
- data/vendor/msoffice/release.props +28 -0
- data/vendor/msoffice/src/Makefile +19 -0
- data/vendor/msoffice/src/attack.cpp +124 -0
- data/vendor/msoffice/src/cfb_test.cpp +77 -0
- data/vendor/msoffice/src/minisample.c +54 -0
- data/vendor/msoffice/src/msocdll.cpp +276 -0
- data/vendor/msoffice/src/msocsample.c +136 -0
- data/vendor/msoffice/src/msoffice-crypt.cpp +219 -0
- data/vendor/msoffice/src/proj/attack/attack.vcxproj +88 -0
- data/vendor/msoffice/src/proj/main/msoffice-crypt.vcxproj +88 -0
- data/vendor/msoffice/src/sha1.cpp +234 -0
- data/vendor/msoffice/test/Makefile +20 -0
- data/vendor/msoffice/test/cfb_test.cpp +74 -0
- data/vendor/msoffice/test/hash_test.cpp +59 -0
- data/vendor/msoffice/test/proj/cfb/cfb_test.vcxproj +90 -0
- data/vendor/msoffice/test/proj/hash/hash_test.vcxproj +90 -0
- data/vendor/msoffice/test/sampl.bat +8 -0
- data/vendor/msoffice/test_all.py +46 -0
- data/vendor/update +4 -0
- metadata +351 -0
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
/**
|
|
3
|
+
@file
|
|
4
|
+
@brief FM-index
|
|
5
|
+
@author MITSUNARI Shigeo(@herumi)
|
|
6
|
+
@license modified new BSD license
|
|
7
|
+
http://opensource.org/licenses/BSD-3-Clause
|
|
8
|
+
*/
|
|
9
|
+
#include <map>
|
|
10
|
+
#include <vector>
|
|
11
|
+
#include <fstream>
|
|
12
|
+
#include <stdio.h>
|
|
13
|
+
#ifdef CYBOZU_FMINDEX_USE_CSUCVECTOR
|
|
14
|
+
#include <cybozu/csucvector.hpp>
|
|
15
|
+
#endif
|
|
16
|
+
#include <cybozu/wavelet_matrix.hpp>
|
|
17
|
+
#include <cybozu/bitvector.hpp>
|
|
18
|
+
#include <cybozu/frequency.hpp>
|
|
19
|
+
|
|
20
|
+
#ifdef _MSC_VER
|
|
21
|
+
#pragma warning(push)
|
|
22
|
+
#pragma warning(disable:4244)
|
|
23
|
+
#pragma warning(disable:4389)
|
|
24
|
+
#pragma warning(disable:4018)
|
|
25
|
+
#endif
|
|
26
|
+
#include "sais.hxx"
|
|
27
|
+
#ifdef _MSC_VER
|
|
28
|
+
#pragma warning(pop)
|
|
29
|
+
#endif
|
|
30
|
+
|
|
31
|
+
#ifdef _MSC_VER
|
|
32
|
+
#pragma warning(push)
|
|
33
|
+
#pragma warning(disable:4127) // constant condition
|
|
34
|
+
#endif
|
|
35
|
+
|
|
36
|
+
namespace cybozu {
|
|
37
|
+
/*
|
|
38
|
+
T : type of alphabet
|
|
39
|
+
isRawData : deal with input data as is
|
|
40
|
+
T must be uint8_t or uint16_t if isRawData
|
|
41
|
+
*/
|
|
42
|
+
template<class T, bool isRawData = false>
|
|
43
|
+
class FMindexT {
|
|
44
|
+
public:
|
|
45
|
+
static const size_t maxCharNum = size_t(1) << (sizeof(T) * 8);
|
|
46
|
+
typedef std::vector<uint32_t> Vec32;
|
|
47
|
+
typedef std::vector<T> Vec;
|
|
48
|
+
#ifdef CYBOZU_FMINDEX_USE_CSUCVECTOR
|
|
49
|
+
typedef cybozu::CSucVector SucVector;
|
|
50
|
+
#else
|
|
51
|
+
typedef cybozu::SucVectorT<uint32_t, false> SucVector;
|
|
52
|
+
#endif
|
|
53
|
+
typedef cybozu::WaveletMatrixT<false, SucVector> WaveletMatrix;
|
|
54
|
+
Vec32 cf;
|
|
55
|
+
WaveletMatrix wm;
|
|
56
|
+
Vec32 alignedSa;
|
|
57
|
+
SucVector alignedPos;
|
|
58
|
+
cybozu::Frequency<T, uint32_t> freq;
|
|
59
|
+
int skip_;
|
|
60
|
+
size_t charNum_;
|
|
61
|
+
|
|
62
|
+
/*
|
|
63
|
+
setup freq, cf by [begin, end)
|
|
64
|
+
*/
|
|
65
|
+
template<class Iter>
|
|
66
|
+
void initCf(Vec& v, Iter begin, Iter end)
|
|
67
|
+
{
|
|
68
|
+
const size_t size = std::distance(begin, end);
|
|
69
|
+
if (size >= (uint64_t(1) << 32) - 1) {
|
|
70
|
+
throw cybozu::Exception("FMindexT:initCf:too large dataSize") << size;
|
|
71
|
+
}
|
|
72
|
+
v.resize(size + 1); // add NUL at the end of data
|
|
73
|
+
if (isRawData) {
|
|
74
|
+
assert(sizeof(T) <= 16);
|
|
75
|
+
charNum_ = size_t(1) << (sizeof(T) * 8);
|
|
76
|
+
std::vector<uint32_t> charNumTbl(charNum_);
|
|
77
|
+
charNumTbl[0] = 1;
|
|
78
|
+
for (size_t i = 0; i < size; i++) {
|
|
79
|
+
T c = *begin++;
|
|
80
|
+
if (c <= 0) throw cybozu::Exception("FMindext:initCf:zero alphabet") << c;
|
|
81
|
+
v[i] = c;
|
|
82
|
+
charNumTbl[c]++;
|
|
83
|
+
}
|
|
84
|
+
cf.resize(charNum_);
|
|
85
|
+
uint32_t sum = 0;
|
|
86
|
+
for (size_t i = 0; i < charNum_; i++) {
|
|
87
|
+
cf[i] = sum;
|
|
88
|
+
sum += charNumTbl[i];
|
|
89
|
+
}
|
|
90
|
+
} else {
|
|
91
|
+
freq.init(begin, end);
|
|
92
|
+
charNum_ = freq.size() + 1; // +1 means last zero
|
|
93
|
+
if (charNum_ > maxCharNum) throw cybozu::Exception("FMindexT:initCf:too many alphabet");
|
|
94
|
+
for (size_t i = 0; i < size; i++) {
|
|
95
|
+
v[i] = static_cast<T>(freq.getIndex(*begin++) + 1);
|
|
96
|
+
}
|
|
97
|
+
cf.resize(charNum_);
|
|
98
|
+
cf[0] = 0;
|
|
99
|
+
uint32_t sum = 1;
|
|
100
|
+
for (size_t i = 1; i < charNum_; i++) {
|
|
101
|
+
cf[i] = sum;
|
|
102
|
+
sum += freq.getFrequency(freq.getElement(i - 1));
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
void initBwt(Vec& bwt, const Vec& s, const Vec32& sa) const
|
|
107
|
+
{
|
|
108
|
+
const size_t size = sa.size();
|
|
109
|
+
bwt.resize(size);
|
|
110
|
+
for (size_t i = 0; i < size; i++) {
|
|
111
|
+
if (sa[i] > 0) {
|
|
112
|
+
bwt[i] = s[sa[i] - 1];
|
|
113
|
+
} else {
|
|
114
|
+
bwt[i] = s[size - 1];
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
size_t getBitLen(size_t x) const
|
|
119
|
+
{
|
|
120
|
+
if (x == 0) return 1;
|
|
121
|
+
size_t ret = 0;
|
|
122
|
+
while (x > 0) {
|
|
123
|
+
x >>= 1;
|
|
124
|
+
ret++;
|
|
125
|
+
}
|
|
126
|
+
return ret;
|
|
127
|
+
}
|
|
128
|
+
public:
|
|
129
|
+
FMindexT()
|
|
130
|
+
: skip_(8)
|
|
131
|
+
, charNum_(0)
|
|
132
|
+
{
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
/*
|
|
136
|
+
[begin, end)
|
|
137
|
+
replace '\0' in [begin, end) with space
|
|
138
|
+
append '\0' at the end of [begin, end)
|
|
139
|
+
*/
|
|
140
|
+
template<class Iter>
|
|
141
|
+
void init(Iter begin, Iter end, int skip = 8)
|
|
142
|
+
{
|
|
143
|
+
if (skip <= 0) {
|
|
144
|
+
throw cybozu::Exception("FMindexT:buildFMindex:skip is positive") << skip;
|
|
145
|
+
}
|
|
146
|
+
skip_ = skip;
|
|
147
|
+
Vec v;
|
|
148
|
+
initCf(v, begin, end);
|
|
149
|
+
const size_t dataSize = v.size();
|
|
150
|
+
|
|
151
|
+
Vec32 sa;
|
|
152
|
+
sa.resize(dataSize);
|
|
153
|
+
if (saisxx(&v[0], &sa[0], (int)dataSize, (int)charNum_) == -1) {
|
|
154
|
+
throw cybozu::Exception("FMindexT:init:saisxx");
|
|
155
|
+
}
|
|
156
|
+
Vec bwt;
|
|
157
|
+
initBwt(bwt, v, sa);
|
|
158
|
+
wm.init(bwt, getBitLen(charNum_));
|
|
159
|
+
|
|
160
|
+
#if 1
|
|
161
|
+
cybozu::BitVector bv;
|
|
162
|
+
bv.resize(dataSize);
|
|
163
|
+
for (size_t i = 0; i < dataSize; i++) {
|
|
164
|
+
if ((sa[i] % skip) == 0) {
|
|
165
|
+
bv.set(i);
|
|
166
|
+
alignedSa.push_back(sa[i]);
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
alignedPos.init(bv.getBlock(), bv.size());
|
|
170
|
+
#else
|
|
171
|
+
alignedPos.resize(dataSize);
|
|
172
|
+
for (size_t i = 0; i < dataSize; i++) {
|
|
173
|
+
if ((sa[i] % skip) == 0) {
|
|
174
|
+
alignedPos.set(i);
|
|
175
|
+
alignedSa.push_back(sa[i]);
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
alignedPos.ready();
|
|
179
|
+
#endif
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
/*
|
|
183
|
+
get range of bwt for key
|
|
184
|
+
*/
|
|
185
|
+
template<class Int, class Key>
|
|
186
|
+
bool getRange(Int* pbegin, Int* pend, const Key& _key) const
|
|
187
|
+
{
|
|
188
|
+
if (_key.empty()) return false;
|
|
189
|
+
const size_t keySize = _key.size();
|
|
190
|
+
const typename Key::value_type *key;
|
|
191
|
+
Key cvtKey;
|
|
192
|
+
if (isRawData) {
|
|
193
|
+
key = &_key[0];
|
|
194
|
+
} else {
|
|
195
|
+
cvtKey.resize(keySize);
|
|
196
|
+
for (size_t i = 0; i < keySize; i++) {
|
|
197
|
+
if (freq.getFrequency(_key[i]) == 0) return false;
|
|
198
|
+
cvtKey[i] = typename Key::value_type(freq.getIndex(_key[i]) + 1);
|
|
199
|
+
}
|
|
200
|
+
key = &cvtKey[0];
|
|
201
|
+
}
|
|
202
|
+
size_t i = keySize - 1;
|
|
203
|
+
size_t begin = 0;
|
|
204
|
+
size_t end = wm.size();
|
|
205
|
+
while (begin < end) {
|
|
206
|
+
const T c = key[i];
|
|
207
|
+
const uint32_t cfc = cf[c];
|
|
208
|
+
begin = cfc + wm.rank(c, begin);
|
|
209
|
+
end = cfc + wm.rank(c, end);
|
|
210
|
+
if (i == 0) break;
|
|
211
|
+
i--;
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
if (begin < end) {
|
|
215
|
+
*pbegin = Int(begin);
|
|
216
|
+
*pend = Int(end);
|
|
217
|
+
return true;
|
|
218
|
+
}
|
|
219
|
+
return false;
|
|
220
|
+
}
|
|
221
|
+
template<class Int>
|
|
222
|
+
bool getRange(Int* pbegin, Int* pend, const char *key) const
|
|
223
|
+
{
|
|
224
|
+
return getRange(pbegin, pend, std::string(key));
|
|
225
|
+
}
|
|
226
|
+
size_t convertPosition(size_t bwtPos) const
|
|
227
|
+
{
|
|
228
|
+
size_t t = 0;
|
|
229
|
+
while (!alignedPos.get(bwtPos)) {
|
|
230
|
+
T c;
|
|
231
|
+
bwtPos = wm.get(&c, bwtPos);
|
|
232
|
+
bwtPos += cf[c];
|
|
233
|
+
t++;
|
|
234
|
+
}
|
|
235
|
+
return t + alignedSa[alignedPos.rank1(bwtPos)];
|
|
236
|
+
}
|
|
237
|
+
/*
|
|
238
|
+
get previous string at pos
|
|
239
|
+
@note assume T is vector or std::string
|
|
240
|
+
*/
|
|
241
|
+
template<class Str>
|
|
242
|
+
void getPrevString(Str& str, size_t bwtPos, size_t len) const
|
|
243
|
+
{
|
|
244
|
+
str.resize(len);
|
|
245
|
+
T c;
|
|
246
|
+
while (len > 0) {
|
|
247
|
+
bwtPos = wm.get(&c, bwtPos);
|
|
248
|
+
bwtPos += cf[c];
|
|
249
|
+
if (c == 0) {
|
|
250
|
+
str.erase(str.begin(), str.begin() + len);
|
|
251
|
+
return;
|
|
252
|
+
}
|
|
253
|
+
len--;
|
|
254
|
+
str[len] = isRawData ? c : freq.getElement(c - 1);
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
template<class OutputStream>
|
|
259
|
+
void save(OutputStream& os) const
|
|
260
|
+
{
|
|
261
|
+
cybozu::save(os, skip_);
|
|
262
|
+
cybozu::savePodVec(os, cf);
|
|
263
|
+
wm.save(os);
|
|
264
|
+
cybozu::savePodVec(os, alignedSa);
|
|
265
|
+
alignedPos.save(os);
|
|
266
|
+
if (!isRawData) freq.save(os);
|
|
267
|
+
}
|
|
268
|
+
template<class InputStream>
|
|
269
|
+
void load(InputStream& is)
|
|
270
|
+
{
|
|
271
|
+
cybozu::load(skip_, is);
|
|
272
|
+
cybozu::loadPodVec(cf, is);
|
|
273
|
+
wm.load(is);
|
|
274
|
+
cybozu::loadPodVec(alignedSa, is);
|
|
275
|
+
alignedPos.load(is);
|
|
276
|
+
if (isRawData) {
|
|
277
|
+
charNum_ = size_t(1) << (sizeof(T) * 8);
|
|
278
|
+
} else {
|
|
279
|
+
freq.load(is);
|
|
280
|
+
charNum_ = freq.size();
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
};
|
|
284
|
+
|
|
285
|
+
// FM-index over uint8_t symbols using the default isRawData = false (symbols are remapped through freq)
typedef FMindexT<uint8_t> FMindex;
|
|
286
|
+
|
|
287
|
+
} // cybozu
|
|
288
|
+
|
|
289
|
+
#ifdef _MSC_VER
|
|
290
|
+
#pragma warning(pop)
|
|
291
|
+
#endif
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
/**
|
|
3
|
+
@file
|
|
4
|
+
@brief format string
|
|
5
|
+
@author MITSUNARI Shigeo(@herumi)
|
|
6
|
+
*/
|
|
7
|
+
#include <string>
|
|
8
|
+
#include <stdio.h>
|
|
9
|
+
#include <stdarg.h>
|
|
10
|
+
#include <stdlib.h>
|
|
11
|
+
#include <cybozu/exception.hpp>
|
|
12
|
+
|
|
13
|
+
// Suppress -Wformat-nonliteral for the functions below (vformat forwards a non-literal format).
// The version test compares (major, minor) as a pair; testing "major >= 4 && minor >= 4"
// wrongly rejects releases such as GCC 5.1 whose minor number is smaller than 4.
#if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)))
#define CYBOZU_FORMAT_DISABLE_WARNING
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wformat-nonliteral"
#endif
|
|
18
|
+
|
|
19
|
+
namespace cybozu {
|
|
20
|
+
|
|
21
|
+
inline void vformat(std::string& str, const char *format, va_list args)
|
|
22
|
+
{
|
|
23
|
+
#ifdef _MSC_VER
|
|
24
|
+
_locale_t curLoc = _get_current_locale();
|
|
25
|
+
int size = _vscprintf_l(format, curLoc, args);
|
|
26
|
+
if (size < 0 || size >= INT_MAX) throw cybozu::Exception("vformat:_vscprintf_l");
|
|
27
|
+
|
|
28
|
+
str.resize(size + 1);
|
|
29
|
+
|
|
30
|
+
int ret = _vsprintf_s_l(&str[0], size + 1, format, curLoc, args);
|
|
31
|
+
if (ret < 0) throw cybozu::Exception("vformat:_vsprintf_s_l");
|
|
32
|
+
str.resize(size);
|
|
33
|
+
#else
|
|
34
|
+
#if 1
|
|
35
|
+
char *p;
|
|
36
|
+
int ret = vasprintf(&p, format, args);
|
|
37
|
+
if (ret < 0) throw cybozu::Exception("vformat:vasnprintf");
|
|
38
|
+
try {
|
|
39
|
+
str.assign(p, ret);
|
|
40
|
+
free(p);
|
|
41
|
+
} catch (...) {
|
|
42
|
+
free(p);
|
|
43
|
+
throw std::bad_alloc();
|
|
44
|
+
}
|
|
45
|
+
#else
|
|
46
|
+
// slow
|
|
47
|
+
va_list keep;
|
|
48
|
+
va_copy(keep, args);
|
|
49
|
+
int len = vsnprintf(0, 0, format, args); // len excludes the null byte
|
|
50
|
+
if (len < 0) throw cybozu::Exception("vformat:vasnprintf err1");
|
|
51
|
+
str.resize(len + 1);
|
|
52
|
+
len = vsnprintf(&str[0], str.size(), format, keep); // len incluedes the null byte
|
|
53
|
+
if (len < 0) throw cybozu::Exception("vformat:vasnprintf err2");
|
|
54
|
+
str.resize(len);
|
|
55
|
+
#endif
|
|
56
|
+
#endif
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
// CYBOZU_FORMAT_PRINTF is written in front of the format parameter of format();
// it expands to _Printf_format_string_ with MSVC and to nothing with other compilers.
#ifdef _MSC_VER
|
|
60
|
+
#define CYBOZU_FORMAT_PRINTF _Printf_format_string_
|
|
61
|
+
#else
|
|
62
|
+
#define CYBOZU_FORMAT_PRINTF
|
|
63
|
+
#endif
|
|
64
|
+
|
|
65
|
+
#ifdef __GNUC__
|
|
66
|
+
__attribute__((format(printf, 2, 3)))
|
|
67
|
+
#endif
|
|
68
|
+
inline void format(std::string& str, CYBOZU_FORMAT_PRINTF const char *format, ...)
|
|
69
|
+
{
|
|
70
|
+
va_list args;
|
|
71
|
+
va_start(args, format);
|
|
72
|
+
cybozu::vformat(str, format, args);
|
|
73
|
+
va_end(args);
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
#ifdef __GNUC__
|
|
77
|
+
__attribute__((format(printf, 1, 2)))
|
|
78
|
+
#endif
|
|
79
|
+
inline std::string format(CYBOZU_FORMAT_PRINTF const char *format, ...)
|
|
80
|
+
{
|
|
81
|
+
std::string str;
|
|
82
|
+
va_list args;
|
|
83
|
+
va_start(args, format);
|
|
84
|
+
cybozu::vformat(str, format, args);
|
|
85
|
+
va_end(args);
|
|
86
|
+
return str;
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
} // cybozu
|
|
90
|
+
|
|
91
|
+
// restore the diagnostic state saved by the push at the top of this header,
// so that -Wformat-nonliteral stays enabled in the files which include it
#ifdef CYBOZU_FORMAT_DISABLE_WARNING
#pragma GCC diagnostic pop
#endif
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
/**
|
|
3
|
+
@file
|
|
4
|
+
@brief frequency of elements in a sequence
|
|
5
|
+
@author MITSUNARI Shigeo(@herumi)
|
|
6
|
+
@license modified new BSD license
|
|
7
|
+
http://opensource.org/licenses/BSD-3-Clause
|
|
8
|
+
*/
|
|
9
|
+
#include <assert.h>
|
|
10
|
+
#include <vector>
|
|
11
|
+
#include <algorithm>
|
|
12
|
+
#include <functional>
|
|
13
|
+
#include <iostream>
|
|
14
|
+
#include <cybozu/exception.hpp>
|
|
15
|
+
#include <cybozu/unordered_map.hpp>
|
|
16
|
+
#include <cybozu/serializer.hpp>
|
|
17
|
+
|
|
18
|
+
namespace cybozu {
|
|
19
|
+
|
|
20
|
+
namespace freq_local {
|
|
21
|
+
|
|
22
|
+
template<class Element, class Int = size_t>
|
|
23
|
+
class FrequencyVec {
|
|
24
|
+
static const size_t N = size_t(1) << (sizeof(Element) * 8);
|
|
25
|
+
size_t size_;
|
|
26
|
+
Int freqTbl_[N];
|
|
27
|
+
uint8_t char2idx_[N];
|
|
28
|
+
uint8_t idx2char_[N];
|
|
29
|
+
struct Greater {
|
|
30
|
+
const Int *p_;
|
|
31
|
+
explicit Greater(const Int *p) : p_(p) {}
|
|
32
|
+
bool operator()(uint8_t lhs, uint8_t rhs) const
|
|
33
|
+
{
|
|
34
|
+
Int a = p_[lhs];
|
|
35
|
+
Int b = p_[rhs];
|
|
36
|
+
if (a > b) return true;
|
|
37
|
+
if (a < b) return false;
|
|
38
|
+
return a > b;
|
|
39
|
+
}
|
|
40
|
+
};
|
|
41
|
+
public:
|
|
42
|
+
typedef Element value_type;
|
|
43
|
+
typedef Int size_type;
|
|
44
|
+
|
|
45
|
+
FrequencyVec() { clear(); }
|
|
46
|
+
template<class Iter>
|
|
47
|
+
FrequencyVec(Iter begin, Iter end)
|
|
48
|
+
{
|
|
49
|
+
clear();
|
|
50
|
+
init(begin, end);
|
|
51
|
+
}
|
|
52
|
+
void clear()
|
|
53
|
+
{
|
|
54
|
+
size_ = 0;
|
|
55
|
+
memset(freqTbl_, 0, sizeof(freqTbl_));
|
|
56
|
+
}
|
|
57
|
+
template<class Iter>
|
|
58
|
+
void init(Iter begin, Iter end)
|
|
59
|
+
{
|
|
60
|
+
while (begin != end) {
|
|
61
|
+
append(*begin);
|
|
62
|
+
++begin;
|
|
63
|
+
}
|
|
64
|
+
ready();
|
|
65
|
+
}
|
|
66
|
+
void append(const Element e)
|
|
67
|
+
{
|
|
68
|
+
freqTbl_[uint8_t(e)]++;
|
|
69
|
+
}
|
|
70
|
+
void ready()
|
|
71
|
+
{
|
|
72
|
+
for (size_t i = 0; i < N; i++) idx2char_[i] = uint8_t(i);
|
|
73
|
+
Greater greater(freqTbl_);
|
|
74
|
+
std::sort(idx2char_, idx2char_ + N, greater);
|
|
75
|
+
size_ = 0;
|
|
76
|
+
for (size_t i = 0; i < N; i++) {
|
|
77
|
+
uint8_t c = idx2char_[i];
|
|
78
|
+
char2idx_[c] = (uint8_t)i;
|
|
79
|
+
if (freqTbl_[c]) size_++;
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
/*
|
|
83
|
+
element -> freq
|
|
84
|
+
*/
|
|
85
|
+
Int getFrequency(Element e) const { return freqTbl_[uint8_t(e)]; }
|
|
86
|
+
/*
|
|
87
|
+
element -> idx
|
|
88
|
+
*/
|
|
89
|
+
Int getIndex(Element e) const { return char2idx_[uint8_t(e)]; }
|
|
90
|
+
/*
|
|
91
|
+
idx -> element
|
|
92
|
+
*/
|
|
93
|
+
Element getElement(size_t idx) const
|
|
94
|
+
{
|
|
95
|
+
// if (idx >= N) throw cybozu::Exception("Frequency:getElement:bad idx") << idx;
|
|
96
|
+
assert(idx < N);
|
|
97
|
+
return Element(idx2char_[idx]);
|
|
98
|
+
}
|
|
99
|
+
size_t size() const { return size_; }
|
|
100
|
+
template<class InputStream>
|
|
101
|
+
void load(InputStream& is)
|
|
102
|
+
{
|
|
103
|
+
cybozu::load(size_, is);
|
|
104
|
+
cybozu::loadRange(freqTbl_, N, is);
|
|
105
|
+
cybozu::loadRange(char2idx_, N, is);
|
|
106
|
+
cybozu::loadRange(idx2char_, N, is);
|
|
107
|
+
}
|
|
108
|
+
void save(std::ostream& os) const
|
|
109
|
+
{
|
|
110
|
+
cybozu::save(os, size_);
|
|
111
|
+
cybozu::saveRange(os, freqTbl_, N);
|
|
112
|
+
cybozu::saveRange(os, char2idx_, N);
|
|
113
|
+
cybozu::saveRange(os, idx2char_, N);
|
|
114
|
+
}
|
|
115
|
+
void put() const
|
|
116
|
+
{
|
|
117
|
+
for (size_t i = 0; i < size_; i++) {
|
|
118
|
+
uint8_t c = idx2char_[i];
|
|
119
|
+
printf("%d %d %d\n", (int)i, c, freqTbl_[c]);
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
};
|
|
123
|
+
|
|
124
|
+
} // cybozu::freq_local
|
|
125
|
+
|
|
126
|
+
/*
|
|
127
|
+
count Element
|
|
128
|
+
Element : type of element
|
|
129
|
+
Int : type of counter
|
|
130
|
+
*/
|
|
131
|
+
template<class Element, class Int = size_t>
|
|
132
|
+
class Frequency {
|
|
133
|
+
struct FreqIdx {
|
|
134
|
+
Int freq;
|
|
135
|
+
mutable Int idx;
|
|
136
|
+
template<class InputStream>
|
|
137
|
+
void load(InputStream& is)
|
|
138
|
+
{
|
|
139
|
+
cybozu::load(freq, is);
|
|
140
|
+
cybozu::load(idx, is);
|
|
141
|
+
}
|
|
142
|
+
template<class OutputStream>
|
|
143
|
+
void save(OutputStream& os) const
|
|
144
|
+
{
|
|
145
|
+
cybozu::save(os, freq);
|
|
146
|
+
cybozu::save(os, idx);
|
|
147
|
+
}
|
|
148
|
+
};
|
|
149
|
+
typedef CYBOZU_NAMESPACE_STD::unordered_map<Element, FreqIdx> Map;
|
|
150
|
+
typedef Element value_type;
|
|
151
|
+
typedef Int size_type;
|
|
152
|
+
typedef std::vector<typename Map::const_iterator> Idx2Ref;
|
|
153
|
+
static inline bool greater(typename Map::const_iterator i, typename Map::const_iterator j)
|
|
154
|
+
{
|
|
155
|
+
const Int a = i->second.freq;
|
|
156
|
+
const Int b = j->second.freq;
|
|
157
|
+
if (a > b) return true;
|
|
158
|
+
if (a < b) return false;
|
|
159
|
+
return i->first > j->first;
|
|
160
|
+
}
|
|
161
|
+
Map m_;
|
|
162
|
+
Idx2Ref idx2ref_;
|
|
163
|
+
void initIdx2Ref()
|
|
164
|
+
{
|
|
165
|
+
idx2ref_.resize(m_.size());
|
|
166
|
+
size_t pos = 0;
|
|
167
|
+
for (typename Map::const_iterator i = m_.begin(), ie = m_.end(); i != ie; ++i) {
|
|
168
|
+
idx2ref_[pos++] = i;
|
|
169
|
+
}
|
|
170
|
+
std::sort(idx2ref_.begin(), idx2ref_.end(), greater);
|
|
171
|
+
}
|
|
172
|
+
public:
|
|
173
|
+
Frequency(){ clear(); }
|
|
174
|
+
template<class Iter>
|
|
175
|
+
Frequency(Iter begin, Iter end)
|
|
176
|
+
{
|
|
177
|
+
clear();
|
|
178
|
+
init(begin, end);
|
|
179
|
+
}
|
|
180
|
+
void clear()
|
|
181
|
+
{
|
|
182
|
+
m_.clear();
|
|
183
|
+
idx2ref_.clear();
|
|
184
|
+
}
|
|
185
|
+
template<class Iter>
|
|
186
|
+
void init(Iter begin, Iter end)
|
|
187
|
+
{
|
|
188
|
+
while (begin != end) {
|
|
189
|
+
append(*begin);
|
|
190
|
+
++begin;
|
|
191
|
+
}
|
|
192
|
+
ready();
|
|
193
|
+
}
|
|
194
|
+
void append(const Element& e)
|
|
195
|
+
{
|
|
196
|
+
m_[e].freq++;
|
|
197
|
+
}
|
|
198
|
+
void ready()
|
|
199
|
+
{
|
|
200
|
+
initIdx2Ref();
|
|
201
|
+
for (size_t i = 0, ie = idx2ref_.size(); i < ie; i++) {
|
|
202
|
+
idx2ref_[i]->second.idx = (Int)i;
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
/*
|
|
206
|
+
element -> freq
|
|
207
|
+
*/
|
|
208
|
+
Int getFrequency(const Element& e) const
|
|
209
|
+
{
|
|
210
|
+
typename Map::const_iterator i = m_.find(e);
|
|
211
|
+
return (i != m_.end()) ? i->second.freq : 0;
|
|
212
|
+
}
|
|
213
|
+
/*
|
|
214
|
+
element -> idx
|
|
215
|
+
*/
|
|
216
|
+
Int getIndex(const Element& e) const
|
|
217
|
+
{
|
|
218
|
+
typename Map::const_iterator i = m_.find(e);
|
|
219
|
+
if (i == m_.end()) throw cybozu::Exception("Frequency:getIndex:not found") << e;
|
|
220
|
+
return i->second.idx;
|
|
221
|
+
}
|
|
222
|
+
/*
|
|
223
|
+
idx -> element
|
|
224
|
+
*/
|
|
225
|
+
const Element& getElement(size_t idx) const
|
|
226
|
+
{
|
|
227
|
+
if (idx >= idx2ref_.size()) throw cybozu::Exception("Frequency:getElement:bad idx") << idx;
|
|
228
|
+
return idx2ref_[idx]->first;
|
|
229
|
+
}
|
|
230
|
+
size_t size() const { return idx2ref_.size(); }
|
|
231
|
+
template<class InputStream>
|
|
232
|
+
void load(InputStream& is)
|
|
233
|
+
{
|
|
234
|
+
cybozu::load(m_, is);
|
|
235
|
+
initIdx2Ref();
|
|
236
|
+
}
|
|
237
|
+
template<class OutputStream>
|
|
238
|
+
void save(OutputStream& os) const
|
|
239
|
+
{
|
|
240
|
+
cybozu::save(os, m_);
|
|
241
|
+
}
|
|
242
|
+
void put() const
|
|
243
|
+
{
|
|
244
|
+
for (size_t i = 0, n = idx2ref_.size(); i < n; i++) {
|
|
245
|
+
typename Map::const_iterator j = idx2ref_[i];
|
|
246
|
+
std::cout << i << ' ' << j->first << ' ' << j->second.freq << std::endl;
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
};
|
|
250
|
+
|
|
251
|
+
template<class Int>
|
|
252
|
+
struct Frequency<uint8_t, Int> : freq_local::FrequencyVec<uint8_t, Int> {
|
|
253
|
+
Frequency() {}
|
|
254
|
+
template<class Iterator>
|
|
255
|
+
Frequency(Iterator begin, Iterator end) : freq_local::FrequencyVec<uint8_t, Int>(begin, end) {}
|
|
256
|
+
};
|
|
257
|
+
template<class Int>
|
|
258
|
+
struct Frequency<char, Int> : freq_local::FrequencyVec<char, Int> {
|
|
259
|
+
Frequency() {}
|
|
260
|
+
template<class Iterator>
|
|
261
|
+
Frequency(Iterator begin, Iterator end) : freq_local::FrequencyVec<char, Int>(begin, end) {}
|
|
262
|
+
};
|
|
263
|
+
|
|
264
|
+
} // cybozu
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
#include <cybozu/inttype.hpp>
|
|
3
|
+
|
|
4
|
+
namespace cybozu {
|
|
5
|
+
|
|
6
|
+
/*
	32-bit FNV-1a hash of the elements in [begin, end)
	v : initial state; 0 means "use the default offset basis 2166136261"
	each element is xor-ed into the state, then the state is multiplied by 16777619
*/
template<class Iter>
uint32_t hash32(Iter begin, Iter end, uint32_t v = 0)
{
	const uint32_t offsetBasis = 2166136261U;
	const uint32_t prime = 16777619;
	uint32_t h = (v == 0) ? offsetBasis : v;
	for (; begin != end; ++begin) {
		h ^= *begin;
		h *= prime;
	}
	return h;
}
|
|
16
|
+
/*
	64-bit FNV-1a hash of the elements in [begin, end), followed by a final
	fold that xors the upper 32 bits into the lower 32 bits
	v : initial state; 0 means "use the default offset basis 14695981039346656037"
*/
template<class Iter>
uint64_t hash64(Iter begin, Iter end, uint64_t v = 0)
{
	const uint64_t offsetBasis = 14695981039346656037ULL;
	const uint64_t prime = 1099511628211ULL;
	uint64_t h = (v == 0) ? offsetBasis : v;
	for (; begin != end; ++begin) {
		h ^= *begin;
		h *= prime;
	}
	// the fold is applied even when the range is empty
	return h ^ (h >> 32);
}
|
|
27
|
+
/*
	hash32 of the n elements starting at x
	forwards to the range version with [x, x + n)
*/
template<class T>
uint32_t hash32(const T *x, size_t n, uint32_t v = 0)
{
	const T *const last = x + n;
	return hash32(x, last, v);
}
|
|
32
|
+
/*
	hash64 of the n elements starting at x
	forwards to the range version with [x, x + n)
*/
template<class T>
uint64_t hash64(const T *x, size_t n, uint64_t v = 0)
{
	const T *const last = x + n;
	return hash64(x, last, v);
}
|
|
37
|
+
|
|
38
|
+
} // cybozu
|
|
39
|
+
|
|
40
|
+
namespace boost {

/*
	declaration only; boost::hash is not defined in this file
*/
template<typename T>
struct hash;

} // boost
|
|
46
|
+
|
|
47
|
+
// C++11 or later : take the declaration of hash from the standard header
#if CYBOZU_CPP_VERSION >= CYBOZU_CPP_VERSION_CPP11
#include <functional>
#else

// otherwise declare hash by hand inside the namespace opened by
// CYBOZU_NAMESPACE_TR1_BEGIN (macro defined elsewhere; presumably std::tr1 -- TODO confirm)
namespace std { CYBOZU_NAMESPACE_TR1_BEGIN

#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4099) // mismatch class and struct
#endif
// the declaration is skipped for clang on Apple platforms
#if !(defined(__APPLE__) && defined(__clang__))
template<class T>
struct hash;
#endif
#ifdef _MSC_VER
#pragma warning(pop)
#endif

CYBOZU_NAMESPACE_TR1_END } // std

#endif
|