ooxml_crypt 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +58 -0
- data/Rakefile +12 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/ext/ooxml_crypt/extconf.rb +18 -0
- data/ext/ooxml_crypt/ooxml_crypt.c +27 -0
- data/ext/ooxml_crypt/ooxml_crypt.h +7 -0
- data/lib/ooxml_crypt/version.rb +5 -0
- data/lib/ooxml_crypt.rb +75 -0
- data/vendor/cybozulib/.github/workflows/main.yml +12 -0
- data/vendor/cybozulib/.gitignore +5 -0
- data/vendor/cybozulib/CMakeLists.txt +6 -0
- data/vendor/cybozulib/COPYRIGHT +27 -0
- data/vendor/cybozulib/Makefile +26 -0
- data/vendor/cybozulib/bin/libeay32.dll +0 -0
- data/vendor/cybozulib/bin/libmecab.dll +0 -0
- data/vendor/cybozulib/bin/ssleay32.dll +0 -0
- data/vendor/cybozulib/common.mk +116 -0
- data/vendor/cybozulib/common.props +25 -0
- data/vendor/cybozulib/cybozulib.sln +286 -0
- data/vendor/cybozulib/debug.props +14 -0
- data/vendor/cybozulib/include/cybozu/array.hpp +197 -0
- data/vendor/cybozulib/include/cybozu/atoi.hpp +238 -0
- data/vendor/cybozulib/include/cybozu/atomic.hpp +146 -0
- data/vendor/cybozulib/include/cybozu/base64.hpp +210 -0
- data/vendor/cybozulib/include/cybozu/benchmark.hpp +212 -0
- data/vendor/cybozulib/include/cybozu/bfd.hpp +105 -0
- data/vendor/cybozulib/include/cybozu/bit_operation.hpp +139 -0
- data/vendor/cybozulib/include/cybozu/bitvector.hpp +358 -0
- data/vendor/cybozulib/include/cybozu/condition_variable.hpp +113 -0
- data/vendor/cybozulib/include/cybozu/condition_variable_cs.hpp +74 -0
- data/vendor/cybozulib/include/cybozu/config.hpp +392 -0
- data/vendor/cybozulib/include/cybozu/critical_section.hpp +60 -0
- data/vendor/cybozulib/include/cybozu/crypto.hpp +321 -0
- data/vendor/cybozulib/include/cybozu/csucvector.hpp +624 -0
- data/vendor/cybozulib/include/cybozu/csv.hpp +294 -0
- data/vendor/cybozulib/include/cybozu/data_type.hpp +27 -0
- data/vendor/cybozulib/include/cybozu/endian.hpp +224 -0
- data/vendor/cybozulib/include/cybozu/env.hpp +63 -0
- data/vendor/cybozulib/include/cybozu/event.hpp +122 -0
- data/vendor/cybozulib/include/cybozu/exception.hpp +253 -0
- data/vendor/cybozulib/include/cybozu/file.hpp +626 -0
- data/vendor/cybozulib/include/cybozu/fmindex.hpp +291 -0
- data/vendor/cybozulib/include/cybozu/format.hpp +93 -0
- data/vendor/cybozulib/include/cybozu/frequency.hpp +264 -0
- data/vendor/cybozulib/include/cybozu/hash.hpp +67 -0
- data/vendor/cybozulib/include/cybozu/inttype.hpp +174 -0
- data/vendor/cybozulib/include/cybozu/itoa.hpp +336 -0
- data/vendor/cybozulib/include/cybozu/json.hpp +120 -0
- data/vendor/cybozulib/include/cybozu/line_stream.hpp +149 -0
- data/vendor/cybozulib/include/cybozu/link_libeay32.hpp +21 -0
- data/vendor/cybozulib/include/cybozu/link_mpir.hpp +18 -0
- data/vendor/cybozulib/include/cybozu/link_ssleay32.hpp +19 -0
- data/vendor/cybozulib/include/cybozu/log.hpp +237 -0
- data/vendor/cybozulib/include/cybozu/minixml.hpp +452 -0
- data/vendor/cybozulib/include/cybozu/mmap.hpp +143 -0
- data/vendor/cybozulib/include/cybozu/mutex.hpp +144 -0
- data/vendor/cybozulib/include/cybozu/nlp/mecab.hpp +96 -0
- data/vendor/cybozulib/include/cybozu/nlp/plsi.hpp +315 -0
- data/vendor/cybozulib/include/cybozu/nlp/random.hpp +74 -0
- data/vendor/cybozulib/include/cybozu/nlp/sparse.hpp +529 -0
- data/vendor/cybozulib/include/cybozu/nlp/svd.hpp +486 -0
- data/vendor/cybozulib/include/cybozu/nlp/tfidf.hpp +226 -0
- data/vendor/cybozulib/include/cybozu/nlp/top_score.hpp +75 -0
- data/vendor/cybozulib/include/cybozu/option.hpp +743 -0
- data/vendor/cybozulib/include/cybozu/parallel.hpp +88 -0
- data/vendor/cybozulib/include/cybozu/pcg.hpp +72 -0
- data/vendor/cybozulib/include/cybozu/process.hpp +324 -0
- data/vendor/cybozulib/include/cybozu/quit_signal_handler.hpp +66 -0
- data/vendor/cybozulib/include/cybozu/random_generator.hpp +144 -0
- data/vendor/cybozulib/include/cybozu/regex.hpp +463 -0
- data/vendor/cybozulib/include/cybozu/select8.hpp +279 -0
- data/vendor/cybozulib/include/cybozu/serializer.hpp +363 -0
- data/vendor/cybozulib/include/cybozu/sha1.hpp +209 -0
- data/vendor/cybozulib/include/cybozu/sha2.hpp +506 -0
- data/vendor/cybozulib/include/cybozu/siphash.hpp +105 -0
- data/vendor/cybozulib/include/cybozu/socket.hpp +785 -0
- data/vendor/cybozulib/include/cybozu/ssl.hpp +203 -0
- data/vendor/cybozulib/include/cybozu/stacktrace.hpp +291 -0
- data/vendor/cybozulib/include/cybozu/stream.hpp +269 -0
- data/vendor/cybozulib/include/cybozu/string.hpp +1746 -0
- data/vendor/cybozulib/include/cybozu/string_operation.hpp +365 -0
- data/vendor/cybozulib/include/cybozu/sucvector.hpp +378 -0
- data/vendor/cybozulib/include/cybozu/test.hpp +373 -0
- data/vendor/cybozulib/include/cybozu/thread.hpp +229 -0
- data/vendor/cybozulib/include/cybozu/time.hpp +281 -0
- data/vendor/cybozulib/include/cybozu/tls.hpp +115 -0
- data/vendor/cybozulib/include/cybozu/unordered_map.hpp +13 -0
- data/vendor/cybozulib/include/cybozu/unordered_set.hpp +13 -0
- data/vendor/cybozulib/include/cybozu/v128.hpp +376 -0
- data/vendor/cybozulib/include/cybozu/wavelet_matrix.hpp +345 -0
- data/vendor/cybozulib/include/cybozu/xorshift.hpp +189 -0
- data/vendor/cybozulib/include/cybozu/zlib.hpp +325 -0
- data/vendor/cybozulib/include/sais.hxx +364 -0
- data/vendor/cybozulib/misc/make_select8tbl.cpp +26 -0
- data/vendor/cybozulib/mk.bat +37 -0
- data/vendor/cybozulib/readme.md +29 -0
- data/vendor/cybozulib/release.props +12 -0
- data/vendor/cybozulib/sample/Makefile +30 -0
- data/vendor/cybozulib/sample/csucvector_smpl.cpp +42 -0
- data/vendor/cybozulib/sample/data/svd/org/test1.S +4 -0
- data/vendor/cybozulib/sample/data/svd/org/test1.U +4 -0
- data/vendor/cybozulib/sample/data/svd/org/test1.V +6 -0
- data/vendor/cybozulib/sample/data/svd/test1 +4 -0
- data/vendor/cybozulib/sample/data/svd/test2 +4 -0
- data/vendor/cybozulib/sample/desymbol.cpp +127 -0
- data/vendor/cybozulib/sample/exception_smpl.cpp +46 -0
- data/vendor/cybozulib/sample/fmindex_smpl.cpp +231 -0
- data/vendor/cybozulib/sample/log_smpl.cpp +19 -0
- data/vendor/cybozulib/sample/mecab_smpl.cpp +37 -0
- data/vendor/cybozulib/sample/option2_smpl.cpp +68 -0
- data/vendor/cybozulib/sample/option_smpl.cpp +42 -0
- data/vendor/cybozulib/sample/plsi_smpl.cpp +207 -0
- data/vendor/cybozulib/sample/proj/exception_smpl.vcproj +184 -0
- data/vendor/cybozulib/sample/proj/mecab_smpl.vcproj +184 -0
- data/vendor/cybozulib/sample/proj/ssl_smpl/ssl_smpl.vcxproj +85 -0
- data/vendor/cybozulib/sample/proj/ssl_smpl.vcproj +347 -0
- data/vendor/cybozulib/sample/proj/stacktrace_smpl/stacktrace_smpl.vcxproj +85 -0
- data/vendor/cybozulib/sample/proj/svd_smpl.vcproj +184 -0
- data/vendor/cybozulib/sample/quit_signal_handler.cpp +30 -0
- data/vendor/cybozulib/sample/serializer_smpl.cpp +196 -0
- data/vendor/cybozulib/sample/socket_smpl.cpp +82 -0
- data/vendor/cybozulib/sample/ssl_smpl.cpp +39 -0
- data/vendor/cybozulib/sample/stacktrace_smpl.cpp +52 -0
- data/vendor/cybozulib/sample/svd_bench_smpl.cpp +143 -0
- data/vendor/cybozulib/sample/svd_smpl.cpp +94 -0
- data/vendor/cybozulib/sample/wm_bench_smpl.cpp +182 -0
- data/vendor/cybozulib/sample/zlib_smpl.cpp +41 -0
- data/vendor/cybozulib/src/Makefile +8 -0
- data/vendor/cybozulib/src/base/Makefile +19 -0
- data/vendor/cybozulib/test/Makefile +12 -0
- data/vendor/cybozulib/test/base/Makefile +37 -0
- data/vendor/cybozulib/test/base/array_test.cpp +173 -0
- data/vendor/cybozulib/test/base/atoi_test.cpp +774 -0
- data/vendor/cybozulib/test/base/atomic_test.cpp +49 -0
- data/vendor/cybozulib/test/base/base64_test.cpp +113 -0
- data/vendor/cybozulib/test/base/bit_operation_test.cpp +134 -0
- data/vendor/cybozulib/test/base/bitvector_test.cpp +204 -0
- data/vendor/cybozulib/test/base/condition_variable_cs_test.cpp +92 -0
- data/vendor/cybozulib/test/base/condition_variable_test.cpp +88 -0
- data/vendor/cybozulib/test/base/config_test.cpp +236 -0
- data/vendor/cybozulib/test/base/crypto_test.cpp +122 -0
- data/vendor/cybozulib/test/base/csucvector_test.cpp +63 -0
- data/vendor/cybozulib/test/base/csv_test.cpp +182 -0
- data/vendor/cybozulib/test/base/data/a.xml +26 -0
- data/vendor/cybozulib/test/base/endian_test.cpp +56 -0
- data/vendor/cybozulib/test/base/env_test.cpp +22 -0
- data/vendor/cybozulib/test/base/event_test.cpp +41 -0
- data/vendor/cybozulib/test/base/file_test.cpp +233 -0
- data/vendor/cybozulib/test/base/fmindex_test.cpp +118 -0
- data/vendor/cybozulib/test/base/format_test.cpp +12 -0
- data/vendor/cybozulib/test/base/frequency_test.cpp +104 -0
- data/vendor/cybozulib/test/base/itoa_test.cpp +522 -0
- data/vendor/cybozulib/test/base/line_stream_test.cpp +208 -0
- data/vendor/cybozulib/test/base/mecab_test.cpp +41 -0
- data/vendor/cybozulib/test/base/minixml_test.cpp +103 -0
- data/vendor/cybozulib/test/base/mmap_test.cpp +15 -0
- data/vendor/cybozulib/test/base/option_test.cpp +487 -0
- data/vendor/cybozulib/test/base/parallel_test.cpp +48 -0
- data/vendor/cybozulib/test/base/proj/array_test/array_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/atoi_test/atoi_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/atomic_test/atomic_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/base64_test/base64_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/condition_variable_cs_test/condition_variable_cs_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/condition_variable_test/condition_variable_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/config_test/config_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/csv_test/csv_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/endian_test/endian_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/env_test/env_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/event_test/event_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/file_test/file_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/itoa_test/itoa_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/mecab_test/mecab_test.vcxproj +88 -0
- data/vendor/cybozulib/test/base/proj/minixml_test/minixml_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/mmap_test/mmap_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/serializer_test/serializer_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/sha1_test/sha1_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/stream_test/stream_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/string_operation_test/string_operation_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/string_test/string_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/thread_test/thread_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/time_test/time_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/tls_test/tls_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/zlib_test/zlib_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/random_generator_test.cpp +28 -0
- data/vendor/cybozulib/test/base/regex_test.cpp +74 -0
- data/vendor/cybozulib/test/base/serializer_test.cpp +483 -0
- data/vendor/cybozulib/test/base/sha1_test.cpp +61 -0
- data/vendor/cybozulib/test/base/sha2_test.cpp +191 -0
- data/vendor/cybozulib/test/base/siphash_test.cpp +33 -0
- data/vendor/cybozulib/test/base/socket_test.cpp +76 -0
- data/vendor/cybozulib/test/base/stream_test.cpp +101 -0
- data/vendor/cybozulib/test/base/string_operation_test.cpp +340 -0
- data/vendor/cybozulib/test/base/string_test.cpp +1705 -0
- data/vendor/cybozulib/test/base/sucvector_test.cpp +312 -0
- data/vendor/cybozulib/test/base/thread_test.cpp +62 -0
- data/vendor/cybozulib/test/base/time_test.cpp +164 -0
- data/vendor/cybozulib/test/base/tls_test.cpp +50 -0
- data/vendor/cybozulib/test/base/wavelet_matrix_test.cpp +145 -0
- data/vendor/cybozulib/test/base/zlib_test.cpp +371 -0
- data/vendor/cybozulib/test/nlp/Makefile +27 -0
- data/vendor/cybozulib/test/nlp/proj/random_test.vcproj +184 -0
- data/vendor/cybozulib/test/nlp/proj/sparse_test.vcproj +184 -0
- data/vendor/cybozulib/test/nlp/proj/svd_test.vcproj +184 -0
- data/vendor/cybozulib/test/nlp/random_test.cpp +62 -0
- data/vendor/cybozulib/test/nlp/sparse_test.cpp +347 -0
- data/vendor/cybozulib/test/nlp/svd_test.cpp +234 -0
- data/vendor/cybozulib/test/nlp/top_score_test.cpp +40 -0
- data/vendor/cybozulib/tool/create_vcproj.py +186 -0
- data/vendor/cybozulib/tool/vcproj_tmpl.py +185 -0
- data/vendor/msoffice/COPYRIGHT +27 -0
- data/vendor/msoffice/Makefile +29 -0
- data/vendor/msoffice/bin/64/msoc.dll +0 -0
- data/vendor/msoffice/bin/64/msocsample.exe +0 -0
- data/vendor/msoffice/bin/64/msoffice-crypt.exe +0 -0
- data/vendor/msoffice/bin/msoc.dll +0 -0
- data/vendor/msoffice/bin/msocsample.exe +0 -0
- data/vendor/msoffice/bin/msoffice-crypt.exe +0 -0
- data/vendor/msoffice/common.mk +71 -0
- data/vendor/msoffice/common.props +26 -0
- data/vendor/msoffice/debug.props +14 -0
- data/vendor/msoffice/include/attack.hpp +211 -0
- data/vendor/msoffice/include/cfb.hpp +777 -0
- data/vendor/msoffice/include/crypto_util.hpp +450 -0
- data/vendor/msoffice/include/custom_sha1.hpp +342 -0
- data/vendor/msoffice/include/decode.hpp +240 -0
- data/vendor/msoffice/include/encode.hpp +221 -0
- data/vendor/msoffice/include/make_dataspace.hpp +316 -0
- data/vendor/msoffice/include/msoc.h +129 -0
- data/vendor/msoffice/include/resource.hpp +7 -0
- data/vendor/msoffice/include/standard_encryption.hpp +145 -0
- data/vendor/msoffice/include/uint32vec.hpp +179 -0
- data/vendor/msoffice/include/util.hpp +212 -0
- data/vendor/msoffice/lib/.emptydir +0 -0
- data/vendor/msoffice/misc/decrypt-xls.vbs +46 -0
- data/vendor/msoffice/mk.bat +1 -0
- data/vendor/msoffice/mkdll.bat +3 -0
- data/vendor/msoffice/msoc.def +13 -0
- data/vendor/msoffice/msocsample.py +178 -0
- data/vendor/msoffice/msoffice12.sln +31 -0
- data/vendor/msoffice/readme.md +110 -0
- data/vendor/msoffice/release.props +28 -0
- data/vendor/msoffice/src/Makefile +19 -0
- data/vendor/msoffice/src/attack.cpp +124 -0
- data/vendor/msoffice/src/cfb_test.cpp +77 -0
- data/vendor/msoffice/src/minisample.c +54 -0
- data/vendor/msoffice/src/msocdll.cpp +276 -0
- data/vendor/msoffice/src/msocsample.c +136 -0
- data/vendor/msoffice/src/msoffice-crypt.cpp +219 -0
- data/vendor/msoffice/src/proj/attack/attack.vcxproj +88 -0
- data/vendor/msoffice/src/proj/main/msoffice-crypt.vcxproj +88 -0
- data/vendor/msoffice/src/sha1.cpp +234 -0
- data/vendor/msoffice/test/Makefile +20 -0
- data/vendor/msoffice/test/cfb_test.cpp +74 -0
- data/vendor/msoffice/test/hash_test.cpp +59 -0
- data/vendor/msoffice/test/proj/cfb/cfb_test.vcxproj +90 -0
- data/vendor/msoffice/test/proj/hash/hash_test.vcxproj +90 -0
- data/vendor/msoffice/test/sampl.bat +8 -0
- data/vendor/msoffice/test_all.py +46 -0
- data/vendor/update +4 -0
- metadata +351 -0
@@ -0,0 +1,529 @@
|
|
1
|
+
#pragma once
|
2
|
+
/**
|
3
|
+
@file
|
4
|
+
@brief sparse vector
|
5
|
+
|
6
|
+
@author MITSUNARI Shigeo(@herumi)
|
7
|
+
@author MITSUNARI Shigeo
|
8
|
+
*/
|
9
|
+
#include <vector>
|
10
|
+
#include <cybozu/exception.hpp>
|
11
|
+
#include <cybozu/serializer.hpp>
|
12
|
+
#include <assert.h>
|
13
|
+
|
14
|
+
namespace cybozu { namespace nlp {
|
15
|
+
|
16
|
+
namespace option {
|
17
|
+
|
18
|
+
class PositionTbl {
|
19
|
+
std::vector<unsigned int> v_;
|
20
|
+
public:
|
21
|
+
struct Curr {
|
22
|
+
size_t vecPos_;
|
23
|
+
Curr(const PositionTbl&)
|
24
|
+
: vecPos_(0)
|
25
|
+
{
|
26
|
+
}
|
27
|
+
Curr(size_t vecPos)
|
28
|
+
: vecPos_(vecPos)
|
29
|
+
{
|
30
|
+
}
|
31
|
+
};
|
32
|
+
void reserve(size_t size)
|
33
|
+
{
|
34
|
+
v_.reserve(size);
|
35
|
+
}
|
36
|
+
void set(size_t pos)
|
37
|
+
{
|
38
|
+
if (!v_.empty() && pos <= v_[v_.size() - 1]) {
|
39
|
+
throw cybozu::Exception("SparseException:PositionTbl:set:bad order pos") << pos;
|
40
|
+
}
|
41
|
+
if (pos > 0xffffffffU) {
|
42
|
+
throw cybozu::Exception("SparseException:PositionTbl:set:too large pos") << pos;
|
43
|
+
}
|
44
|
+
v_.push_back((unsigned int)pos);
|
45
|
+
}
|
46
|
+
void clear()
|
47
|
+
{
|
48
|
+
v_.clear();
|
49
|
+
}
|
50
|
+
void next(Curr& curr) const
|
51
|
+
{
|
52
|
+
curr.vecPos_++;
|
53
|
+
}
|
54
|
+
size_t get(const Curr& curr) const
|
55
|
+
{
|
56
|
+
return v_[curr.vecPos_];
|
57
|
+
}
|
58
|
+
void swap(PositionTbl& rhs)
|
59
|
+
{
|
60
|
+
v_.swap(rhs.v_);
|
61
|
+
}
|
62
|
+
};
|
63
|
+
|
64
|
+
/*
|
65
|
+
max difference between previous position and current position < 0x40000000 = (1 << 30)
|
66
|
+
data format
|
67
|
+
input x:
|
68
|
+
v[0] = (x & 0x3f) | (y << 6) ; len = y + 1 for y = 0, 1, 2, 3
|
69
|
+
v[1] = x >> 6
|
70
|
+
v[2] = x >> (6 + 8)
|
71
|
+
v[3] = x >> (6 + 8 + 8)
|
72
|
+
*/
|
73
|
+
|
74
|
+
class CompressedPositionTbl {
|
75
|
+
mutable std::vector<unsigned char> v_;
|
76
|
+
size_t lastPos_;
|
77
|
+
mutable bool addDummy_; // add last dummy data into v_ to get speed and avoid buffer overrun
|
78
|
+
friend struct Curr;
|
79
|
+
public:
|
80
|
+
struct Curr {
|
81
|
+
size_t vecPos_;
|
82
|
+
size_t val_;
|
83
|
+
unsigned int pos_;
|
84
|
+
Curr(const CompressedPositionTbl& tbl)
|
85
|
+
: vecPos_(0)
|
86
|
+
, val_(0)
|
87
|
+
, pos_(0)
|
88
|
+
{
|
89
|
+
if (!tbl.addDummy_) {
|
90
|
+
tbl.v_.push_back(0);
|
91
|
+
tbl.addDummy_ = true;
|
92
|
+
}
|
93
|
+
tbl.setup(*this);
|
94
|
+
}
|
95
|
+
Curr(size_t vecPos)
|
96
|
+
: vecPos_(vecPos)
|
97
|
+
{
|
98
|
+
}
|
99
|
+
};
|
100
|
+
CompressedPositionTbl()
|
101
|
+
: lastPos_(0)
|
102
|
+
, addDummy_(false)
|
103
|
+
{
|
104
|
+
}
|
105
|
+
void clear()
|
106
|
+
{
|
107
|
+
v_.clear();
|
108
|
+
lastPos_ = 0;
|
109
|
+
addDummy_ = false;
|
110
|
+
}
|
111
|
+
void reserve(size_t size)
|
112
|
+
{
|
113
|
+
v_.reserve(size * 2); // ad hoc
|
114
|
+
}
|
115
|
+
void set(size_t pos)
|
116
|
+
{
|
117
|
+
if (addDummy_) {
|
118
|
+
v_.resize(v_.size() - 1);
|
119
|
+
addDummy_ = false;
|
120
|
+
}
|
121
|
+
if (!v_.empty() && pos <= lastPos_) {
|
122
|
+
throw cybozu::Exception("SparseException:CompressedPositionTbl:set:bad order pos") << pos;
|
123
|
+
}
|
124
|
+
if (pos - lastPos_ >= (1 << 30)) {
|
125
|
+
throw cybozu::Exception("SparseException:CompressedPositionTbl:set:too large pos") << pos;
|
126
|
+
}
|
127
|
+
unsigned int diff = (unsigned int)(pos - lastPos_);
|
128
|
+
lastPos_ = pos;
|
129
|
+
if (diff < (1 << 6)) {
|
130
|
+
v_.push_back((unsigned char)(diff));
|
131
|
+
} else if (diff < (1 << 14)) {
|
132
|
+
v_.push_back((unsigned char)(diff & 0x3f) | (1 << 6));
|
133
|
+
v_.push_back((unsigned char)(diff >> 6));
|
134
|
+
} else if (diff < (1 << 22)) {
|
135
|
+
v_.push_back((unsigned char)(diff & 0x3f) | (2 << 6));
|
136
|
+
v_.push_back((unsigned char)(diff >> 6));
|
137
|
+
v_.push_back((unsigned char)(diff >> 14));
|
138
|
+
} else {
|
139
|
+
assert(diff < (1 << 30));
|
140
|
+
v_.push_back((unsigned char)(diff & 0x3f) | (3 << 6));
|
141
|
+
v_.push_back((unsigned char)(diff >> 6));
|
142
|
+
v_.push_back((unsigned char)(diff >> 14));
|
143
|
+
v_.push_back((unsigned char)(diff >> 22));
|
144
|
+
}
|
145
|
+
}
|
146
|
+
void next(Curr& curr) const
|
147
|
+
{
|
148
|
+
setup(curr);
|
149
|
+
curr.vecPos_++;
|
150
|
+
}
|
151
|
+
size_t get(const Curr& curr) const { return curr.val_; }
|
152
|
+
void swap(CompressedPositionTbl& rhs)
|
153
|
+
{
|
154
|
+
v_.swap(rhs.v_);
|
155
|
+
std::swap(lastPos_, rhs.lastPos_);
|
156
|
+
std::swap(addDummy_, rhs.addDummy_);
|
157
|
+
}
|
158
|
+
private:
|
159
|
+
void setup(Curr& curr) const
|
160
|
+
{
|
161
|
+
unsigned int diff = v_[curr.pos_];
|
162
|
+
unsigned int t = diff >> 6;
|
163
|
+
unsigned int pos = curr.pos_;
|
164
|
+
if (t > 0) {
|
165
|
+
diff &= (1 << 6) - 1;
|
166
|
+
if (t == 1) {
|
167
|
+
diff |= (v_[pos + 1] << 6);
|
168
|
+
} else if (t == 2) {
|
169
|
+
diff |= (v_[pos + 1] << 6) | (v_[pos + 2] << 14);
|
170
|
+
} else {
|
171
|
+
diff |= (v_[pos + 1] << 6) | (v_[pos + 2] << 14) | (v_[pos + 3] << 22);
|
172
|
+
}
|
173
|
+
}
|
174
|
+
curr.val_ += diff;
|
175
|
+
curr.pos_ += t + 1;
|
176
|
+
}
|
177
|
+
};
|
178
|
+
|
179
|
+
} // cybozu::nlp::option
|
180
|
+
|
181
|
+
template<class T, class PosTbl = option::PositionTbl, int dummy = 0>
|
182
|
+
class SparseVector {
|
183
|
+
PosTbl posTbl_;
|
184
|
+
std::vector<T> vec_;
|
185
|
+
|
186
|
+
template<class S>
|
187
|
+
class ConstIterator {
|
188
|
+
typename PosTbl::Curr cur_;
|
189
|
+
const SparseVector<S, PosTbl> *self_;
|
190
|
+
public:
|
191
|
+
// for begin
|
192
|
+
ConstIterator(const SparseVector<S, PosTbl>* self)
|
193
|
+
: cur_(self->posTbl_)
|
194
|
+
, self_(self)
|
195
|
+
{
|
196
|
+
}
|
197
|
+
// for end
|
198
|
+
ConstIterator(size_t vecPos)
|
199
|
+
: cur_(vecPos)
|
200
|
+
{
|
201
|
+
}
|
202
|
+
size_t pos() const { return self_->posTbl_.get(cur_); }
|
203
|
+
S val() const { return (self_->vec_)[cur_.vecPos_]; }
|
204
|
+
|
205
|
+
const ConstIterator *operator->() const { return this; }
|
206
|
+
const ConstIterator& operator*() const { return *this; }
|
207
|
+
void operator++()
|
208
|
+
{
|
209
|
+
self_->posTbl_.next(cur_);
|
210
|
+
}
|
211
|
+
bool operator==(const ConstIterator& rhs) const { return cur_.vecPos_ == rhs.cur_.vecPos_; }
|
212
|
+
bool operator!=(const ConstIterator& rhs) const { return !operator==(rhs); }
|
213
|
+
};
|
214
|
+
public:
|
215
|
+
typedef ConstIterator<T> const_iterator;
|
216
|
+
typedef T value_type;
|
217
|
+
SparseVector()
|
218
|
+
{
|
219
|
+
}
|
220
|
+
template<class Map>
|
221
|
+
void set(const Map& m)
|
222
|
+
{
|
223
|
+
reserve(m.size());
|
224
|
+
for (typename Map::const_iterator i = m.begin(), ie = m.end(); i != ie; ++i) {
|
225
|
+
push_back(i->first, i->second);
|
226
|
+
}
|
227
|
+
}
|
228
|
+
void reserve(size_t size)
|
229
|
+
{
|
230
|
+
posTbl_.reserve(size);
|
231
|
+
vec_.reserve(size);
|
232
|
+
}
|
233
|
+
void push_back(size_t pos, const T& x)
|
234
|
+
{
|
235
|
+
posTbl_.set(pos);
|
236
|
+
vec_.push_back(x);
|
237
|
+
}
|
238
|
+
void clear()
|
239
|
+
{
|
240
|
+
posTbl_.clear();
|
241
|
+
vec_.clear();
|
242
|
+
}
|
243
|
+
const_iterator begin() const { return const_iterator(this); }
|
244
|
+
const_iterator end() const { return const_iterator(vec_.size()); }
|
245
|
+
size_t size() const { return vec_.size(); }
|
246
|
+
bool empty() const { return vec_.empty(); }
|
247
|
+
bool operator==(const SparseVector& rhs) const
|
248
|
+
{
|
249
|
+
if (size() != rhs.size()) return false;
|
250
|
+
for (const_iterator i1 = begin(), i2 = rhs.begin(), ie = end(); i1 != ie; ++i1, ++i2) {
|
251
|
+
if (i1->pos() != i2->pos()) return false;
|
252
|
+
if (i1->val() != i2->val()) return false;
|
253
|
+
}
|
254
|
+
return true;
|
255
|
+
}
|
256
|
+
bool operator!=(const SparseVector& rhs) const { return !operator==(rhs); }
|
257
|
+
|
258
|
+
void swap(SparseVector& rhs)
|
259
|
+
{
|
260
|
+
posTbl_.swap(rhs.posTbl_);
|
261
|
+
vec_.swap(rhs.vec_);
|
262
|
+
}
|
263
|
+
double norm() const
|
264
|
+
{
|
265
|
+
double ret = 0;
|
266
|
+
for (const_iterator i = begin(), ie = end(); i != ie; ++i) {
|
267
|
+
double v = i->val();
|
268
|
+
ret += v * v;
|
269
|
+
}
|
270
|
+
return ret;
|
271
|
+
}
|
272
|
+
template<class InputStream>
|
273
|
+
void load(InputStream& is)
|
274
|
+
{
|
275
|
+
size_t size;
|
276
|
+
cybozu::load(size, is);
|
277
|
+
clear();
|
278
|
+
reserve(size);
|
279
|
+
for (size_t i = 0; i < size; i++) {
|
280
|
+
size_t pos;
|
281
|
+
T val;
|
282
|
+
cybozu::load(pos, is);
|
283
|
+
cybozu::load(val, is);
|
284
|
+
push_back(pos, val);
|
285
|
+
}
|
286
|
+
}
|
287
|
+
|
288
|
+
template<class OutputStream>
|
289
|
+
void save(OutputStream& os) const
|
290
|
+
{
|
291
|
+
cybozu::save(os, size());
|
292
|
+
for (const_iterator i = begin(), ie = end(); i != ie; ++i) {
|
293
|
+
cybozu::save(os, i->pos());
|
294
|
+
cybozu::save(os, i->val());
|
295
|
+
}
|
296
|
+
}
|
297
|
+
};
|
298
|
+
|
299
|
+
/**
    view of the positions that exist in both v1 and v2
    V1 and V2 must provide const_iterator, value_type, begin() and end(),
    and their iterators must provide pos(), val(), operator->, operator++ and operator==
    positions must be strictly increasing in both vectors
    the object keeps references to v1 and v2 and is not copyable
*/
template<class V1, class V2>
class Intersection {
    Intersection(const Intersection&);
    void operator=(const Intersection&);
    const V1& v1_;
    const V2& v2_;

    template<class S1, class S2>
    class ConstIterator {
        typename S1::const_iterator begin1_;
        typename S1::const_iterator end1_;
        typename S2::const_iterator begin2_;
        typename S2::const_iterator end2_;
        bool isEnd_;
    public:
        /**
            @param isEnd true to make the end iterator; otherwise the iterator is
                   moved to the first common position (or becomes end if there is none)
        */
        ConstIterator(typename S1::const_iterator begin1, typename S1::const_iterator end1, typename S2::const_iterator begin2, typename S2::const_iterator end2, bool isEnd)
            : begin1_(begin1)
            , end1_(end1)
            , begin2_(begin2)
            , end2_(end2)
            , isEnd_(isEnd)
        {
            if (isEnd_) return;
            isEnd_ = begin1_ == end1_ || begin2_ == end2_;
            if (isEnd_) return;
            if (begin1_->pos() != begin2_->pos()) {
                operator++();
            }
        }
        // the accessors below must not be called on an end iterator
        size_t pos() const { return begin1_->pos(); }
        typename S1::value_type val1() const { return begin1_->val(); }
        typename S2::value_type val2() const { return begin2_->val(); }

        const ConstIterator *operator->() const { return this; }
        const ConstIterator& operator*() const { return *this; }
        /**
            move to the next common position; becomes end when either side is exhausted
        */
        void operator++()
        {
            if (isEnd_) return;
            if (begin1_->pos() == begin2_->pos()) {
                // leave the current match; the loop below catches up the other side
                ++begin1_;
                if (begin1_ == end1_) {
                    isEnd_ = true;
                    return;
                }
            }
            for (;;) {
                while (begin1_->pos() < begin2_->pos()) {
                    ++begin1_;
                    if (begin1_ == end1_) {
                        isEnd_ = true;
                        return;
                    }
                }
                if (begin1_->pos() == begin2_->pos()) return;

                while (begin1_->pos() > begin2_->pos()) {
                    ++begin2_;
                    if (begin2_ == end2_) {
                        isEnd_ = true;
                        return;
                    }
                }
                if (begin1_->pos() == begin2_->pos()) return;
            }
        }
        /**
            all end iterators are equal; two non-end iterators are equal
            when both underlying iterators are at the same place
        */
        bool operator==(const ConstIterator& rhs) const
        {
            if (isEnd_ || rhs.isEnd_) return isEnd_ == rhs.isEnd_;
            return begin1_ == rhs.begin1_ && begin2_ == rhs.begin2_;
        }
        bool operator!=(const ConstIterator& rhs) const { return !operator==(rhs); }
    };
public:
    typedef ConstIterator<V1, V2> const_iterator;
    Intersection(const V1& v1, const V2& v2)
        : v1_(v1)
        , v2_(v2)
    {
    }
    const_iterator begin() const { return const_iterator(v1_.begin(), v1_.end(), v2_.begin(), v2_.end(), false); }
    const_iterator end() const { return const_iterator(v1_.begin(), v1_.end(), v2_.begin(), v2_.end(), true); }
};
|
375
|
+
|
376
|
+
/**
    view of the positions that exist in v1 or v2, visited in increasing order
    V1 and V2 must provide const_iterator, value_type, begin() and end(),
    and their iterators must provide pos(), val(), operator->, operator++, operator== and operator!=
    positions must be strictly increasing in both vectors
    the object keeps references to v1 and v2 and is not copyable
*/
template<class V1, class V2>
class Union {
    Union(const Union&);
    void operator=(const Union&);
    const V1& v1_;
    const V2& v2_;

    template<class S1, class S2>
    class ConstIterator {
    public:
        /*
                  pos1   pos2 ; next action
            End    x      x   ; end
            Only1  o      x   ; ++p1
            Only2  x      o   ; ++p2
            Small  o  <   o   ; ++p1
            Equal  o  =   o   ; ++p1, ++p2
            Large  o  >   o   ; ++p2
        */
        enum Mode {
            End,
            Only1,
            Only2,
            Small,
            Equal,
            Large
        };
    private:
        typename S1::const_iterator begin1_;
        typename S1::const_iterator end1_;
        typename S2::const_iterator begin2_;
        typename S2::const_iterator end2_;
        Mode mode_;
        size_t pos_;

        /**
            set mode_ and pos_ from the current state of both underlying iterators
            pos_ is left untouched when both sides are exhausted
        */
        void detectMode()
        {
            const bool has1 = begin1_ != end1_;
            const bool has2 = begin2_ != end2_;
            if (has1 && has2) {
                const size_t pos1 = begin1_->pos();
                const size_t pos2 = begin2_->pos();
                if (pos1 < pos2) {
                    mode_ = Small;
                    pos_ = pos1;
                } else if (pos1 == pos2) {
                    mode_ = Equal;
                    pos_ = pos1;
                } else {
                    mode_ = Large;
                    pos_ = pos2;
                }
            } else if (has1) {
                mode_ = Only1;
                pos_ = begin1_->pos();
            } else if (has2) {
                mode_ = Only2;
                pos_ = begin2_->pos();
            } else {
                mode_ = End;
            }
        }
    public:
        /**
            @param isEnd true to make the end iterator (mode End, pos 0)
        */
        ConstIterator(typename S1::const_iterator begin1, typename S1::const_iterator end1, typename S2::const_iterator begin2, typename S2::const_iterator end2, bool isEnd)
            : begin1_(begin1)
            , end1_(end1)
            , begin2_(begin2)
            , end2_(end2)
            , mode_(End)
            , pos_(0)
        {
            if (isEnd) return;
            detectMode();
        }
        /**
            which side(s) hold a value at the current position
        */
        Mode getMode() const { return mode_; }
        size_t pos() const { return pos_; }
        bool hasVal1() const { return mode_ == Only1 || mode_ == Small || mode_ == Equal; }
        bool hasVal2() const { return mode_ == Only2 || mode_ == Equal || mode_ == Large; }

        /**
            return value if exists otherwise 0
        */
        typename S1::value_type val1() const { return hasVal1() ? begin1_->val() : 0; }
        typename S2::value_type val2() const { return hasVal2() ? begin2_->val() : 0; }

        const ConstIterator *operator->() const { return this; }
        const ConstIterator& operator*() const { return *this; }
        /**
            advance every side that supplied the current position
        */
        void operator++()
        {
            if (mode_ == End) return;
            // decide both before moving anything; detectMode() rewrites mode_ afterwards
            const bool step1 = hasVal1();
            const bool step2 = hasVal2();
            if (step1) ++begin1_;
            if (step2) ++begin2_;
            detectMode();
        }
        /**
            all end iterators are equal; two non-end iterators are equal
            when both underlying iterators are at the same place
        */
        bool operator==(const ConstIterator& rhs) const
        {
            if (mode_ != rhs.mode_) return false;
            if (mode_ == End) return true;
            return begin1_ == rhs.begin1_ && begin2_ == rhs.begin2_;
        }
        bool operator!=(const ConstIterator& rhs) const { return !operator==(rhs); }
    };
public:
    typedef ConstIterator<V1, V2> const_iterator;
    Union(const V1& v1, const V2& v2)
        : v1_(v1)
        , v2_(v2)
    {
    }
    const_iterator begin() const { return const_iterator(v1_.begin(), v1_.end(), v2_.begin(), v2_.end(), false); }
    const_iterator end() const { return const_iterator(v1_.begin(), v1_.end(), v2_.begin(), v2_.end(), true); }
};
|
501
|
+
|
502
|
+
/**
|
503
|
+
inner product of lhs and rhs
|
504
|
+
the result is computed as type Ret and stored in *pret (the function itself returns nothing)
|
505
|
+
*/
|
506
|
+
template<class Ret, class L, class Ltbl, class R, class Rtbl>
|
507
|
+
void InnerProduct(Ret *pret, const SparseVector<L, Ltbl>& lhs, const SparseVector<R, Rtbl>& rhs)
|
508
|
+
{
|
509
|
+
typedef SparseVector<L, Ltbl> Lvec;
|
510
|
+
typedef SparseVector<R, Rtbl> Rvec;
|
511
|
+
|
512
|
+
Ret ret = 0;
|
513
|
+
if ((const void*)&lhs == (const void*)&rhs) {
|
514
|
+
for (typename Lvec::const_iterator i = lhs.begin(), ie = lhs.end(); i != ie; ++i) {
|
515
|
+
ret += (Ret)i->val() * (Ret)i->val();
|
516
|
+
}
|
517
|
+
} else {
|
518
|
+
typedef Intersection<Lvec, Rvec> Inter;
|
519
|
+
Inter inter(lhs, rhs);
|
520
|
+
for (typename Inter::const_iterator i = inter.begin(), ie = inter.end(); i != ie; ++i) {
|
521
|
+
ret += (Ret)i->val1() * (Ret)i->val2();
|
522
|
+
}
|
523
|
+
}
|
524
|
+
*pret = ret;
|
525
|
+
}
|
526
|
+
|
527
|
+
} // cybozu::nlp
|
528
|
+
|
529
|
+
} // cybozu
|