ooxml_crypt 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +58 -0
- data/Rakefile +12 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/ext/ooxml_crypt/extconf.rb +18 -0
- data/ext/ooxml_crypt/ooxml_crypt.c +27 -0
- data/ext/ooxml_crypt/ooxml_crypt.h +7 -0
- data/lib/ooxml_crypt/version.rb +5 -0
- data/lib/ooxml_crypt.rb +75 -0
- data/vendor/cybozulib/.github/workflows/main.yml +12 -0
- data/vendor/cybozulib/.gitignore +5 -0
- data/vendor/cybozulib/CMakeLists.txt +6 -0
- data/vendor/cybozulib/COPYRIGHT +27 -0
- data/vendor/cybozulib/Makefile +26 -0
- data/vendor/cybozulib/bin/libeay32.dll +0 -0
- data/vendor/cybozulib/bin/libmecab.dll +0 -0
- data/vendor/cybozulib/bin/ssleay32.dll +0 -0
- data/vendor/cybozulib/common.mk +116 -0
- data/vendor/cybozulib/common.props +25 -0
- data/vendor/cybozulib/cybozulib.sln +286 -0
- data/vendor/cybozulib/debug.props +14 -0
- data/vendor/cybozulib/include/cybozu/array.hpp +197 -0
- data/vendor/cybozulib/include/cybozu/atoi.hpp +238 -0
- data/vendor/cybozulib/include/cybozu/atomic.hpp +146 -0
- data/vendor/cybozulib/include/cybozu/base64.hpp +210 -0
- data/vendor/cybozulib/include/cybozu/benchmark.hpp +212 -0
- data/vendor/cybozulib/include/cybozu/bfd.hpp +105 -0
- data/vendor/cybozulib/include/cybozu/bit_operation.hpp +139 -0
- data/vendor/cybozulib/include/cybozu/bitvector.hpp +358 -0
- data/vendor/cybozulib/include/cybozu/condition_variable.hpp +113 -0
- data/vendor/cybozulib/include/cybozu/condition_variable_cs.hpp +74 -0
- data/vendor/cybozulib/include/cybozu/config.hpp +392 -0
- data/vendor/cybozulib/include/cybozu/critical_section.hpp +60 -0
- data/vendor/cybozulib/include/cybozu/crypto.hpp +321 -0
- data/vendor/cybozulib/include/cybozu/csucvector.hpp +624 -0
- data/vendor/cybozulib/include/cybozu/csv.hpp +294 -0
- data/vendor/cybozulib/include/cybozu/data_type.hpp +27 -0
- data/vendor/cybozulib/include/cybozu/endian.hpp +224 -0
- data/vendor/cybozulib/include/cybozu/env.hpp +63 -0
- data/vendor/cybozulib/include/cybozu/event.hpp +122 -0
- data/vendor/cybozulib/include/cybozu/exception.hpp +253 -0
- data/vendor/cybozulib/include/cybozu/file.hpp +626 -0
- data/vendor/cybozulib/include/cybozu/fmindex.hpp +291 -0
- data/vendor/cybozulib/include/cybozu/format.hpp +93 -0
- data/vendor/cybozulib/include/cybozu/frequency.hpp +264 -0
- data/vendor/cybozulib/include/cybozu/hash.hpp +67 -0
- data/vendor/cybozulib/include/cybozu/inttype.hpp +174 -0
- data/vendor/cybozulib/include/cybozu/itoa.hpp +336 -0
- data/vendor/cybozulib/include/cybozu/json.hpp +120 -0
- data/vendor/cybozulib/include/cybozu/line_stream.hpp +149 -0
- data/vendor/cybozulib/include/cybozu/link_libeay32.hpp +21 -0
- data/vendor/cybozulib/include/cybozu/link_mpir.hpp +18 -0
- data/vendor/cybozulib/include/cybozu/link_ssleay32.hpp +19 -0
- data/vendor/cybozulib/include/cybozu/log.hpp +237 -0
- data/vendor/cybozulib/include/cybozu/minixml.hpp +452 -0
- data/vendor/cybozulib/include/cybozu/mmap.hpp +143 -0
- data/vendor/cybozulib/include/cybozu/mutex.hpp +144 -0
- data/vendor/cybozulib/include/cybozu/nlp/mecab.hpp +96 -0
- data/vendor/cybozulib/include/cybozu/nlp/plsi.hpp +315 -0
- data/vendor/cybozulib/include/cybozu/nlp/random.hpp +74 -0
- data/vendor/cybozulib/include/cybozu/nlp/sparse.hpp +529 -0
- data/vendor/cybozulib/include/cybozu/nlp/svd.hpp +486 -0
- data/vendor/cybozulib/include/cybozu/nlp/tfidf.hpp +226 -0
- data/vendor/cybozulib/include/cybozu/nlp/top_score.hpp +75 -0
- data/vendor/cybozulib/include/cybozu/option.hpp +743 -0
- data/vendor/cybozulib/include/cybozu/parallel.hpp +88 -0
- data/vendor/cybozulib/include/cybozu/pcg.hpp +72 -0
- data/vendor/cybozulib/include/cybozu/process.hpp +324 -0
- data/vendor/cybozulib/include/cybozu/quit_signal_handler.hpp +66 -0
- data/vendor/cybozulib/include/cybozu/random_generator.hpp +144 -0
- data/vendor/cybozulib/include/cybozu/regex.hpp +463 -0
- data/vendor/cybozulib/include/cybozu/select8.hpp +279 -0
- data/vendor/cybozulib/include/cybozu/serializer.hpp +363 -0
- data/vendor/cybozulib/include/cybozu/sha1.hpp +209 -0
- data/vendor/cybozulib/include/cybozu/sha2.hpp +506 -0
- data/vendor/cybozulib/include/cybozu/siphash.hpp +105 -0
- data/vendor/cybozulib/include/cybozu/socket.hpp +785 -0
- data/vendor/cybozulib/include/cybozu/ssl.hpp +203 -0
- data/vendor/cybozulib/include/cybozu/stacktrace.hpp +291 -0
- data/vendor/cybozulib/include/cybozu/stream.hpp +269 -0
- data/vendor/cybozulib/include/cybozu/string.hpp +1746 -0
- data/vendor/cybozulib/include/cybozu/string_operation.hpp +365 -0
- data/vendor/cybozulib/include/cybozu/sucvector.hpp +378 -0
- data/vendor/cybozulib/include/cybozu/test.hpp +373 -0
- data/vendor/cybozulib/include/cybozu/thread.hpp +229 -0
- data/vendor/cybozulib/include/cybozu/time.hpp +281 -0
- data/vendor/cybozulib/include/cybozu/tls.hpp +115 -0
- data/vendor/cybozulib/include/cybozu/unordered_map.hpp +13 -0
- data/vendor/cybozulib/include/cybozu/unordered_set.hpp +13 -0
- data/vendor/cybozulib/include/cybozu/v128.hpp +376 -0
- data/vendor/cybozulib/include/cybozu/wavelet_matrix.hpp +345 -0
- data/vendor/cybozulib/include/cybozu/xorshift.hpp +189 -0
- data/vendor/cybozulib/include/cybozu/zlib.hpp +325 -0
- data/vendor/cybozulib/include/sais.hxx +364 -0
- data/vendor/cybozulib/misc/make_select8tbl.cpp +26 -0
- data/vendor/cybozulib/mk.bat +37 -0
- data/vendor/cybozulib/readme.md +29 -0
- data/vendor/cybozulib/release.props +12 -0
- data/vendor/cybozulib/sample/Makefile +30 -0
- data/vendor/cybozulib/sample/csucvector_smpl.cpp +42 -0
- data/vendor/cybozulib/sample/data/svd/org/test1.S +4 -0
- data/vendor/cybozulib/sample/data/svd/org/test1.U +4 -0
- data/vendor/cybozulib/sample/data/svd/org/test1.V +6 -0
- data/vendor/cybozulib/sample/data/svd/test1 +4 -0
- data/vendor/cybozulib/sample/data/svd/test2 +4 -0
- data/vendor/cybozulib/sample/desymbol.cpp +127 -0
- data/vendor/cybozulib/sample/exception_smpl.cpp +46 -0
- data/vendor/cybozulib/sample/fmindex_smpl.cpp +231 -0
- data/vendor/cybozulib/sample/log_smpl.cpp +19 -0
- data/vendor/cybozulib/sample/mecab_smpl.cpp +37 -0
- data/vendor/cybozulib/sample/option2_smpl.cpp +68 -0
- data/vendor/cybozulib/sample/option_smpl.cpp +42 -0
- data/vendor/cybozulib/sample/plsi_smpl.cpp +207 -0
- data/vendor/cybozulib/sample/proj/exception_smpl.vcproj +184 -0
- data/vendor/cybozulib/sample/proj/mecab_smpl.vcproj +184 -0
- data/vendor/cybozulib/sample/proj/ssl_smpl/ssl_smpl.vcxproj +85 -0
- data/vendor/cybozulib/sample/proj/ssl_smpl.vcproj +347 -0
- data/vendor/cybozulib/sample/proj/stacktrace_smpl/stacktrace_smpl.vcxproj +85 -0
- data/vendor/cybozulib/sample/proj/svd_smpl.vcproj +184 -0
- data/vendor/cybozulib/sample/quit_signal_handler.cpp +30 -0
- data/vendor/cybozulib/sample/serializer_smpl.cpp +196 -0
- data/vendor/cybozulib/sample/socket_smpl.cpp +82 -0
- data/vendor/cybozulib/sample/ssl_smpl.cpp +39 -0
- data/vendor/cybozulib/sample/stacktrace_smpl.cpp +52 -0
- data/vendor/cybozulib/sample/svd_bench_smpl.cpp +143 -0
- data/vendor/cybozulib/sample/svd_smpl.cpp +94 -0
- data/vendor/cybozulib/sample/wm_bench_smpl.cpp +182 -0
- data/vendor/cybozulib/sample/zlib_smpl.cpp +41 -0
- data/vendor/cybozulib/src/Makefile +8 -0
- data/vendor/cybozulib/src/base/Makefile +19 -0
- data/vendor/cybozulib/test/Makefile +12 -0
- data/vendor/cybozulib/test/base/Makefile +37 -0
- data/vendor/cybozulib/test/base/array_test.cpp +173 -0
- data/vendor/cybozulib/test/base/atoi_test.cpp +774 -0
- data/vendor/cybozulib/test/base/atomic_test.cpp +49 -0
- data/vendor/cybozulib/test/base/base64_test.cpp +113 -0
- data/vendor/cybozulib/test/base/bit_operation_test.cpp +134 -0
- data/vendor/cybozulib/test/base/bitvector_test.cpp +204 -0
- data/vendor/cybozulib/test/base/condition_variable_cs_test.cpp +92 -0
- data/vendor/cybozulib/test/base/condition_variable_test.cpp +88 -0
- data/vendor/cybozulib/test/base/config_test.cpp +236 -0
- data/vendor/cybozulib/test/base/crypto_test.cpp +122 -0
- data/vendor/cybozulib/test/base/csucvector_test.cpp +63 -0
- data/vendor/cybozulib/test/base/csv_test.cpp +182 -0
- data/vendor/cybozulib/test/base/data/a.xml +26 -0
- data/vendor/cybozulib/test/base/endian_test.cpp +56 -0
- data/vendor/cybozulib/test/base/env_test.cpp +22 -0
- data/vendor/cybozulib/test/base/event_test.cpp +41 -0
- data/vendor/cybozulib/test/base/file_test.cpp +233 -0
- data/vendor/cybozulib/test/base/fmindex_test.cpp +118 -0
- data/vendor/cybozulib/test/base/format_test.cpp +12 -0
- data/vendor/cybozulib/test/base/frequency_test.cpp +104 -0
- data/vendor/cybozulib/test/base/itoa_test.cpp +522 -0
- data/vendor/cybozulib/test/base/line_stream_test.cpp +208 -0
- data/vendor/cybozulib/test/base/mecab_test.cpp +41 -0
- data/vendor/cybozulib/test/base/minixml_test.cpp +103 -0
- data/vendor/cybozulib/test/base/mmap_test.cpp +15 -0
- data/vendor/cybozulib/test/base/option_test.cpp +487 -0
- data/vendor/cybozulib/test/base/parallel_test.cpp +48 -0
- data/vendor/cybozulib/test/base/proj/array_test/array_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/atoi_test/atoi_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/atomic_test/atomic_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/base64_test/base64_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/condition_variable_cs_test/condition_variable_cs_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/condition_variable_test/condition_variable_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/config_test/config_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/csv_test/csv_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/endian_test/endian_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/env_test/env_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/event_test/event_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/file_test/file_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/itoa_test/itoa_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/mecab_test/mecab_test.vcxproj +88 -0
- data/vendor/cybozulib/test/base/proj/minixml_test/minixml_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/mmap_test/mmap_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/serializer_test/serializer_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/sha1_test/sha1_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/stream_test/stream_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/string_operation_test/string_operation_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/string_test/string_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/thread_test/thread_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/time_test/time_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/tls_test/tls_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/proj/zlib_test/zlib_test.vcxproj +86 -0
- data/vendor/cybozulib/test/base/random_generator_test.cpp +28 -0
- data/vendor/cybozulib/test/base/regex_test.cpp +74 -0
- data/vendor/cybozulib/test/base/serializer_test.cpp +483 -0
- data/vendor/cybozulib/test/base/sha1_test.cpp +61 -0
- data/vendor/cybozulib/test/base/sha2_test.cpp +191 -0
- data/vendor/cybozulib/test/base/siphash_test.cpp +33 -0
- data/vendor/cybozulib/test/base/socket_test.cpp +76 -0
- data/vendor/cybozulib/test/base/stream_test.cpp +101 -0
- data/vendor/cybozulib/test/base/string_operation_test.cpp +340 -0
- data/vendor/cybozulib/test/base/string_test.cpp +1705 -0
- data/vendor/cybozulib/test/base/sucvector_test.cpp +312 -0
- data/vendor/cybozulib/test/base/thread_test.cpp +62 -0
- data/vendor/cybozulib/test/base/time_test.cpp +164 -0
- data/vendor/cybozulib/test/base/tls_test.cpp +50 -0
- data/vendor/cybozulib/test/base/wavelet_matrix_test.cpp +145 -0
- data/vendor/cybozulib/test/base/zlib_test.cpp +371 -0
- data/vendor/cybozulib/test/nlp/Makefile +27 -0
- data/vendor/cybozulib/test/nlp/proj/random_test.vcproj +184 -0
- data/vendor/cybozulib/test/nlp/proj/sparse_test.vcproj +184 -0
- data/vendor/cybozulib/test/nlp/proj/svd_test.vcproj +184 -0
- data/vendor/cybozulib/test/nlp/random_test.cpp +62 -0
- data/vendor/cybozulib/test/nlp/sparse_test.cpp +347 -0
- data/vendor/cybozulib/test/nlp/svd_test.cpp +234 -0
- data/vendor/cybozulib/test/nlp/top_score_test.cpp +40 -0
- data/vendor/cybozulib/tool/create_vcproj.py +186 -0
- data/vendor/cybozulib/tool/vcproj_tmpl.py +185 -0
- data/vendor/msoffice/COPYRIGHT +27 -0
- data/vendor/msoffice/Makefile +29 -0
- data/vendor/msoffice/bin/64/msoc.dll +0 -0
- data/vendor/msoffice/bin/64/msocsample.exe +0 -0
- data/vendor/msoffice/bin/64/msoffice-crypt.exe +0 -0
- data/vendor/msoffice/bin/msoc.dll +0 -0
- data/vendor/msoffice/bin/msocsample.exe +0 -0
- data/vendor/msoffice/bin/msoffice-crypt.exe +0 -0
- data/vendor/msoffice/common.mk +71 -0
- data/vendor/msoffice/common.props +26 -0
- data/vendor/msoffice/debug.props +14 -0
- data/vendor/msoffice/include/attack.hpp +211 -0
- data/vendor/msoffice/include/cfb.hpp +777 -0
- data/vendor/msoffice/include/crypto_util.hpp +450 -0
- data/vendor/msoffice/include/custom_sha1.hpp +342 -0
- data/vendor/msoffice/include/decode.hpp +240 -0
- data/vendor/msoffice/include/encode.hpp +221 -0
- data/vendor/msoffice/include/make_dataspace.hpp +316 -0
- data/vendor/msoffice/include/msoc.h +129 -0
- data/vendor/msoffice/include/resource.hpp +7 -0
- data/vendor/msoffice/include/standard_encryption.hpp +145 -0
- data/vendor/msoffice/include/uint32vec.hpp +179 -0
- data/vendor/msoffice/include/util.hpp +212 -0
- data/vendor/msoffice/lib/.emptydir +0 -0
- data/vendor/msoffice/misc/decrypt-xls.vbs +46 -0
- data/vendor/msoffice/mk.bat +1 -0
- data/vendor/msoffice/mkdll.bat +3 -0
- data/vendor/msoffice/msoc.def +13 -0
- data/vendor/msoffice/msocsample.py +178 -0
- data/vendor/msoffice/msoffice12.sln +31 -0
- data/vendor/msoffice/readme.md +110 -0
- data/vendor/msoffice/release.props +28 -0
- data/vendor/msoffice/src/Makefile +19 -0
- data/vendor/msoffice/src/attack.cpp +124 -0
- data/vendor/msoffice/src/cfb_test.cpp +77 -0
- data/vendor/msoffice/src/minisample.c +54 -0
- data/vendor/msoffice/src/msocdll.cpp +276 -0
- data/vendor/msoffice/src/msocsample.c +136 -0
- data/vendor/msoffice/src/msoffice-crypt.cpp +219 -0
- data/vendor/msoffice/src/proj/attack/attack.vcxproj +88 -0
- data/vendor/msoffice/src/proj/main/msoffice-crypt.vcxproj +88 -0
- data/vendor/msoffice/src/sha1.cpp +234 -0
- data/vendor/msoffice/test/Makefile +20 -0
- data/vendor/msoffice/test/cfb_test.cpp +74 -0
- data/vendor/msoffice/test/hash_test.cpp +59 -0
- data/vendor/msoffice/test/proj/cfb/cfb_test.vcxproj +90 -0
- data/vendor/msoffice/test/proj/hash/hash_test.vcxproj +90 -0
- data/vendor/msoffice/test/sampl.bat +8 -0
- data/vendor/msoffice/test_all.py +46 -0
- data/vendor/update +4 -0
- metadata +351 -0
@@ -0,0 +1,46 @@
|
|
1
|
+
#include <cybozu/exception.hpp>
|
2
|
+
#include <cybozu/string.hpp>
|
3
|
+
#include <cybozu/atoi.hpp>
|
4
|
+
#include <iostream>
|
5
|
+
#include <assert.h>
|
6
|
+
#include <cybozu/stacktrace.hpp>
|
7
|
+
|
8
|
+
/**
	Sample exception type derived from cybozu::Exception.
	Every instance is constructed with the fixed name "mail".
*/
struct MailException : cybozu::Exception {
	MailException()
		: cybozu::Exception("mail")
	{
	}
};
|
11
|
+
|
12
|
+
void f2()
|
13
|
+
{
|
14
|
+
const char *msg = "HTTP/...";
|
15
|
+
std::string abc = "abc";
|
16
|
+
char c = 'x';
|
17
|
+
int port = 80;
|
18
|
+
unsigned int s = 90;
|
19
|
+
MailException e;
|
20
|
+
e << "can't send" << msg << abc << c << port << s << '\n';
|
21
|
+
cybozu::StackTrace st;
|
22
|
+
e << st;
|
23
|
+
throw e;
|
24
|
+
}
|
25
|
+
|
26
|
+
void f1()
|
27
|
+
{
|
28
|
+
f2();
|
29
|
+
}
|
30
|
+
|
31
|
+
void f0()
|
32
|
+
{
|
33
|
+
f1();
|
34
|
+
}
|
35
|
+
|
36
|
+
int main()
|
37
|
+
{
|
38
|
+
try {
|
39
|
+
f0();
|
40
|
+
} catch (cybozu::Exception &e) {
|
41
|
+
std::cout << "for user" << std::endl;
|
42
|
+
std::cout << e.toString() << std::endl;
|
43
|
+
} catch (...) {
|
44
|
+
std::cout << "Error!" << std::endl;
|
45
|
+
}
|
46
|
+
}
|
@@ -0,0 +1,231 @@
|
|
1
|
+
#include <fstream>
|
2
|
+
#include <cybozu/time.hpp>
|
3
|
+
#include <cybozu/fmindex.hpp>
|
4
|
+
#include <cybozu/mmap.hpp>
|
5
|
+
#include <cybozu/string.hpp>
|
6
|
+
#include <cybozu/hash.hpp>
|
7
|
+
#include <cybozu/benchmark.hpp>
|
8
|
+
#include <set>
|
9
|
+
|
10
|
+
#ifdef USE_UTF32
|
11
|
+
typedef cybozu::FMindexT<cybozu::Char> FMindex;
|
12
|
+
typedef cybozu::String String;
|
13
|
+
#else
|
14
|
+
typedef cybozu::FMindex FMindex;
|
15
|
+
typedef std::string String;
|
16
|
+
#endif
|
17
|
+
|
18
|
+
// ordered set of hit positions
typedef std::set<int> Set;

/**
	Print every element of set in ascending order,
	each followed by one space, then end the line.
	@param set [in] positions to print
*/
void putSet(const Set& set)
{
	Set::const_iterator it = set.begin();
	while (it != set.end()) {
		std::cout << *it << ' ';
		++it;
	}
	std::cout << std::endl;
}
|
27
|
+
|
28
|
+
template<class STRING>
|
29
|
+
void simpleSearch(const std::string& inName, const std::string& queryFile, bool putHash)
|
30
|
+
{
|
31
|
+
cybozu::Mmap m(inName);
|
32
|
+
STRING text(m.get(), m.size());
|
33
|
+
|
34
|
+
double beginTime = cybozu::GetCurrentTimeSec();
|
35
|
+
|
36
|
+
std::ifstream qs(queryFile.c_str(), std::ios::binary);
|
37
|
+
STRING key;
|
38
|
+
uint64_t hash = 0;
|
39
|
+
while (qs >> key) {
|
40
|
+
if (!putHash) std::cout << "query " << key << std::endl;
|
41
|
+
size_t p = 0;
|
42
|
+
Set set;
|
43
|
+
for (;;) {
|
44
|
+
size_t q = text.find(key, p);
|
45
|
+
if (q == std::string::npos) break;
|
46
|
+
set.insert((int)q);
|
47
|
+
p = q + 1;
|
48
|
+
}
|
49
|
+
if (putHash) {
|
50
|
+
hash = cybozu::hash64(set.begin(), set.end(), hash);
|
51
|
+
} else {
|
52
|
+
putSet(set);
|
53
|
+
}
|
54
|
+
}
|
55
|
+
if (putHash) printf("hash=%llx\n", (long long)hash);
|
56
|
+
|
57
|
+
double endTime = cybozu::GetCurrentTimeSec();
|
58
|
+
fprintf(stderr, "time: %gsec\n", endTime - beginTime);
|
59
|
+
}
|
60
|
+
|
61
|
+
template<class FMINDEX, class STRING>
|
62
|
+
void recover(const std::string& inName, const std::string& outName)
|
63
|
+
{
|
64
|
+
std::ifstream is(inName.c_str(), std::ios::binary);
|
65
|
+
FMINDEX f;
|
66
|
+
f.load(is);
|
67
|
+
|
68
|
+
double beginTime = cybozu::GetCurrentTimeSec();
|
69
|
+
|
70
|
+
STRING str;
|
71
|
+
f.getPrevString(str, 0, f.wm.size() - 1);
|
72
|
+
double endTime = cybozu::GetCurrentTimeSec();
|
73
|
+
fprintf(stderr, "time: %gsec\n", endTime - beginTime);
|
74
|
+
std::ofstream os(outName.c_str(), std::ios::binary);
|
75
|
+
os << str;
|
76
|
+
}
|
77
|
+
|
78
|
+
template<class FMINDEX, class STRING>
|
79
|
+
void search(const std::string& inName, const std::string& queryFile, bool putHash, bool bench)
|
80
|
+
{
|
81
|
+
std::ifstream is(inName.c_str(), std::ios::binary);
|
82
|
+
FMINDEX f;
|
83
|
+
f.load(is);
|
84
|
+
|
85
|
+
double beginTime = cybozu::GetCurrentTimeSec();
|
86
|
+
|
87
|
+
std::ifstream qs(queryFile.c_str(), std::ios::binary);
|
88
|
+
STRING key;
|
89
|
+
uint64_t hash = 0;
|
90
|
+
cybozu::CpuClock clkRange;
|
91
|
+
cybozu::CpuClock clkPos;
|
92
|
+
while (qs >> key) {
|
93
|
+
if (!putHash) std::cout << "query " << key << std::endl;
|
94
|
+
size_t begin, end = 0;
|
95
|
+
if (bench) clkRange.begin();
|
96
|
+
bool found = f.getRange(&begin, &end, key);
|
97
|
+
if (bench) clkRange.end();
|
98
|
+
Set set;
|
99
|
+
if (found) {
|
100
|
+
while (begin != end) {
|
101
|
+
if (bench) clkPos.begin();
|
102
|
+
int pos = (int)f.convertPosition(begin);
|
103
|
+
if (bench) clkPos.end();
|
104
|
+
set.insert(pos);
|
105
|
+
begin++;
|
106
|
+
}
|
107
|
+
}
|
108
|
+
if (putHash) {
|
109
|
+
hash = cybozu::hash64(set.begin(), set.end(), hash);
|
110
|
+
} else {
|
111
|
+
putSet(set);
|
112
|
+
}
|
113
|
+
}
|
114
|
+
if (putHash) printf("hash=%llx\n", (long long)hash);
|
115
|
+
|
116
|
+
double endTime = cybozu::GetCurrentTimeSec();
|
117
|
+
fprintf(stderr, "time: %gsec\n", endTime - beginTime);
|
118
|
+
if (bench) {
|
119
|
+
int rangeNum = (int)clkRange.getCount();
|
120
|
+
int posNum = (int)clkPos.getCount();
|
121
|
+
fprintf(stderr, "getRange %.2f(%d) pos %.2f(%d)\n", clkRange.getClock() / double(rangeNum), rangeNum, clkPos.getClock() / double(posNum), posNum);
|
122
|
+
}
|
123
|
+
}
|
124
|
+
|
125
|
+
template<class FMINDEX, class STRING>
|
126
|
+
static void create(const std::string& inName, const std::string& outName, int skip)
|
127
|
+
{
|
128
|
+
fprintf(stderr, "inName=%s, outName=%s, skip=%d\n", inName.c_str(), outName.c_str(), skip);
|
129
|
+
|
130
|
+
double beginTime = cybozu::GetCurrentTimeSec();
|
131
|
+
|
132
|
+
cybozu::Mmap m(inName);
|
133
|
+
FMINDEX f;
|
134
|
+
STRING text(m.get(), m.get() + m.size());
|
135
|
+
f.init(text.begin(), text.end(), skip);
|
136
|
+
|
137
|
+
double endTime = cybozu::GetCurrentTimeSec();
|
138
|
+
fprintf(stderr, "create time %gsec\n", endTime - beginTime);
|
139
|
+
std::ofstream os(outName.c_str(), std::ios::binary);
|
140
|
+
f.save(os);
|
141
|
+
}
|
142
|
+
|
143
|
+
/**
	Print the command line help to stdout and terminate with exit status 1.
*/
void usage()
{
	// one literal per help line; none of them contains a format specifier
	static const char helpText[] =
		"fmindex_smpl.exe (-c|-s|-r|-ss) file1 file2 [-skip skip][-hash][-time]\n"
		" -c : create index file\n"
		" file1 : any UTF-8 string file\n"
		" file2 : output index file\n"
		" -skip skip : skip to sampling(default 8)\n"
		" -hash : put position hash\n"
		" -time : benchmark\n"
		" -s : search mode\n"
		" file1 : index file\n"
		" file2 : query string file\n"
		" -r : recover mode\n"
		" file1 : index file\n"
		" file2 : org index file\n"
		" -ss: simple search\n"
		" file1 : any UTF-8 string file\n"
		" file2 : query string file\n";
	fputs(helpText, stdout);
	exit(1);
}
|
163
|
+
|
164
|
+
int main(int argc, char* argv[])
|
165
|
+
try
|
166
|
+
{
|
167
|
+
argc--, argv++;
|
168
|
+
std::string fName1;
|
169
|
+
std::string fName2;
|
170
|
+
std::string mode;
|
171
|
+
int skip = 8;
|
172
|
+
bool putHash = false;
|
173
|
+
bool bench = false;
|
174
|
+
|
175
|
+
while (argc > 0) {
|
176
|
+
if (strcmp(*argv, "-c") == 0) {
|
177
|
+
mode = *argv;
|
178
|
+
} else
|
179
|
+
if (strcmp(*argv, "-s") == 0) {
|
180
|
+
mode = *argv;
|
181
|
+
} else
|
182
|
+
if (strcmp(*argv, "-r") == 0) {
|
183
|
+
mode = *argv;
|
184
|
+
} else
|
185
|
+
if (strcmp(*argv, "-ss") == 0) {
|
186
|
+
mode = *argv;
|
187
|
+
} else
|
188
|
+
if (argc > 1 && strcmp(*argv, "-skip") == 0) {
|
189
|
+
argc--, argv++;
|
190
|
+
skip = atoi(*argv);
|
191
|
+
} else
|
192
|
+
if (strcmp(*argv, "-hash") == 0) {
|
193
|
+
putHash = true;
|
194
|
+
} else
|
195
|
+
if (strcmp(*argv, "-time") == 0) {
|
196
|
+
bench = true;
|
197
|
+
} else
|
198
|
+
if (**argv != '-' && fName1.empty()) {
|
199
|
+
fName1 = *argv;
|
200
|
+
} else
|
201
|
+
if (**argv != '-' && fName2.empty()) {
|
202
|
+
fName2 = *argv;
|
203
|
+
} else
|
204
|
+
{
|
205
|
+
usage();
|
206
|
+
}
|
207
|
+
argc--, argv++;
|
208
|
+
}
|
209
|
+
if (fName1.empty() || fName2.empty() || mode.empty()) {
|
210
|
+
usage();
|
211
|
+
}
|
212
|
+
if (mode == "-c") {
|
213
|
+
create<FMindex, String>(fName1, fName2, skip);
|
214
|
+
} else
|
215
|
+
if (mode == "-s") {
|
216
|
+
search<FMindex, String>(fName1, fName2, putHash, bench);
|
217
|
+
} else
|
218
|
+
if (mode == "-r") {
|
219
|
+
recover<FMindex, String>(fName1, fName2);
|
220
|
+
} else
|
221
|
+
if (mode == "-ss") {
|
222
|
+
simpleSearch<String>(fName1, fName2, putHash);
|
223
|
+
} else
|
224
|
+
{
|
225
|
+
usage();
|
226
|
+
}
|
227
|
+
} catch (std::exception& e) {
|
228
|
+
printf("ERR %s\n", e.what());
|
229
|
+
return 1;
|
230
|
+
}
|
231
|
+
|
@@ -0,0 +1,19 @@
|
|
1
|
+
#include <stdio.h>
|
2
|
+
#include <cybozu/log.hpp>
|
3
|
+
|
4
|
+
int main()
|
5
|
+
{
|
6
|
+
cybozu::PutLog(cybozu::LogInfo, "this is a pen1");
|
7
|
+
cybozu::useSyslog(false);
|
8
|
+
cybozu::SetLogUseMsec();
|
9
|
+
cybozu::PutLog(cybozu::LogInfo, "this is a pen2");
|
10
|
+
cybozu::OpenLogFile("test.log");
|
11
|
+
cybozu::PutLog(cybozu::LogInfo, "this is a pen3");
|
12
|
+
cybozu::useSyslog(true);
|
13
|
+
cybozu::PutLog(cybozu::LogInfo, "this is a pen4");
|
14
|
+
|
15
|
+
cybozu::PutLog(cybozu::LogInfo, "AAtest");
|
16
|
+
cybozu::SetLogPriority(cybozu::LogInfo);
|
17
|
+
cybozu::PutLog(cybozu::LogInfo, "AAtest2");
|
18
|
+
cybozu::PutLog(cybozu::LogDebug, "not print");
|
19
|
+
}
|
@@ -0,0 +1,37 @@
|
|
1
|
+
#include <vector>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <cybozu/nlp/mecab.hpp>
|
4
|
+
#include <cybozu/mmap.hpp>
|
5
|
+
|
6
|
+
int main(int argc, char *argv[])
|
7
|
+
{
|
8
|
+
argc--, argv++;
|
9
|
+
if (argc == 0) {
|
10
|
+
fprintf(stderr, "mecab_smpl filename\n");
|
11
|
+
return 1;
|
12
|
+
}
|
13
|
+
try {
|
14
|
+
const std::string fileName = argv[0];
|
15
|
+
cybozu::Mmap mmap(fileName);
|
16
|
+
if (mmap.size() > (1 << 30)) {
|
17
|
+
fprintf(stderr, "file is too large %lld\n", (long long)mmap.size());
|
18
|
+
return 1;
|
19
|
+
}
|
20
|
+
|
21
|
+
cybozu::nlp::Mecab mecab;
|
22
|
+
typedef std::vector<std::string> StrVec;
|
23
|
+
StrVec sv;
|
24
|
+
if (mecab.parse(sv, mmap.get(), (int)mmap.size())) {
|
25
|
+
for (size_t i = 0, n = sv.size(); i < n; i++) {
|
26
|
+
printf("%s ", sv[i].c_str());
|
27
|
+
}
|
28
|
+
printf("\n");
|
29
|
+
}
|
30
|
+
return 0;
|
31
|
+
} catch (std::exception& e) {
|
32
|
+
fprintf(stderr, "exception %s\n", e.what());
|
33
|
+
} catch (...) {
|
34
|
+
fprintf(stderr, "unknown exception\n");
|
35
|
+
}
|
36
|
+
return 1;
|
37
|
+
}
|
@@ -0,0 +1,68 @@
|
|
1
|
+
/*
|
2
|
+
how to use two step option parser
|
3
|
+
*/
|
4
|
+
#include <stdio.h>
|
5
|
+
#include <cybozu/option.hpp>
|
6
|
+
#include <vector>
|
7
|
+
|
8
|
+
struct Opt {
|
9
|
+
// common option
|
10
|
+
int x;
|
11
|
+
cybozu::Option opt1;
|
12
|
+
|
13
|
+
// cmd option
|
14
|
+
std::string cmd;
|
15
|
+
|
16
|
+
std::string init_s;
|
17
|
+
double run_d;
|
18
|
+
char status_c;
|
19
|
+
cybozu::Option opt2;
|
20
|
+
|
21
|
+
int parse1(int argc, char *argv[])
|
22
|
+
{
|
23
|
+
opt1.appendOpt(&x, 5, "x", " :value");
|
24
|
+
opt1.appendDelimiter("init");
|
25
|
+
opt1.appendDelimiter("run");
|
26
|
+
opt1.appendDelimiter("status");
|
27
|
+
opt1.appendHelp("h");
|
28
|
+
opt1.setUsage("option2 [opt] (init|run|status)", true);
|
29
|
+
|
30
|
+
if (!opt1.parse(argc, argv)) return false;
|
31
|
+
const int pos = opt1.getNextPositionOfDelimiter();
|
32
|
+
if (pos == 0) return 0;
|
33
|
+
cmd = argv[pos - 1];
|
34
|
+
if (cmd == "init") {
|
35
|
+
opt2.appendOpt(&init_s, "abc", "s", " :string");
|
36
|
+
} else if (cmd == "run") {
|
37
|
+
opt2.appendOpt(&run_d, 1.2, "d", " :double");
|
38
|
+
} else if (cmd == "status") {
|
39
|
+
opt2.appendOpt(&status_c, 'X', "c", " :char");
|
40
|
+
} else {
|
41
|
+
return 0;
|
42
|
+
}
|
43
|
+
opt2.appendHelp("h");
|
44
|
+
return pos;
|
45
|
+
}
|
46
|
+
void parse(int argc, char *argv[])
|
47
|
+
{
|
48
|
+
int pos = parse1(argc, argv);
|
49
|
+
if (pos == 0) {
|
50
|
+
opt1.usage();
|
51
|
+
exit(1);
|
52
|
+
}
|
53
|
+
if (!opt2.parse(argc, argv, pos)) {
|
54
|
+
opt2.usage();
|
55
|
+
exit(1);
|
56
|
+
}
|
57
|
+
puts("common");
|
58
|
+
opt1.put();
|
59
|
+
printf("opt for %s\n", cmd.c_str());
|
60
|
+
opt2.put();
|
61
|
+
}
|
62
|
+
};
|
63
|
+
|
64
|
+
int main(int argc, char *argv[])
|
65
|
+
{
|
66
|
+
Opt opt;
|
67
|
+
opt.parse(argc, argv);
|
68
|
+
}
|
@@ -0,0 +1,42 @@
|
|
1
|
+
/*
|
2
|
+
how to use
|
3
|
+
*/
|
4
|
+
#include <stdio.h>
|
5
|
+
#include <cybozu/option.hpp>
|
6
|
+
#include <vector>
|
7
|
+
|
8
|
+
int main(int argc, char *argv[])
|
9
|
+
try
|
10
|
+
{
|
11
|
+
int x;
|
12
|
+
bool b;
|
13
|
+
double d;
|
14
|
+
std::string y;
|
15
|
+
std::vector<int> z;
|
16
|
+
std::vector<std::string> w;
|
17
|
+
std::string inName;
|
18
|
+
std::vector<std::string> r;
|
19
|
+
std::vector<std::string> vi;
|
20
|
+
uint64_t u;
|
21
|
+
|
22
|
+
cybozu::Option opt;
|
23
|
+
|
24
|
+
opt.appendOpt(&x, 5, "x", "int");
|
25
|
+
opt.appendBoolOpt(&b, "b", "bool");
|
26
|
+
opt.appendMust(&d, "d", "double");
|
27
|
+
opt.appendMust(&y, "y", "string");
|
28
|
+
opt.appendVec(&z, "z", "int int int ...");
|
29
|
+
opt.appendVec(&w, "w", "str str str ...");
|
30
|
+
opt.appendOpt(&u, 0, "u", "uint64 val");
|
31
|
+
opt.appendParam(&inName, "input-file", "text file");
|
32
|
+
opt.appendParamVec(&vi, "remains", "sss");
|
33
|
+
opt.appendHelp("h");
|
34
|
+
|
35
|
+
if (opt.parse(argc, argv)) {
|
36
|
+
opt.put();
|
37
|
+
} else {
|
38
|
+
opt.usage();
|
39
|
+
}
|
40
|
+
} catch (std::exception& e) {
|
41
|
+
printf("ERR %s\n", e.what());
|
42
|
+
}
|
@@ -0,0 +1,207 @@
|
|
1
|
+
/**
|
2
|
+
pLSI(probabilistic latent semantic indexing)
|
3
|
+
@author MITSUNARI Shigeo(@herumi)
|
4
|
+
*/
|
5
|
+
|
6
|
+
#include <stdio.h>
|
7
|
+
#include <map>
|
8
|
+
#include <cybozu/file.hpp>
|
9
|
+
#include <cybozu/csv.hpp>
|
10
|
+
#include <cybozu/nlp/plsi.hpp>
|
11
|
+
#include <cybozu/string_operation.hpp>
|
12
|
+
#include <cybozu/time.hpp>
|
13
|
+
#include <iostream>
|
14
|
+
|
15
|
+
/**
	@brief load the item / user matrix into plsi
	Each record is read with ' ' as the separator: the first field is the
	item key, every following field is a user key attached to that item.
	Records with fewer than two fields are skipped.
	@param[in,out] plsi model that receives the data
	@param[in] filepath file name of the matrix
*/
void load(cybozu::nlp::Plsi& plsi, const std::string& filepath)
{
	cybozu::CsvReader csv(filepath, ' ');
	std::vector<std::string> line;
	while (csv.read(line)) {
		// check the field count before touching line[0];
		// an empty record must not be indexed
		const size_t size = line.size();
		if (size < 2) continue;
		cybozu::nlp::Plsi::ITEM_TYPE item_key = cybozu::atoi(line[0]);
		std::map<size_t, bool> users;
		for (size_t i = 1; i < size; ++i) {
			cybozu::nlp::Plsi::USER_TYPE user_key = cybozu::atoi(line[i]);
			users[plsi.get_user_id(user_key)] = true;
		}
		plsi.getItem(item_key).set(users);
	}
}
|
31
|
+
|
32
|
+
/**
	@brief print the command line help to stdout and terminate with exit status 1
	The options listed are the ones main() accepts: -d, -k and -i.
*/
void usage()
{
	printf("usage: plsi [option]\n");
	printf(" -d [dir] : directory of dataset files\n");
	// each option line ends with a newline so that the lines do not run together
	printf(" -k [num] : # of latent classes\n");
	printf(" -i [num] : # of iterations\n");
	exit(1);
}
|
39
|
+
|
40
|
+
/**
	@brief one entry of an Atnd list (a user or an event)
*/
struct AtndData {
	/// date string; filled in only for events
	std::string date;
	/// name of the user or of the event
	std::string name;
};
|
47
|
+
|
48
|
+
/**
|
49
|
+
@brief Atnd Information (Users / Events)
|
50
|
+
*/
|
51
|
+
struct AtndInfo {
|
52
|
+
typedef std::map<int, AtndData> Int2Data;
|
53
|
+
typedef std::map<std::string, int> Str2Int;
|
54
|
+
Int2Data int2data_;
|
55
|
+
Str2Int name2id_;
|
56
|
+
/**
|
57
|
+
@brief load list of Atnd Users / Events
|
58
|
+
@param[in] name filename of list
|
59
|
+
@param[in] isEvent Is it an event list?
|
60
|
+
*/
|
61
|
+
bool loadList(const std::string& name, bool isEvent)
|
62
|
+
{
|
63
|
+
std::ifstream ifs(name.c_str(), std::ios::binary);
|
64
|
+
if (!ifs) return false;
|
65
|
+
for (;;) {
|
66
|
+
AtndData t;
|
67
|
+
int id;
|
68
|
+
if (!(ifs >> id)) break;
|
69
|
+
if (isEvent) {
|
70
|
+
std::string str;
|
71
|
+
ifs >> str;
|
72
|
+
if (str.empty()) return false;
|
73
|
+
if (str.size() < 6) {
|
74
|
+
fprintf(stderr, "bad format %s\n", str.c_str());
|
75
|
+
return false;
|
76
|
+
}
|
77
|
+
str = str.substr(0, str.size() - 6); // "+09:00"
|
78
|
+
cybozu::Time time(str);
|
79
|
+
time.setTime(time.getTime() + 9 * 3600);
|
80
|
+
time.toString(t.date, "%Y/%m/%d", false);
|
81
|
+
}
|
82
|
+
std::getline(ifs, t.name);
|
83
|
+
cybozu::Trim(t.name);
|
84
|
+
if (!ifs) break;
|
85
|
+
int2data_[id] = t;
|
86
|
+
name2id_[t.name] = id;
|
87
|
+
}
|
88
|
+
return true;
|
89
|
+
}
|
90
|
+
/**
|
91
|
+
@brief load list of Atnd Users / Events. (generates filename from isEvent parameter)
|
92
|
+
@param[in] dir directory name where list exists
|
93
|
+
@param[in] isEvent Is it an event list?
|
94
|
+
*/
|
95
|
+
bool load(const std::string& dir, bool isEvent)
|
96
|
+
{
|
97
|
+
const std::string key = isEvent ? "event" : "user";
|
98
|
+
std::string name;
|
99
|
+
name = dir + "/atnd-" + key + ".txt";
|
100
|
+
if (!loadList(name, isEvent)) {
|
101
|
+
fprintf(stderr, "can't read %s (%d)\n", name.c_str(), isEvent);
|
102
|
+
return false;
|
103
|
+
}
|
104
|
+
return true;
|
105
|
+
}
|
106
|
+
};
|
107
|
+
|
108
|
+
int main(int argc, char** argv)
|
109
|
+
{
|
110
|
+
std::string data_dir = cybozu::GetExePath() + "../sample/data/plsi/";
|
111
|
+
|
112
|
+
int K = 20;
|
113
|
+
int Iter = 100;
|
114
|
+
argc--, argv++;
|
115
|
+
while (argc > 0) {
|
116
|
+
if (argc > 1 && strcmp(*argv, "-d") == 0) {
|
117
|
+
argc--, argv++;
|
118
|
+
data_dir = *argv;
|
119
|
+
} else if (argc > 1 && strcmp(*argv, "-k") == 0) {
|
120
|
+
argc--, argv++;
|
121
|
+
K = cybozu::atoi(*argv);
|
122
|
+
} else if (argc > 1 && strcmp(*argv, "-i") == 0) {
|
123
|
+
argc--, argv++;
|
124
|
+
Iter = cybozu::atoi(*argv);
|
125
|
+
} else {
|
126
|
+
usage();
|
127
|
+
}
|
128
|
+
argc--, argv++;
|
129
|
+
}
|
130
|
+
const std::string name = data_dir + "/atnd-user-matrix.txt";
|
131
|
+
|
132
|
+
cybozu::nlp::Plsi plsi;
|
133
|
+
try {
|
134
|
+
AtndInfo event_master, user_master;
|
135
|
+
event_master.load(data_dir, true);
|
136
|
+
user_master.load(data_dir, false);
|
137
|
+
|
138
|
+
load(plsi, name);
|
139
|
+
plsi.startLearning(K);
|
140
|
+
{
|
141
|
+
puts("learning");
|
142
|
+
double pre_likelihood = -1e30;
|
143
|
+
double beta = 1;
|
144
|
+
for (int i = 0; i < Iter; ++i) {
|
145
|
+
double likelihood = plsi.step();
|
146
|
+
printf("%d : %.3f %.3f %.3f\n", i, beta, likelihood, likelihood - pre_likelihood);
|
147
|
+
if (likelihood - pre_likelihood < 1) {
|
148
|
+
beta *= 0.9;
|
149
|
+
if (beta < 0.01) break;
|
150
|
+
}
|
151
|
+
pre_likelihood = likelihood;
|
152
|
+
}
|
153
|
+
}
|
154
|
+
|
155
|
+
int mode = 0;
|
156
|
+
cybozu::nlp::Plsi::SEARCH_TYPE search_type = cybozu::nlp::Plsi::JOINT;
|
157
|
+
|
158
|
+
for(;;) {
|
159
|
+
std::string st;
|
160
|
+
std::cin >> st;
|
161
|
+
if (st == "") break;
|
162
|
+
if (st == "ui") {
|
163
|
+
mode = 0;
|
164
|
+
printf("user => items\n");
|
165
|
+
continue;
|
166
|
+
}
|
167
|
+
if (st == "ii") {
|
168
|
+
mode = 1;
|
169
|
+
printf("item => items\n");
|
170
|
+
continue;
|
171
|
+
}
|
172
|
+
if (st == "sj") {
|
173
|
+
search_type = cybozu::nlp::Plsi::JOINT;
|
174
|
+
printf("search type: JOINT probability\n");
|
175
|
+
continue;
|
176
|
+
}
|
177
|
+
if (st == "sc") {
|
178
|
+
search_type = cybozu::nlp::Plsi::CONDITIONAL;
|
179
|
+
printf("search type: CONDITIONAL probability\n");
|
180
|
+
continue;
|
181
|
+
}
|
182
|
+
if (st == "sp") {
|
183
|
+
search_type = cybozu::nlp::Plsi::POSTERIOR;
|
184
|
+
printf("search type: POSTERIOR probability\n");
|
185
|
+
continue;
|
186
|
+
}
|
187
|
+
|
188
|
+
cybozu::nlp::TopScore<size_t>::Table tbl;
|
189
|
+
switch(mode) {
|
190
|
+
case 0:
|
191
|
+
tbl = plsi.search_items(cybozu::atoi(st), 10);
|
192
|
+
break;
|
193
|
+
case 1:
|
194
|
+
tbl = plsi.similar_items(cybozu::atoi(st), search_type, 10);
|
195
|
+
break;
|
196
|
+
}
|
197
|
+
|
198
|
+
for (size_t i = 0; i < tbl.size(); i++) {
|
199
|
+
cybozu::nlp::Plsi::ITEM_TYPE key = plsi.get_item_key(tbl[i].idx);
|
200
|
+
printf("%1.3f %d:%s\n", log(tbl[i].score), key, event_master.int2data_[key].name.c_str());
|
201
|
+
}
|
202
|
+
}
|
203
|
+
|
204
|
+
} catch (std::exception& e) {
|
205
|
+
printf("error : %s\n", e.what());
|
206
|
+
}
|
207
|
+
}
|