cppjieba_rb 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (142) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +18 -0
  3. data/.gitmodules +3 -0
  4. data/.travis.yml +26 -0
  5. data/Gemfile +3 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +81 -0
  8. data/Rakefile +20 -0
  9. data/cppjieba_rb.gemspec +50 -0
  10. data/ext/cppjieba/.gitignore +17 -0
  11. data/ext/cppjieba/.travis.yml +22 -0
  12. data/ext/cppjieba/CMakeLists.txt +28 -0
  13. data/ext/cppjieba/ChangeLog.md +236 -0
  14. data/ext/cppjieba/README.md +285 -0
  15. data/ext/cppjieba/README_EN.md +111 -0
  16. data/ext/cppjieba/appveyor.yml +32 -0
  17. data/ext/cppjieba/deps/CMakeLists.txt +1 -0
  18. data/ext/cppjieba/deps/gtest/CMakeLists.txt +5 -0
  19. data/ext/cppjieba/deps/gtest/include/gtest/gtest-death-test.h +283 -0
  20. data/ext/cppjieba/deps/gtest/include/gtest/gtest-message.h +230 -0
  21. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h +1421 -0
  22. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h.pump +487 -0
  23. data/ext/cppjieba/deps/gtest/include/gtest/gtest-printers.h +796 -0
  24. data/ext/cppjieba/deps/gtest/include/gtest/gtest-spi.h +232 -0
  25. data/ext/cppjieba/deps/gtest/include/gtest/gtest-test-part.h +176 -0
  26. data/ext/cppjieba/deps/gtest/include/gtest/gtest-typed-test.h +259 -0
  27. data/ext/cppjieba/deps/gtest/include/gtest/gtest.h +2155 -0
  28. data/ext/cppjieba/deps/gtest/include/gtest/gtest_pred_impl.h +358 -0
  29. data/ext/cppjieba/deps/gtest/include/gtest/gtest_prod.h +58 -0
  30. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-death-test-internal.h +308 -0
  31. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-filepath.h +210 -0
  32. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-internal.h +1226 -0
  33. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-linked_ptr.h +233 -0
  34. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h +4822 -0
  35. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h.pump +301 -0
  36. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util.h +619 -0
  37. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-port.h +1788 -0
  38. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-string.h +350 -0
  39. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h +968 -0
  40. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h.pump +336 -0
  41. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h +3330 -0
  42. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h.pump +296 -0
  43. data/ext/cppjieba/deps/gtest/src/.deps/.dirstamp +0 -0
  44. data/ext/cppjieba/deps/gtest/src/.deps/gtest-all.Plo +681 -0
  45. data/ext/cppjieba/deps/gtest/src/.deps/gtest_main.Plo +509 -0
  46. data/ext/cppjieba/deps/gtest/src/.dirstamp +0 -0
  47. data/ext/cppjieba/deps/gtest/src/gtest-all.cc +48 -0
  48. data/ext/cppjieba/deps/gtest/src/gtest-death-test.cc +1234 -0
  49. data/ext/cppjieba/deps/gtest/src/gtest-filepath.cc +380 -0
  50. data/ext/cppjieba/deps/gtest/src/gtest-internal-inl.h +1038 -0
  51. data/ext/cppjieba/deps/gtest/src/gtest-port.cc +746 -0
  52. data/ext/cppjieba/deps/gtest/src/gtest-printers.cc +356 -0
  53. data/ext/cppjieba/deps/gtest/src/gtest-test-part.cc +110 -0
  54. data/ext/cppjieba/deps/gtest/src/gtest-typed-test.cc +110 -0
  55. data/ext/cppjieba/deps/gtest/src/gtest.cc +4898 -0
  56. data/ext/cppjieba/deps/gtest/src/gtest_main.cc +39 -0
  57. data/ext/cppjieba/deps/limonp/ArgvContext.hpp +70 -0
  58. data/ext/cppjieba/deps/limonp/BlockingQueue.hpp +49 -0
  59. data/ext/cppjieba/deps/limonp/BoundedBlockingQueue.hpp +67 -0
  60. data/ext/cppjieba/deps/limonp/BoundedQueue.hpp +65 -0
  61. data/ext/cppjieba/deps/limonp/Closure.hpp +206 -0
  62. data/ext/cppjieba/deps/limonp/Colors.hpp +31 -0
  63. data/ext/cppjieba/deps/limonp/Condition.hpp +38 -0
  64. data/ext/cppjieba/deps/limonp/Config.hpp +103 -0
  65. data/ext/cppjieba/deps/limonp/FileLock.hpp +74 -0
  66. data/ext/cppjieba/deps/limonp/ForcePublic.hpp +7 -0
  67. data/ext/cppjieba/deps/limonp/LocalVector.hpp +139 -0
  68. data/ext/cppjieba/deps/limonp/Logging.hpp +76 -0
  69. data/ext/cppjieba/deps/limonp/Md5.hpp +411 -0
  70. data/ext/cppjieba/deps/limonp/MutexLock.hpp +51 -0
  71. data/ext/cppjieba/deps/limonp/NonCopyable.hpp +21 -0
  72. data/ext/cppjieba/deps/limonp/StdExtension.hpp +159 -0
  73. data/ext/cppjieba/deps/limonp/StringUtil.hpp +365 -0
  74. data/ext/cppjieba/deps/limonp/Thread.hpp +44 -0
  75. data/ext/cppjieba/deps/limonp/ThreadPool.hpp +86 -0
  76. data/ext/cppjieba/dict/README.md +31 -0
  77. data/ext/cppjieba/dict/hmm_model.utf8 +34 -0
  78. data/ext/cppjieba/dict/idf.utf8 +258826 -0
  79. data/ext/cppjieba/dict/jieba.dict.utf8 +348982 -0
  80. data/ext/cppjieba/dict/pos_dict/char_state_tab.utf8 +6653 -0
  81. data/ext/cppjieba/dict/pos_dict/prob_emit.utf8 +166 -0
  82. data/ext/cppjieba/dict/pos_dict/prob_start.utf8 +259 -0
  83. data/ext/cppjieba/dict/pos_dict/prob_trans.utf8 +5222 -0
  84. data/ext/cppjieba/dict/stop_words.utf8 +1534 -0
  85. data/ext/cppjieba/dict/user.dict.utf8 +4 -0
  86. data/ext/cppjieba/include/cppjieba/DictTrie.hpp +227 -0
  87. data/ext/cppjieba/include/cppjieba/FullSegment.hpp +93 -0
  88. data/ext/cppjieba/include/cppjieba/HMMModel.hpp +129 -0
  89. data/ext/cppjieba/include/cppjieba/HMMSegment.hpp +190 -0
  90. data/ext/cppjieba/include/cppjieba/Jieba.hpp +108 -0
  91. data/ext/cppjieba/include/cppjieba/KeywordExtractor.hpp +153 -0
  92. data/ext/cppjieba/include/cppjieba/MPSegment.hpp +137 -0
  93. data/ext/cppjieba/include/cppjieba/MixSegment.hpp +109 -0
  94. data/ext/cppjieba/include/cppjieba/PosTagger.hpp +77 -0
  95. data/ext/cppjieba/include/cppjieba/PreFilter.hpp +54 -0
  96. data/ext/cppjieba/include/cppjieba/QuerySegment.hpp +90 -0
  97. data/ext/cppjieba/include/cppjieba/SegmentBase.hpp +46 -0
  98. data/ext/cppjieba/include/cppjieba/SegmentTagged.hpp +23 -0
  99. data/ext/cppjieba/include/cppjieba/TextRankExtractor.hpp +190 -0
  100. data/ext/cppjieba/include/cppjieba/Trie.hpp +174 -0
  101. data/ext/cppjieba/include/cppjieba/Unicode.hpp +215 -0
  102. data/ext/cppjieba/test/CMakeLists.txt +5 -0
  103. data/ext/cppjieba/test/demo.cpp +80 -0
  104. data/ext/cppjieba/test/load_test.cpp +54 -0
  105. data/ext/cppjieba/test/testdata/curl.res +1 -0
  106. data/ext/cppjieba/test/testdata/extra_dict/jieba.dict.small.utf8 +109750 -0
  107. data/ext/cppjieba/test/testdata/gbk_dict/hmm_model.gbk +34 -0
  108. data/ext/cppjieba/test/testdata/gbk_dict/jieba.dict.gbk +348982 -0
  109. data/ext/cppjieba/test/testdata/jieba.dict.0.1.utf8 +93 -0
  110. data/ext/cppjieba/test/testdata/jieba.dict.0.utf8 +93 -0
  111. data/ext/cppjieba/test/testdata/jieba.dict.1.utf8 +67 -0
  112. data/ext/cppjieba/test/testdata/jieba.dict.2.utf8 +64 -0
  113. data/ext/cppjieba/test/testdata/load_test.urls +2 -0
  114. data/ext/cppjieba/test/testdata/review.100 +100 -0
  115. data/ext/cppjieba/test/testdata/review.100.res +200 -0
  116. data/ext/cppjieba/test/testdata/server.conf +19 -0
  117. data/ext/cppjieba/test/testdata/testlines.gbk +9 -0
  118. data/ext/cppjieba/test/testdata/testlines.utf8 +8 -0
  119. data/ext/cppjieba/test/testdata/userdict.2.utf8 +1 -0
  120. data/ext/cppjieba/test/testdata/userdict.english +2 -0
  121. data/ext/cppjieba/test/testdata/userdict.utf8 +8 -0
  122. data/ext/cppjieba/test/testdata/weicheng.utf8 +247 -0
  123. data/ext/cppjieba/test/unittest/CMakeLists.txt +24 -0
  124. data/ext/cppjieba/test/unittest/gtest_main.cpp +39 -0
  125. data/ext/cppjieba/test/unittest/jieba_test.cpp +133 -0
  126. data/ext/cppjieba/test/unittest/keyword_extractor_test.cpp +79 -0
  127. data/ext/cppjieba/test/unittest/pos_tagger_test.cpp +41 -0
  128. data/ext/cppjieba/test/unittest/pre_filter_test.cpp +43 -0
  129. data/ext/cppjieba/test/unittest/segments_test.cpp +256 -0
  130. data/ext/cppjieba/test/unittest/textrank_test.cpp +86 -0
  131. data/ext/cppjieba/test/unittest/trie_test.cpp +177 -0
  132. data/ext/cppjieba/test/unittest/unicode_test.cpp +43 -0
  133. data/ext/cppjieba_rb/cppjieba_rb.c +10 -0
  134. data/ext/cppjieba_rb/extconf.rb +26 -0
  135. data/ext/cppjieba_rb/internal.cc +148 -0
  136. data/lib/cppjieba_rb/segment.rb +20 -0
  137. data/lib/cppjieba_rb/version.rb +3 -0
  138. data/lib/cppjieba_rb.rb +34 -0
  139. data/test/test_keyword.rb +17 -0
  140. data/test/test_segment.rb +24 -0
  141. data/test/test_tagging.rb +19 -0
  142. metadata +244 -0
@@ -0,0 +1,74 @@
1
+ #ifndef LIMONP_FILELOCK_HPP
2
+ #define LIMONP_FILELOCK_HPP
3
+
4
+ #include <unistd.h>
5
+ #include <stdlib.h>
6
+ #include <stdio.h>
7
+ #include <fcntl.h>
8
+ #include <errno.h>
9
+ #include <string>
10
+ #include <string.h>
11
+ #include <assert.h>
12
+
13
+ namespace limonp {
14
+
15
+ using std::string;
16
+
17
/*
 * FileLock: small wrapper around a whole-file write lock taken with
 * fcntl(F_SETLK) on a descriptor opened by Open().
 *
 * Errors are not thrown: a failing Open/Lock/UnLock clears the Ok() flag and
 * stores strerror(errno) for retrieval through Error(). The flag is sticky;
 * it is never set back to true by a later successful call.
 *
 * NOTE(review): the class is copyable, but a copy shares the same descriptor
 * and both objects would close it. Do not copy instances.
 */
class FileLock {
 public:
  FileLock() : fd_(-1), ok_(true) {
  }

  // Releases the descriptor if one is still open.
  ~FileLock() {
    Close();
  }

  // Opens (creating with mode 0644 if needed) the file used as the lock.
  // Must not be called while another file is open on this object.
  void Open(const std::string& fname) {
    assert(fd_ == -1);
    fd_ = open(fname.c_str(), O_RDWR | O_CREAT, 0644);
    if(fd_ < 0) {
      fd_ = -1;
      ok_ = false;
      err_ = strerror(errno);
    }
  }

  // Closes the descriptor. Safe to call repeatedly or before Open(): the
  // descriptor is reset to -1 so it is never closed twice and the object can
  // be re-opened afterwards.
  void Close() {
    if(fd_ >= 0) {
      ::close(fd_);
      fd_ = -1;
    }
  }

  // Tries to take a write lock on the entire file. F_SETLK is used, so the
  // call does not wait: if the lock cannot be taken, Ok() becomes false.
  void Lock() {
    if(LockOrUnlock(fd_, true) < 0) {
      ok_ = false;
      err_ = strerror(errno);
    }
  }

  // Releases the lock taken by Lock().
  void UnLock() {
    if(LockOrUnlock(fd_, false) < 0) {
      ok_ = false;
      err_ = strerror(errno);
    }
  }

  // True until any operation on this object has failed.
  bool Ok() const {
    return ok_;
  }

  // Text of the most recent failure (empty if none happened).
  std::string Error() const {
    return err_;
  }

 private:
  // Issues the fcntl request; returns fcntl's result (negative on failure,
  // with errno set).
  static int LockOrUnlock(int fd, bool lock) {
    errno = 0;
    struct flock f;
    memset(&f, 0, sizeof(f));
    f.l_type = (lock ? F_WRLCK : F_UNLCK);
    f.l_whence = SEEK_SET;
    f.l_start = 0;
    f.l_len = 0; // Lock/unlock entire file
    return fcntl(fd, F_SETLK, &f);
  }

  int fd_;           // open descriptor, or -1 when nothing is open
  bool ok_;          // false once any operation has failed
  std::string err_;  // strerror() text of the last failure
}; // class FileLock
71
+
72
+ }// namespace limonp
73
+
74
+ #endif // LIMONP_FILELOCK_HPP
@@ -0,0 +1,7 @@
1
+ #ifndef LIMONP_FORCE_PUBLIC_H
2
+ #define LIMONP_FORCE_PUBLIC_H
3
+
4
+ #define private public
5
+ #define protected public
6
+
7
+ #endif // LIMONP_FORCE_PUBLIC_H
@@ -0,0 +1,139 @@
1
+ #ifndef LIMONP_LOCAL_VECTOR_HPP
2
+ #define LIMONP_LOCAL_VECTOR_HPP
3
+
4
+ #include <iostream>
5
+ #include <stdlib.h>
6
+ #include <assert.h>
7
+ #include <string.h>
8
+
9
+ namespace limonp {
10
+ using namespace std;
11
+ /*
12
+ * LocalVector<T> : T must be primitive type (char , int, size_t), if T is struct or class, LocalVector<T> may be dangerous..
13
+ * LocalVector<T> is simple and not well-tested.
14
+ */
15
// Number of elements stored inline before LocalVector switches to the heap.
const size_t LOCAL_VECTOR_BUFFER_SIZE = 16;

/*
 * LocalVector<T>: a minimal growable array that keeps up to
 * LOCAL_VECTOR_BUFFER_SIZE elements in an inline buffer and moves to
 * malloc'ed storage once that is exceeded.
 *
 * Elements are copied with memcpy and never constructed or destroyed
 * individually, so T must be a primitive / trivially copyable type.
 * Allocation failure is only checked with assert().
 */
template <class T>
class LocalVector {
 public:
  typedef const T* const_iterator ;
  typedef T value_type;
  typedef size_t size_type;
 private:
  T buffer_[LOCAL_VECTOR_BUFFER_SIZE];  // inline storage
  T * ptr_;                             // buffer_ or a malloc'ed block
  size_t size_;                         // number of valid elements
  size_t capacity_;                     // number of slots behind ptr_
 public:
  // Creates an empty vector backed by the inline buffer.
  LocalVector() {
    init_();
  }
  // Copy constructor: delegates to operator=.
  LocalVector(const LocalVector<T>& vec) {
    init_();
    *this = vec;
  }
  // Copies the elements of [begin, end).
  LocalVector(const_iterator begin, const_iterator end) { // TODO: make it faster
    init_();
    while(begin != end) {
      push_back(*begin++);
    }
  }
  // Creates a vector holding `size` copies of `t`.
  LocalVector(size_t size, const T& t) { // TODO: make it faster
    init_();
    while(size--) {
      push_back(t);
    }
  }
  ~LocalVector() {
    if(ptr_ != buffer_) {
      free(ptr_);
    }
  }
 public:
  // Replaces the contents with a copy of `vec`, keeping vec's capacity.
  LocalVector<T>& operator = (const LocalVector<T>& vec) {
    // Self-assignment must be a no-op: clear() below would otherwise throw
    // away the very data we are about to copy.
    if(this == &vec) {
      return *this;
    }
    clear();
    size_ = vec.size();
    capacity_ = vec.capacity();
    if(vec.buffer_ == vec.ptr_) {
      memcpy(buffer_, vec.buffer_, sizeof(T) * size_);
      ptr_ = buffer_;
    } else {
      ptr_ = (T*) malloc(vec.capacity() * sizeof(T));
      assert(ptr_);
      memcpy(ptr_, vec.ptr_, vec.size() * sizeof(T));
    }
    return *this;
  }
 private:
  // Points the vector at the (empty) inline buffer. Does not free anything.
  void init_() {
    ptr_ = buffer_;
    size_ = 0;
    capacity_ = LOCAL_VECTOR_BUFFER_SIZE;
  }
 public:
  // Unchecked element access.
  T& operator [] (size_t i) {
    return ptr_[i];
  }
  const T& operator [] (size_t i) const {
    return ptr_[i];
  }
  // Appends `t`, doubling the capacity when the storage is full.
  void push_back(const T& t) {
    if(size_ == capacity_) {
      assert(capacity_);
      reserve(capacity_ * 2);
    }
    ptr_[size_ ++ ] = t;
  }
  // Grows the storage to hold at least `size` elements; never shrinks.
  void reserve(size_t size) {
    if(size <= capacity_) {
      return;
    }
    T * next = (T*)malloc(sizeof(T) * size);
    assert(next);
    T * old = ptr_;
    ptr_ = next;
    // Only the first size_ slots hold valid elements; copying the whole old
    // capacity would read uninitialised memory.
    memcpy(ptr_, old, sizeof(T) * size_);
    capacity_ = size;
    if(old != buffer_) {
      free(old);
    }
  }
  bool empty() const {
    return 0 == size();
  }
  size_t size() const {
    return size_;
  }
  size_t capacity() const {
    return capacity_;
  }
  const_iterator begin() const {
    return ptr_;
  }
  const_iterator end() const {
    return ptr_ + size_;
  }
  // Drops all elements and any heap block, returning to the inline buffer.
  void clear() {
    if(ptr_ != buffer_) {
      free(ptr_);
    }
    init_();
  }
};
123
+
124
+ template <class T>
125
+ ostream & operator << (ostream& os, const LocalVector<T>& vec) {
126
+ if(vec.empty()) {
127
+ return os << "[]";
128
+ }
129
+ os<<"[\""<<vec[0];
130
+ for(size_t i = 1; i < vec.size(); i++) {
131
+ os<<"\", \""<<vec[i];
132
+ }
133
+ os<<"\"]";
134
+ return os;
135
+ }
136
+
137
+ }
138
+
139
+ #endif
@@ -0,0 +1,76 @@
1
+ #ifndef LIMONP_LOGGING_HPP
2
+ #define LIMONP_LOGGING_HPP
3
+
4
+ #include <sstream>
5
+ #include <iostream>
6
+ #include <cassert>
7
+ #include <cstdlib>
8
+ #include <ctime>
9
+
10
+ #ifdef XLOG
11
+ #error "XLOG has been defined already"
12
+ #endif // XLOG
13
+ #ifdef XCHECK
14
+ #error "XCHECK has been defined already"
15
+ #endif // XCHECK
16
+
17
// XLOG(level): yields an ostream for one log record at limonp::LL_<level>,
// tagged with the current file and line. The record is emitted when the
// temporary Logger is destroyed at the end of the full expression.
#define XLOG(level) limonp::Logger(limonp::LL_##level, __FILE__, __LINE__).Stream()
// XCHECK(exp): logs at FATAL level when `exp` is false; further text can be
// streamed after it. Written as if/else so that using the macro inside an
// unbraced `if (...) XCHECK(...); else ...` does not capture the caller's else.
#define XCHECK(exp) if(exp) {} else XLOG(FATAL) << "exp: ["#exp << "] false. "
19
+
20
+ namespace limonp {
21
+
22
// Log severities, in increasing order. The numeric value is also the index
// into LOG_LEVEL_ARRAY.
enum {
  LL_DEBUG = 0,
  LL_INFO = 1,
  LL_WARNING = 2,
  LL_ERROR = 3,
  LL_FATAL = 4,
}; // enum

// Printable name for each severity, indexed by the LL_* value.
static const char * LOG_LEVEL_ARRAY[] = {"DEBUG","INFO","WARN","ERROR","FATAL"};
// strftime format of the timestamp that starts every record.
static const char * LOG_TIME_FORMAT = "%Y-%m-%d %H:%M:%S";

/*
 * Logger: builds one log record in memory and writes it to std::cerr when the
 * object is destroyed. A record looks like
 *   <timestamp> <file>:<line> <LEVEL> <streamed text>
 * Destroying a record of level LL_FATAL calls abort() after printing it.
 *
 * If LOGGING_LEVEL is defined at compile time, records below that level get
 * no prefix and are not printed.
 */
class Logger {
 public:
  // level:    one of the LL_* values
  // filename: source file reported in the record
  // lineno:   source line reported in the record
  Logger(size_t level, const char* filename, int lineno)
   : level_(level) {
#ifdef LOGGING_LEVEL
    if (level_ < LOGGING_LEVEL) {
      return;
    }
#endif
    // level_ is used as an index below, so it must be strictly smaller than
    // the number of names.
    assert(level_ < sizeof(LOG_LEVEL_ARRAY)/sizeof(*LOG_LEVEL_ARRAY));
    char buf[32];
    time_t now;
    time(&now);
    const struct tm* local = localtime(&now);
    // Fall back to an empty timestamp instead of handing strftime a null
    // pointer or printing an unspecified buffer.
    if (local == NULL || strftime(buf, sizeof(buf), LOG_TIME_FORMAT, local) == 0) {
      buf[0] = '\0';
    }
    stream_ << buf
      << " " << filename
      << ":" << lineno
      << " " << LOG_LEVEL_ARRAY[level_]
      << " ";
  }
  // Emits the record (followed by a flush) and aborts for LL_FATAL.
  ~Logger() {
#ifdef LOGGING_LEVEL
    if (level_ < LOGGING_LEVEL) {
      return;
    }
#endif
    std::cerr << stream_.str() << std::endl;
    if (level_ == LL_FATAL) {
      abort();
    }
  }

  // Stream the caller appends the message text to.
  std::ostream& Stream() {
    return stream_;
  }

 private:
  std::ostringstream stream_;  // record under construction
  size_t level_;               // severity of this record
}; // class Logger
73
+
74
+ } // namespace limonp
75
+
76
+ #endif // LIMONP_LOGGING_HPP
@@ -0,0 +1,411 @@
1
+ #ifndef __MD5_H__
2
+ #define __MD5_H__
3
+
4
+ // Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
5
+ // rights reserved.
6
+
7
+ // License to copy and use this software is granted provided that it
8
+ // is identified as the "RSA Data Security, Inc. MD5 Message-Digest
9
+ // Algorithm" in all material mentioning or referencing this software
10
+ // or this function.
11
+ //
12
+ // License is also granted to make and use derivative works provided
13
+ // that such works are identified as "derived from the RSA Data
14
+ // Security, Inc. MD5 Message-Digest Algorithm" in all material
15
+ // mentioning or referencing the derived work.
16
+ //
17
+ // RSA Data Security, Inc. makes no representations concerning either
18
+ // the merchantability of this software or the suitability of this
19
+ // software for any particular purpose. It is provided "as is"
20
+ // without express or implied warranty of any kind.
21
+ //
22
+ // These notices must be retained in any copies of any part of this
23
+ // documentation and/or software.
24
+
25
+
26
+
27
+ // The original md5 implementation avoids external libraries.
28
+ // This version has dependency on stdio.h for file input and
29
+ // string.h for memcpy.
30
+ #include <cstdio>
31
+ #include <cstring>
32
+ #include <iostream>
33
+
34
+ namespace limonp {
35
+
36
+ //#pragma region MD5 defines
37
+ // Constants for MD5Transform routine.
38
+ #define S11 7
39
+ #define S12 12
40
+ #define S13 17
41
+ #define S14 22
42
+ #define S21 5
43
+ #define S22 9
44
+ #define S23 14
45
+ #define S24 20
46
+ #define S31 4
47
+ #define S32 11
48
+ #define S33 16
49
+ #define S34 23
50
+ #define S41 6
51
+ #define S42 10
52
+ #define S43 15
53
+ #define S44 21
54
+
55
+
56
+ // F, G, H and I are basic MD5 functions.
57
+ #define F(x, y, z) (((x) & (y)) | ((~x) & (z)))
58
+ #define G(x, y, z) (((x) & (z)) | ((y) & (~z)))
59
+ #define H(x, y, z) ((x) ^ (y) ^ (z))
60
+ #define I(x, y, z) ((y) ^ ((x) | (~z)))
61
+
62
+ // ROTATE_LEFT rotates x left n bits.
63
+ #define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
64
+
65
+ // FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
66
+ // Rotation is separate from addition to prevent recomputation.
67
+ #define FF(a, b, c, d, x, s, ac) { \
68
+ (a) += F ((b), (c), (d)) + (x) + (UINT4)(ac); \
69
+ (a) = ROTATE_LEFT ((a), (s)); \
70
+ (a) += (b); \
71
+ }
72
+ #define GG(a, b, c, d, x, s, ac) { \
73
+ (a) += G ((b), (c), (d)) + (x) + (UINT4)(ac); \
74
+ (a) = ROTATE_LEFT ((a), (s)); \
75
+ (a) += (b); \
76
+ }
77
+ #define HH(a, b, c, d, x, s, ac) { \
78
+ (a) += H ((b), (c), (d)) + (x) + (UINT4)(ac); \
79
+ (a) = ROTATE_LEFT ((a), (s)); \
80
+ (a) += (b); \
81
+ }
82
+ #define II(a, b, c, d, x, s, ac) { \
83
+ (a) += I ((b), (c), (d)) + (x) + (UINT4)(ac); \
84
+ (a) = ROTATE_LEFT ((a), (s)); \
85
+ (a) += (b); \
86
+ }
87
+ //#pragma endregion
88
+
89
+
90
+ typedef unsigned char BYTE ;
91
+
92
+ // POINTER defines a generic pointer type
93
+ typedef unsigned char *POINTER;
94
+
95
+ // UINT2 defines a two byte word
96
+ typedef unsigned short int UINT2;
97
+
98
+ // UINT4 defines a four byte word
99
+ typedef unsigned int UINT4;
100
+
101
+ static unsigned char PADDING[64] = {
102
+ 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
103
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
104
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
105
+ };
106
+ // convenient object that wraps
107
+ // the C-functions for use in C++ only
108
+ class MD5 {
109
+ private:
110
+ struct __context_t {
111
+ UINT4 state[4]; /* state (ABCD) */
112
+ UINT4 count[2]; /* number of bits, modulo 2^64 (lsb first) */
113
+ unsigned char buffer[64]; /* input buffer */
114
+ } context ;
115
+
116
+ //#pragma region static helper functions
117
+ // The core of the MD5 algorithm is here.
118
+ // MD5 basic transformation. Transforms state based on block.
119
+ static void MD5Transform( UINT4 state[4], unsigned char block[64] ) {
120
+ UINT4 a = state[0], b = state[1], c = state[2], d = state[3], x[16];
121
+
122
+ Decode (x, block, 64);
123
+
124
+ /* Round 1 */
125
+ FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
126
+ FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
127
+ FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
128
+ FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
129
+ FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
130
+ FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
131
+ FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
132
+ FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
133
+ FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
134
+ FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
135
+ FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
136
+ FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
137
+ FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
138
+ FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
139
+ FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
140
+ FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
141
+
142
+ /* Round 2 */
143
+ GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
144
+ GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
145
+ GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
146
+ GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
147
+ GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
148
+ GG (d, a, b, c, x[10], S22, 0x2441453); /* 22 */
149
+ GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
150
+ GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
151
+ GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
152
+ GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
153
+ GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
154
+ GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
155
+ GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
156
+ GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
157
+ GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
158
+ GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
159
+
160
+ /* Round 3 */
161
+ HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
162
+ HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
163
+ HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
164
+ HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
165
+ HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
166
+ HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
167
+ HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
168
+ HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
169
+ HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
170
+ HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
171
+ HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
172
+ HH (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */
173
+ HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
174
+ HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
175
+ HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
176
+ HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
177
+
178
+ /* Round 4 */
179
+ II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
180
+ II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
181
+ II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
182
+ II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
183
+ II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
184
+ II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
185
+ II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
186
+ II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
187
+ II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
188
+ II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
189
+ II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
190
+ II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
191
+ II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
192
+ II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
193
+ II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
194
+ II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
195
+
196
+ state[0] += a;
197
+ state[1] += b;
198
+ state[2] += c;
199
+ state[3] += d;
200
+
201
+ // Zeroize sensitive information.
202
+ memset((POINTER)x, 0, sizeof (x));
203
+ }
204
+
205
+ // Encodes input (UINT4) into output (unsigned char). Assumes len is
206
+ // a multiple of 4.
207
+ static void Encode( unsigned char *output, UINT4 *input, unsigned int len ) {
208
+ unsigned int i, j;
209
+
210
+ for (i = 0, j = 0; j < len; i++, j += 4) {
211
+ output[j] = (unsigned char)(input[i] & 0xff);
212
+ output[j+1] = (unsigned char)((input[i] >> 8) & 0xff);
213
+ output[j+2] = (unsigned char)((input[i] >> 16) & 0xff);
214
+ output[j+3] = (unsigned char)((input[i] >> 24) & 0xff);
215
+ }
216
+ }
217
+
218
+ // Decodes input (unsigned char) into output (UINT4). Assumes len is
219
+ // a multiple of 4.
220
+ static void Decode( UINT4 *output, unsigned char *input, unsigned int len ) {
221
+ unsigned int i, j;
222
+
223
+ for (i = 0, j = 0; j < len; i++, j += 4)
224
+ output[i] = ((UINT4)input[j]) | (((UINT4)input[j+1]) << 8) |
225
+ (((UINT4)input[j+2]) << 16) | (((UINT4)input[j+3]) << 24);
226
+ }
227
+ //#pragma endregion
228
+
229
+
230
+ public:
231
+ // MAIN FUNCTIONS
232
+ MD5() {
233
+ Init() ;
234
+ }
235
+
236
+ // MD5 initialization. Begins an MD5 operation, writing a new context.
237
+ void Init() {
238
+ context.count[0] = context.count[1] = 0;
239
+
240
+ // Load magic initialization constants.
241
+ context.state[0] = 0x67452301;
242
+ context.state[1] = 0xefcdab89;
243
+ context.state[2] = 0x98badcfe;
244
+ context.state[3] = 0x10325476;
245
+ }
246
+
247
+ // MD5 block update operation. Continues an MD5 message-digest
248
+ // operation, processing another message block, and updating the
249
+ // context.
250
+ void Update(
251
+ unsigned char *input, // input block
252
+ unsigned int inputLen ) { // length of input block
253
+ unsigned int i, index, partLen;
254
+
255
+ // Compute number of bytes mod 64
256
+ index = (unsigned int)((context.count[0] >> 3) & 0x3F);
257
+
258
+ // Update number of bits
259
+ if ((context.count[0] += ((UINT4)inputLen << 3))
260
+ < ((UINT4)inputLen << 3))
261
+ context.count[1]++;
262
+ context.count[1] += ((UINT4)inputLen >> 29);
263
+
264
+ partLen = 64 - index;
265
+
266
+ // Transform as many times as possible.
267
+ if (inputLen >= partLen) {
268
+ memcpy((POINTER)&context.buffer[index], (POINTER)input, partLen);
269
+ MD5Transform (context.state, context.buffer);
270
+
271
+ for (i = partLen; i + 63 < inputLen; i += 64)
272
+ MD5Transform (context.state, &input[i]);
273
+
274
+ index = 0;
275
+ } else
276
+ i = 0;
277
+
278
+ /* Buffer remaining input */
279
+ memcpy((POINTER)&context.buffer[index], (POINTER)&input[i], inputLen-i);
280
+ }
281
+
282
+ // MD5 finalization. Ends an MD5 message-digest operation, writing the
283
+ // the message digest and zeroizing the context.
284
+ // Writes to digestRaw
285
+ void Final() {
286
+ unsigned char bits[8];
287
+ unsigned int index, padLen;
288
+
289
+ // Save number of bits
290
+ Encode( bits, context.count, 8 );
291
+
292
+ // Pad out to 56 mod 64.
293
+ index = (unsigned int)((context.count[0] >> 3) & 0x3f);
294
+ padLen = (index < 56) ? (56 - index) : (120 - index);
295
+ Update( PADDING, padLen );
296
+
297
+ // Append length (before padding)
298
+ Update( bits, 8 );
299
+
300
+ // Store state in digest
301
+ Encode( digestRaw, context.state, 16);
302
+
303
+ // Zeroize sensitive information.
304
+ memset((POINTER)&context, 0, sizeof (context));
305
+
306
+ writeToString() ;
307
+ }
308
+
309
+ /// Buffer must be 32+1 (nul) = 33 chars long at least
310
+ void writeToString() {
311
+ int pos ;
312
+
313
+ for( pos = 0 ; pos < 16 ; pos++ )
314
+ sprintf( digestChars+(pos*2), "%02x", digestRaw[pos] ) ;
315
+ }
316
+
317
+
318
+ public:
319
+ // an MD5 digest is a 16-byte number (32 hex digits)
320
+ BYTE digestRaw[ 16 ] ;
321
+
322
+ // This version of the digest is actually
323
+ // a "printf'd" version of the digest.
324
+ char digestChars[ 33 ] ;
325
+
326
+ /// Load a file from disk and digest it
327
+ // Digests a file and returns the result.
328
+ const char* digestFile( const char *filename ) {
329
+ if (NULL == filename || strcmp(filename, "") == 0)
330
+ return NULL;
331
+
332
+ Init() ;
333
+
334
+ FILE *file;
335
+
336
+ unsigned char buffer[1024] ;
337
+
338
+ if((file = fopen (filename, "rb")) == NULL) {
339
+ return NULL;
340
+ }
341
+ int len;
342
+ while( (len = fread( buffer, 1, 1024, file )) )
343
+ Update( buffer, len ) ;
344
+ Final();
345
+
346
+ fclose( file );
347
+
348
+ return digestChars ;
349
+ }
350
+
351
+ /// Digests a byte-array already in memory
352
+ const char* digestMemory( BYTE *memchunk, int len ) {
353
+ if (NULL == memchunk)
354
+ return NULL;
355
+
356
+ Init() ;
357
+ Update( memchunk, len ) ;
358
+ Final() ;
359
+
360
+ return digestChars ;
361
+ }
362
+
363
+ // Digests a string and prints the result.
364
+ const char* digestString(const char *string ) {
365
+ if (string == NULL)
366
+ return NULL;
367
+
368
+ Init() ;
369
+ Update( (unsigned char*)string, strlen(string) ) ;
370
+ Final() ;
371
+
372
+ return digestChars ;
373
+ }
374
+ };
375
+
376
+ inline bool md5String(const char* str, std::string& res) {
377
+ if (NULL == str) {
378
+ res = "";
379
+ return false;
380
+ }
381
+
382
+ MD5 md5;
383
+ const char *pRes = md5.digestString(str);
384
+ if (NULL == pRes) {
385
+ res = "";
386
+ return false;
387
+ }
388
+
389
+ res = pRes;
390
+ return true;
391
+ }
392
+
393
+ inline bool md5File(const char* filepath, std::string& res) {
394
+ if (NULL == filepath || strcmp(filepath, "") == 0) {
395
+ res = "";
396
+ return false;
397
+ }
398
+
399
+ MD5 md5;
400
+ const char *pRes = md5.digestFile(filepath);
401
+
402
+ if (NULL == pRes) {
403
+ res = "";
404
+ return false;
405
+ }
406
+
407
+ res = pRes;
408
+ return true;
409
+ }
410
+ }
411
+ #endif