cppjieba_rb 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +18 -0
  3. data/.gitmodules +3 -0
  4. data/.travis.yml +26 -0
  5. data/Gemfile +3 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +81 -0
  8. data/Rakefile +20 -0
  9. data/cppjieba_rb.gemspec +50 -0
  10. data/ext/cppjieba/.gitignore +17 -0
  11. data/ext/cppjieba/.travis.yml +22 -0
  12. data/ext/cppjieba/CMakeLists.txt +28 -0
  13. data/ext/cppjieba/ChangeLog.md +236 -0
  14. data/ext/cppjieba/README.md +285 -0
  15. data/ext/cppjieba/README_EN.md +111 -0
  16. data/ext/cppjieba/appveyor.yml +32 -0
  17. data/ext/cppjieba/deps/CMakeLists.txt +1 -0
  18. data/ext/cppjieba/deps/gtest/CMakeLists.txt +5 -0
  19. data/ext/cppjieba/deps/gtest/include/gtest/gtest-death-test.h +283 -0
  20. data/ext/cppjieba/deps/gtest/include/gtest/gtest-message.h +230 -0
  21. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h +1421 -0
  22. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h.pump +487 -0
  23. data/ext/cppjieba/deps/gtest/include/gtest/gtest-printers.h +796 -0
  24. data/ext/cppjieba/deps/gtest/include/gtest/gtest-spi.h +232 -0
  25. data/ext/cppjieba/deps/gtest/include/gtest/gtest-test-part.h +176 -0
  26. data/ext/cppjieba/deps/gtest/include/gtest/gtest-typed-test.h +259 -0
  27. data/ext/cppjieba/deps/gtest/include/gtest/gtest.h +2155 -0
  28. data/ext/cppjieba/deps/gtest/include/gtest/gtest_pred_impl.h +358 -0
  29. data/ext/cppjieba/deps/gtest/include/gtest/gtest_prod.h +58 -0
  30. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-death-test-internal.h +308 -0
  31. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-filepath.h +210 -0
  32. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-internal.h +1226 -0
  33. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-linked_ptr.h +233 -0
  34. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h +4822 -0
  35. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h.pump +301 -0
  36. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util.h +619 -0
  37. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-port.h +1788 -0
  38. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-string.h +350 -0
  39. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h +968 -0
  40. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h.pump +336 -0
  41. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h +3330 -0
  42. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h.pump +296 -0
  43. data/ext/cppjieba/deps/gtest/src/.deps/.dirstamp +0 -0
  44. data/ext/cppjieba/deps/gtest/src/.deps/gtest-all.Plo +681 -0
  45. data/ext/cppjieba/deps/gtest/src/.deps/gtest_main.Plo +509 -0
  46. data/ext/cppjieba/deps/gtest/src/.dirstamp +0 -0
  47. data/ext/cppjieba/deps/gtest/src/gtest-all.cc +48 -0
  48. data/ext/cppjieba/deps/gtest/src/gtest-death-test.cc +1234 -0
  49. data/ext/cppjieba/deps/gtest/src/gtest-filepath.cc +380 -0
  50. data/ext/cppjieba/deps/gtest/src/gtest-internal-inl.h +1038 -0
  51. data/ext/cppjieba/deps/gtest/src/gtest-port.cc +746 -0
  52. data/ext/cppjieba/deps/gtest/src/gtest-printers.cc +356 -0
  53. data/ext/cppjieba/deps/gtest/src/gtest-test-part.cc +110 -0
  54. data/ext/cppjieba/deps/gtest/src/gtest-typed-test.cc +110 -0
  55. data/ext/cppjieba/deps/gtest/src/gtest.cc +4898 -0
  56. data/ext/cppjieba/deps/gtest/src/gtest_main.cc +39 -0
  57. data/ext/cppjieba/deps/limonp/ArgvContext.hpp +70 -0
  58. data/ext/cppjieba/deps/limonp/BlockingQueue.hpp +49 -0
  59. data/ext/cppjieba/deps/limonp/BoundedBlockingQueue.hpp +67 -0
  60. data/ext/cppjieba/deps/limonp/BoundedQueue.hpp +65 -0
  61. data/ext/cppjieba/deps/limonp/Closure.hpp +206 -0
  62. data/ext/cppjieba/deps/limonp/Colors.hpp +31 -0
  63. data/ext/cppjieba/deps/limonp/Condition.hpp +38 -0
  64. data/ext/cppjieba/deps/limonp/Config.hpp +103 -0
  65. data/ext/cppjieba/deps/limonp/FileLock.hpp +74 -0
  66. data/ext/cppjieba/deps/limonp/ForcePublic.hpp +7 -0
  67. data/ext/cppjieba/deps/limonp/LocalVector.hpp +139 -0
  68. data/ext/cppjieba/deps/limonp/Logging.hpp +76 -0
  69. data/ext/cppjieba/deps/limonp/Md5.hpp +411 -0
  70. data/ext/cppjieba/deps/limonp/MutexLock.hpp +51 -0
  71. data/ext/cppjieba/deps/limonp/NonCopyable.hpp +21 -0
  72. data/ext/cppjieba/deps/limonp/StdExtension.hpp +159 -0
  73. data/ext/cppjieba/deps/limonp/StringUtil.hpp +365 -0
  74. data/ext/cppjieba/deps/limonp/Thread.hpp +44 -0
  75. data/ext/cppjieba/deps/limonp/ThreadPool.hpp +86 -0
  76. data/ext/cppjieba/dict/README.md +31 -0
  77. data/ext/cppjieba/dict/hmm_model.utf8 +34 -0
  78. data/ext/cppjieba/dict/idf.utf8 +258826 -0
  79. data/ext/cppjieba/dict/jieba.dict.utf8 +348982 -0
  80. data/ext/cppjieba/dict/pos_dict/char_state_tab.utf8 +6653 -0
  81. data/ext/cppjieba/dict/pos_dict/prob_emit.utf8 +166 -0
  82. data/ext/cppjieba/dict/pos_dict/prob_start.utf8 +259 -0
  83. data/ext/cppjieba/dict/pos_dict/prob_trans.utf8 +5222 -0
  84. data/ext/cppjieba/dict/stop_words.utf8 +1534 -0
  85. data/ext/cppjieba/dict/user.dict.utf8 +4 -0
  86. data/ext/cppjieba/include/cppjieba/DictTrie.hpp +227 -0
  87. data/ext/cppjieba/include/cppjieba/FullSegment.hpp +93 -0
  88. data/ext/cppjieba/include/cppjieba/HMMModel.hpp +129 -0
  89. data/ext/cppjieba/include/cppjieba/HMMSegment.hpp +190 -0
  90. data/ext/cppjieba/include/cppjieba/Jieba.hpp +108 -0
  91. data/ext/cppjieba/include/cppjieba/KeywordExtractor.hpp +153 -0
  92. data/ext/cppjieba/include/cppjieba/MPSegment.hpp +137 -0
  93. data/ext/cppjieba/include/cppjieba/MixSegment.hpp +109 -0
  94. data/ext/cppjieba/include/cppjieba/PosTagger.hpp +77 -0
  95. data/ext/cppjieba/include/cppjieba/PreFilter.hpp +54 -0
  96. data/ext/cppjieba/include/cppjieba/QuerySegment.hpp +90 -0
  97. data/ext/cppjieba/include/cppjieba/SegmentBase.hpp +46 -0
  98. data/ext/cppjieba/include/cppjieba/SegmentTagged.hpp +23 -0
  99. data/ext/cppjieba/include/cppjieba/TextRankExtractor.hpp +190 -0
  100. data/ext/cppjieba/include/cppjieba/Trie.hpp +174 -0
  101. data/ext/cppjieba/include/cppjieba/Unicode.hpp +215 -0
  102. data/ext/cppjieba/test/CMakeLists.txt +5 -0
  103. data/ext/cppjieba/test/demo.cpp +80 -0
  104. data/ext/cppjieba/test/load_test.cpp +54 -0
  105. data/ext/cppjieba/test/testdata/curl.res +1 -0
  106. data/ext/cppjieba/test/testdata/extra_dict/jieba.dict.small.utf8 +109750 -0
  107. data/ext/cppjieba/test/testdata/gbk_dict/hmm_model.gbk +34 -0
  108. data/ext/cppjieba/test/testdata/gbk_dict/jieba.dict.gbk +348982 -0
  109. data/ext/cppjieba/test/testdata/jieba.dict.0.1.utf8 +93 -0
  110. data/ext/cppjieba/test/testdata/jieba.dict.0.utf8 +93 -0
  111. data/ext/cppjieba/test/testdata/jieba.dict.1.utf8 +67 -0
  112. data/ext/cppjieba/test/testdata/jieba.dict.2.utf8 +64 -0
  113. data/ext/cppjieba/test/testdata/load_test.urls +2 -0
  114. data/ext/cppjieba/test/testdata/review.100 +100 -0
  115. data/ext/cppjieba/test/testdata/review.100.res +200 -0
  116. data/ext/cppjieba/test/testdata/server.conf +19 -0
  117. data/ext/cppjieba/test/testdata/testlines.gbk +9 -0
  118. data/ext/cppjieba/test/testdata/testlines.utf8 +8 -0
  119. data/ext/cppjieba/test/testdata/userdict.2.utf8 +1 -0
  120. data/ext/cppjieba/test/testdata/userdict.english +2 -0
  121. data/ext/cppjieba/test/testdata/userdict.utf8 +8 -0
  122. data/ext/cppjieba/test/testdata/weicheng.utf8 +247 -0
  123. data/ext/cppjieba/test/unittest/CMakeLists.txt +24 -0
  124. data/ext/cppjieba/test/unittest/gtest_main.cpp +39 -0
  125. data/ext/cppjieba/test/unittest/jieba_test.cpp +133 -0
  126. data/ext/cppjieba/test/unittest/keyword_extractor_test.cpp +79 -0
  127. data/ext/cppjieba/test/unittest/pos_tagger_test.cpp +41 -0
  128. data/ext/cppjieba/test/unittest/pre_filter_test.cpp +43 -0
  129. data/ext/cppjieba/test/unittest/segments_test.cpp +256 -0
  130. data/ext/cppjieba/test/unittest/textrank_test.cpp +86 -0
  131. data/ext/cppjieba/test/unittest/trie_test.cpp +177 -0
  132. data/ext/cppjieba/test/unittest/unicode_test.cpp +43 -0
  133. data/ext/cppjieba_rb/cppjieba_rb.c +10 -0
  134. data/ext/cppjieba_rb/extconf.rb +26 -0
  135. data/ext/cppjieba_rb/internal.cc +148 -0
  136. data/lib/cppjieba_rb/segment.rb +20 -0
  137. data/lib/cppjieba_rb/version.rb +3 -0
  138. data/lib/cppjieba_rb.rb +34 -0
  139. data/test/test_keyword.rb +17 -0
  140. data/test/test_segment.rb +24 -0
  141. data/test/test_tagging.rb +19 -0
  142. metadata +244 -0
@@ -0,0 +1,74 @@
1
+ #ifndef LIMONP_FILELOCK_HPP
2
+ #define LIMONP_FILELOCK_HPP
3
+
4
+ #include <unistd.h>
5
+ #include <stdlib.h>
6
+ #include <stdio.h>
7
+ #include <fcntl.h>
8
+ #include <errno.h>
9
+ #include <string>
10
+ #include <string.h>
11
+ #include <assert.h>
12
+
13
+ namespace limonp {
14
+
15
+ using std::string;
16
+
17
/**
 * Small wrapper around a file descriptor and a POSIX whole-file record lock
 * taken with fcntl(F_SETLK).
 *
 * Errors are not thrown: a failing Open/Lock/UnLock clears the Ok() flag and
 * stores strerror(errno) so it can be read back with Error(). The flag is
 * never reset once cleared.
 *
 * NOTE: the class is implicitly copyable and a copy shares the same
 * descriptor, so both objects would close it. Avoid copying instances.
 */
class FileLock {
 public:
  /// Creates an object with no open file; Ok() is true.
  FileLock() : fd_(-1), ok_(true) {
  }

  /// Closes the descriptor if one is still open (including descriptor 0).
  ~FileLock() {
    Close();
  }

  /**
   * Opens (creating if needed, mode 0644) the file that will carry the lock.
   * Must only be called while no file is open.
   * @param fname path of the lock file.
   */
  void Open(const std::string& fname) {
    assert(fd_ == -1);
    fd_ = ::open(fname.c_str(), O_RDWR | O_CREAT, 0644);
    if(fd_ < 0) {
      fd_ = -1;
      ok_ = false;
      err_ = strerror(errno);
    }
  }

  /**
   * Closes the descriptor, if any, and marks the object as "not open" so
   * that a later Close()/destructor does not close it twice and Open() may
   * be called again.
   */
  void Close() {
    if(fd_ >= 0) {
      ::close(fd_);
      fd_ = -1;
    }
  }

  /// Tries to take a write lock on the whole file without blocking.
  void Lock() {
    if(LockOrUnlock(fd_, true) < 0) {
      ok_ = false;
      err_ = strerror(errno);
    }
  }

  /// Releases the lock on the whole file.
  void UnLock() {
    if(LockOrUnlock(fd_, false) < 0) {
      ok_ = false;
      err_ = strerror(errno);
    }
  }

  /// @return false once any Open/Lock/UnLock call has failed.
  bool Ok() const {
    return ok_;
  }

  /// @return the strerror() text of the most recent failure (empty if none).
  std::string Error() const {
    return err_;
  }

 private:
  /**
   * Issues fcntl(F_SETLK) for the entire file.
   * @param fd   descriptor to operate on.
   * @param lock true requests F_WRLCK, false requests F_UNLCK.
   * @return the fcntl() result (negative on failure, errno set).
   */
  static int LockOrUnlock(int fd, bool lock) {
    errno = 0;
    struct flock f;
    memset(&f, 0, sizeof(f));
    f.l_type = (lock ? F_WRLCK : F_UNLCK);
    f.l_whence = SEEK_SET;
    f.l_start = 0;
    f.l_len = 0; // zero length: the range extends to the end of the file
    return fcntl(fd, F_SETLK, &f);
  }

  int fd_;           // open descriptor, or -1 when no file is open
  bool ok_;          // cleared by the first failing operation
  std::string err_;  // strerror() text recorded with that failure
}; // class FileLock
71
+
72
+ }// namespace limonp
73
+
74
+ #endif // LIMONP_FILELOCK_HPP
@@ -0,0 +1,7 @@
1
+ #ifndef LIMONP_FORCE_PUBLIC_H
2
+ #define LIMONP_FORCE_PUBLIC_H
3
+
4
+ #define private public // Every `private` token seen after this header is included becomes `public`; presumably meant for test code that needs non-public members — TODO confirm.
5
+ #define protected public // Likewise turns every later `protected` token into `public`.
6
+
7
+ #endif // LIMONP_FORCE_PUBLIC_H
@@ -0,0 +1,139 @@
1
+ #ifndef LIMONP_LOCAL_VECTOR_HPP
2
+ #define LIMONP_LOCAL_VECTOR_HPP
3
+
4
+ #include <iostream>
5
+ #include <stdlib.h>
6
+ #include <assert.h>
7
+ #include <string.h>
8
+
9
+ namespace limonp {
10
+ using namespace std;
11
+ /*
12
+ * LocalVector<T>: T must be a primitive type (char, int, size_t); if T is a struct or class, LocalVector<T> may be dangerous.
13
+ * LocalVector<T> is simple and not well-tested.
14
+ */
15
// Number of elements stored inline before LocalVector switches to malloc.
const size_t LOCAL_VECTOR_BUFFER_SIZE = 16;

/**
 * Minimal growable array that keeps its first LOCAL_VECTOR_BUFFER_SIZE
 * elements in an inline buffer and moves to a malloc'ed block beyond that.
 *
 * Elements are relocated with memcpy and heap storage is obtained with
 * malloc/free, so no constructors or destructors run for heap elements;
 * T should therefore be a primitive (memcpy-safe) type.
 */
template <class T>
class LocalVector {
 public:
  typedef const T* const_iterator;
  typedef T value_type;
  typedef size_t size_type;

 private:
  T buffer_[LOCAL_VECTOR_BUFFER_SIZE];  // inline storage
  T* ptr_;                              // buffer_ or a malloc'ed block
  size_t size_;                         // number of elements in use
  size_t capacity_;                     // number of elements ptr_ can hold

 public:
  /// Creates an empty vector backed by the inline buffer.
  LocalVector() {
    init_();
  }

  /// Copy-constructs from another vector.
  LocalVector(const LocalVector<T>& vec) {
    init_();
    *this = vec;
  }

  /// Builds a vector from the element range [begin, end).
  LocalVector(const_iterator begin, const_iterator end) { // TODO: make it faster
    init_();
    while(begin != end) {
      push_back(*begin++);
    }
  }

  /// Builds a vector holding `size` copies of `t`.
  LocalVector(size_t size, const T& t) { // TODO: make it faster
    init_();
    while(size--) {
      push_back(t);
    }
  }

  /// Releases the heap block, if one was allocated.
  ~LocalVector() {
    if(ptr_ != buffer_) {
      free(ptr_);
    }
  }

 public:
  /**
   * Replaces the content with a copy of `vec`, keeping the same capacity
   * and the same inline/heap placement as the source.
   */
  LocalVector<T>& operator = (const LocalVector<T>& vec) {
    // Self-assignment must be a no-op: clear() below would otherwise throw
    // away the very elements that are about to be copied.
    if(this == &vec) {
      return *this;
    }
    clear();  // leaves ptr_ == buffer_
    if(vec.ptr_ != vec.buffer_) {
      ptr_ = (T*) malloc(vec.capacity_ * sizeof(T));
      assert(ptr_);
    }
    size_ = vec.size_;
    capacity_ = vec.capacity_;
    memcpy(ptr_, vec.ptr_, vec.size_ * sizeof(T));
    return *this;
  }

 private:
  /// Resets the bookkeeping to "empty, using the inline buffer".
  void init_() {
    ptr_ = buffer_;
    size_ = 0;
    capacity_ = LOCAL_VECTOR_BUFFER_SIZE;
  }

 public:
  /// Unchecked element access.
  T& operator [] (size_t i) {
    return ptr_[i];
  }

  /// Unchecked element access (read-only).
  const T& operator [] (size_t i) const {
    return ptr_[i];
  }

  /// Appends `t`, doubling the capacity when the storage is full.
  void push_back(const T& t) {
    if(size_ == capacity_) {
      assert(capacity_);
      // `t` may refer to an element of this vector; take a copy before
      // reserve() frees the block it lives in.
      const T value = t;
      reserve(capacity_ * 2);
      ptr_[size_ ++ ] = value;
      return;
    }
    ptr_[size_ ++ ] = t;
  }

  /// Grows the storage to hold at least `size` elements; never shrinks.
  void reserve(size_t size) {
    if(size <= capacity_) {
      return;
    }
    T * next = (T*)malloc(sizeof(T) * size);
    assert(next);
    T * old = ptr_;
    ptr_ = next;
    // Only the elements in use need to be carried over.
    memcpy(ptr_, old, sizeof(T) * size_);
    capacity_ = size;
    if(old != buffer_) {
      free(old);
    }
  }

  /// @return true when the vector holds no elements.
  bool empty() const {
    return 0 == size();
  }

  /// @return the number of stored elements.
  size_t size() const {
    return size_;
  }

  /// @return the number of elements the current storage can hold.
  size_t capacity() const {
    return capacity_;
  }

  /// @return pointer to the first element.
  const_iterator begin() const {
    return ptr_;
  }

  /// @return pointer one past the last element.
  const_iterator end() const {
    return ptr_ + size_;
  }

  /// Drops all elements, frees any heap block and returns to the inline buffer.
  void clear() {
    if(ptr_ != buffer_) {
      free(ptr_);
    }
    init_();
  }
};
123
+
124
+ template <class T>
125
+ ostream & operator << (ostream& os, const LocalVector<T>& vec) {
126
+ if(vec.empty()) {
127
+ return os << "[]";
128
+ }
129
+ os<<"[\""<<vec[0];
130
+ for(size_t i = 1; i < vec.size(); i++) {
131
+ os<<"\", \""<<vec[i];
132
+ }
133
+ os<<"\"]";
134
+ return os;
135
+ }
136
+
137
+ }
138
+
139
+ #endif
@@ -0,0 +1,76 @@
1
+ #ifndef LIMONP_LOGGING_HPP
2
+ #define LIMONP_LOGGING_HPP
3
+
4
+ #include <sstream>
5
+ #include <iostream>
6
+ #include <cassert>
7
+ #include <cstdlib>
8
+ #include <ctime>
9
+
10
+ #ifdef XLOG
11
+ #error "XLOG has been defined already"
12
+ #endif // XLOG
13
+ #ifdef XCHECK
14
+ #error "XCHECK has been defined already"
15
+ #endif // XCHECK
16
+
17
+ #define XLOG(level) limonp::Logger(limonp::LL_##level, __FILE__, __LINE__).Stream()
18
+ #define XCHECK(exp) if(!(exp)) XLOG(FATAL) << "exp: ["#exp << "] false. "
19
+
20
+ namespace limonp {
21
+
22
// Severity levels; the numeric value is the index into LOG_LEVEL_ARRAY.
enum {
  LL_DEBUG = 0,
  LL_INFO = 1,
  LL_WARNING = 2,
  LL_ERROR = 3,
  LL_FATAL = 4,
}; // enum

// Printable name for each level, indexed by the LL_* value.
static const char * LOG_LEVEL_ARRAY[] = {"DEBUG","INFO","WARN","ERROR","FATAL"};
// strftime() pattern used for the timestamp that starts every log line.
static const char * LOG_TIME_FORMAT = "%Y-%m-%d %H:%M:%S";

/**
 * One-shot log record. The constructor writes the
 * "<time> <file>:<line> <LEVEL> " prefix into an internal buffer, the caller
 * appends the message through Stream(), and the destructor prints the whole
 * line to std::cerr. A record at LL_FATAL calls abort() after printing.
 *
 * When LOGGING_LEVEL is defined, records below that level are neither
 * formatted nor printed.
 */
class Logger {
 public:
  /**
   * @param level    one of the LL_* values; must index LOG_LEVEL_ARRAY.
   * @param filename source file name to show in the prefix.
   * @param lineno   source line number to show in the prefix.
   */
  Logger(size_t level, const char* filename, int lineno)
   : level_(level) {
#ifdef LOGGING_LEVEL
    if (level_ < LOGGING_LEVEL) {
      return;
    }
#endif
    // Strictly less than: an index equal to the element count is already
    // one past the end of the table.
    assert(level_ < sizeof(LOG_LEVEL_ARRAY)/sizeof(*LOG_LEVEL_ARRAY));
    char buf[32];
    buf[0] = '\0';
    time_t now;
    time(&now);
    const struct tm* local_now = localtime(&now);
    // localtime() may return a null pointer; leave the timestamp empty then.
    if (local_now != NULL) {
      strftime(buf, sizeof(buf), LOG_TIME_FORMAT, local_now);
    }
    stream_ << buf
      << " " << filename
      << ":" << lineno
      << " " << LOG_LEVEL_ARRAY[level_]
      << " ";
  }

  /// Emits the buffered line to std::cerr and aborts on LL_FATAL.
  ~Logger() {
#ifdef LOGGING_LEVEL
    if (level_ < LOGGING_LEVEL) {
      return;
    }
#endif
    std::cerr << stream_.str() << std::endl;
    if (level_ == LL_FATAL) {
      abort();
    }
  }

  /// @return the stream the message text should be appended to.
  std::ostream& Stream() {
    return stream_;
  }

 private:
  std::ostringstream stream_;  // prefix plus the caller's message
  size_t level_;               // LL_* value given at construction
}; // class Logger
73
+
74
+ } // namespace limonp
75
+
76
+ #endif // LIMONP_LOGGING_HPP
@@ -0,0 +1,411 @@
1
+ #ifndef __MD5_H__
2
+ #define __MD5_H__
3
+
4
+ // Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
5
+ // rights reserved.
6
+
7
+ // License to copy and use this software is granted provided that it
8
+ // is identified as the "RSA Data Security, Inc. MD5 Message-Digest
9
+ // Algorithm" in all material mentioning or referencing this software
10
+ // or this function.
11
+ //
12
+ // License is also granted to make and use derivative works provided
13
+ // that such works are identified as "derived from the RSA Data
14
+ // Security, Inc. MD5 Message-Digest Algorithm" in all material
15
+ // mentioning or referencing the derived work.
16
+ //
17
+ // RSA Data Security, Inc. makes no representations concerning either
18
+ // the merchantability of this software or the suitability of this
19
+ // software for any particular purpose. It is provided "as is"
20
+ // without express or implied warranty of any kind.
21
+ //
22
+ // These notices must be retained in any copies of any part of this
23
+ // documentation and/or software.
24
+
25
+
26
+
27
+ // The original md5 implementation avoids external libraries.
28
+ // This version has dependency on stdio.h for file input and
29
+ // string.h for memcpy.
30
+ #include <cstdio>
31
+ #include <cstring>
32
+ #include <iostream>
33
+
34
+ namespace limonp {
35
+
36
+ //#pragma region MD5 defines
37
+ // Constants for MD5Transform routine.
38
// Per-round left-rotation amounts used by MD5Transform.
#define S11 7
#define S12 12
#define S13 17
#define S14 22
#define S21 5
#define S22 9
#define S23 14
#define S24 20
#define S31 4
#define S32 11
#define S33 16
#define S34 23
#define S41 6
#define S42 10
#define S43 15
#define S44 21


// F, G, H and I are basic MD5 functions.
// Every macro argument is parenthesized, including under '~', so that an
// expression argument cannot change the meaning of the expansion.
#define F(x, y, z) (((x) & (y)) | ((~(x)) & (z)))
#define G(x, y, z) (((x) & (z)) | ((y) & (~(z))))
#define H(x, y, z) ((x) ^ (y) ^ (z))
#define I(x, y, z) ((y) ^ ((x) | (~(z))))

// ROTATE_LEFT rotates x left n bits.
#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))

// FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
// Rotation is separate from addition to prevent recomputation.
#define FF(a, b, c, d, x, s, ac) { \
  (a) += F ((b), (c), (d)) + (x) + (UINT4)(ac); \
  (a) = ROTATE_LEFT ((a), (s)); \
  (a) += (b); \
}
#define GG(a, b, c, d, x, s, ac) { \
  (a) += G ((b), (c), (d)) + (x) + (UINT4)(ac); \
  (a) = ROTATE_LEFT ((a), (s)); \
  (a) += (b); \
}
#define HH(a, b, c, d, x, s, ac) { \
  (a) += H ((b), (c), (d)) + (x) + (UINT4)(ac); \
  (a) = ROTATE_LEFT ((a), (s)); \
  (a) += (b); \
}
#define II(a, b, c, d, x, s, ac) { \
  (a) += I ((b), (c), (d)) + (x) + (UINT4)(ac); \
  (a) = ROTATE_LEFT ((a), (s)); \
  (a) += (b); \
}


typedef unsigned char BYTE ;

// POINTER defines a generic pointer type
typedef unsigned char *POINTER;

// UINT2 defines a two byte word
typedef unsigned short int UINT2;

// UINT4 defines a four byte word
typedef unsigned int UINT4;

// Padding block appended by Final(): a single 0x80 byte followed by zeros.
static unsigned char PADDING[64] = {
  0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

/**
 * C++ wrapper around the RSA reference MD5 routines.
 *
 * Typical use is one of the digest*() helpers, each of which runs
 * Init/Update/Final and returns a pointer to digestChars (32 lowercase hex
 * digits, NUL terminated). That buffer belongs to the object and is
 * overwritten by the next digest. The raw 16 bytes are left in digestRaw.
 */
class MD5 {
 private:
  struct __context_t {
    UINT4 state[4];                                   /* state (ABCD) */
    UINT4 count[2];        /* number of bits, modulo 2^64 (lsb first) */
    unsigned char buffer[64];                         /* input buffer */
  } context ;

  // MD5 basic transformation. Transforms state based on one 64-byte block.
  static void MD5Transform( UINT4 state[4], const unsigned char block[64] ) {
    UINT4 a = state[0], b = state[1], c = state[2], d = state[3], x[16];

    Decode (x, block, 64);

    /* Round 1 */
    FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
    FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
    FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
    FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
    FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
    FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
    FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
    FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
    FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
    FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
    FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
    FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
    FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
    FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
    FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
    FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */

    /* Round 2 */
    GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
    GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
    GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
    GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
    GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
    GG (d, a, b, c, x[10], S22,  0x2441453); /* 22 */
    GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
    GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
    GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
    GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
    GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
    GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
    GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
    GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
    GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
    GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */

    /* Round 3 */
    HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
    HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
    HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
    HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
    HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
    HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
    HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
    HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
    HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
    HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
    HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
    HH (b, c, d, a, x[ 6], S34,  0x4881d05); /* 44 */
    HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
    HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
    HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
    HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */

    /* Round 4 */
    II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
    II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
    II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
    II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
    II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
    II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
    II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
    II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
    II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
    II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
    II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
    II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
    II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
    II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
    II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
    II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */

    state[0] += a;
    state[1] += b;
    state[2] += c;
    state[3] += d;

    // Zeroize sensitive information.
    memset(x, 0, sizeof (x));
  }

  // Encodes input (UINT4) into output (unsigned char), least significant
  // byte first. Assumes len is a multiple of 4.
  static void Encode( unsigned char *output, const UINT4 *input, unsigned int len ) {
    unsigned int i, j;

    for (i = 0, j = 0; j < len; i++, j += 4) {
      output[j] = (unsigned char)(input[i] & 0xff);
      output[j+1] = (unsigned char)((input[i] >> 8) & 0xff);
      output[j+2] = (unsigned char)((input[i] >> 16) & 0xff);
      output[j+3] = (unsigned char)((input[i] >> 24) & 0xff);
    }
  }

  // Decodes input (unsigned char) into output (UINT4), least significant
  // byte first. Assumes len is a multiple of 4.
  static void Decode( UINT4 *output, const unsigned char *input, unsigned int len ) {
    unsigned int i, j;

    for (i = 0, j = 0; j < len; i++, j += 4) {
      output[i] = ((UINT4)input[j]) | (((UINT4)input[j+1]) << 8) |
                  (((UINT4)input[j+2]) << 16) | (((UINT4)input[j+3]) << 24);
    }
  }


 public:
  // MAIN FUNCTIONS

  /// Starts a fresh context and clears both digest buffers.
  MD5() {
    Init() ;
    memset(digestRaw, 0, sizeof (digestRaw));
    memset(digestChars, 0, sizeof (digestChars));
  }

  // MD5 initialization. Begins an MD5 operation, writing a new context.
  void Init() {
    context.count[0] = context.count[1] = 0;

    // Load magic initialization constants.
    context.state[0] = 0x67452301;
    context.state[1] = 0xefcdab89;
    context.state[2] = 0x98badcfe;
    context.state[3] = 0x10325476;
  }

  // MD5 block update operation. Continues an MD5 message-digest
  // operation, processing another message block, and updating the
  // context.
  void Update(
    const unsigned char *input,   // input block
    unsigned int inputLen ) {     // length of input block
    unsigned int i, index, partLen;

    // Compute number of bytes mod 64
    index = (unsigned int)((context.count[0] >> 3) & 0x3F);

    // Update number of bits
    if ((context.count[0] += ((UINT4)inputLen << 3))
        < ((UINT4)inputLen << 3))
      context.count[1]++;
    context.count[1] += ((UINT4)inputLen >> 29);

    partLen = 64 - index;

    // Transform as many times as possible.
    if (inputLen >= partLen) {
      memcpy(&context.buffer[index], input, partLen);
      MD5Transform (context.state, context.buffer);

      // i never exceeds inputLen, so the subtraction cannot wrap (the
      // equivalent test "i + 63 < inputLen" could, for huge lengths).
      for (i = partLen; inputLen - i >= 64; i += 64)
        MD5Transform (context.state, &input[i]);

      index = 0;
    } else
      i = 0;

    /* Buffer remaining input */
    memcpy(&context.buffer[index], &input[i], inputLen-i);
  }

  // MD5 finalization. Ends an MD5 message-digest operation, writing the
  // message digest and zeroizing the context.
  // Writes to digestRaw and digestChars.
  void Final() {
    unsigned char bits[8];
    unsigned int index, padLen;

    // Save number of bits
    Encode( bits, context.count, 8 );

    // Pad out to 56 mod 64.
    index = (unsigned int)((context.count[0] >> 3) & 0x3f);
    padLen = (index < 56) ? (56 - index) : (120 - index);
    Update( PADDING, padLen );

    // Append length (before padding)
    Update( bits, 8 );

    // Store state in digest
    Encode( digestRaw, context.state, 16);

    // Zeroize sensitive information.
    memset(&context, 0, sizeof (context));

    writeToString() ;
  }

  /// Renders digestRaw into digestChars as 32 lowercase hex digits plus NUL.
  void writeToString() {
    static const char hexDigits[] = "0123456789abcdef";

    for (int pos = 0 ; pos < 16 ; pos++) {
      digestChars[pos*2]     = hexDigits[(digestRaw[pos] >> 4) & 0x0f];
      digestChars[pos*2 + 1] = hexDigits[digestRaw[pos] & 0x0f];
    }
    digestChars[32] = '\0';
  }


 public:
  // an MD5 digest is a 16-byte number (32 hex digits)
  BYTE digestRaw[ 16 ] ;

  // This version of the digest is actually
  // a "printf'd" version of the digest.
  char digestChars[ 33 ] ;

  /// Digests a file read from disk.
  /// @return digestChars, or NULL when the name is NULL/empty, the file
  ///         cannot be opened, or reading it fails.
  const char* digestFile( const char *filename ) {
    if (NULL == filename || strcmp(filename, "") == 0)
      return NULL;

    Init() ;

    FILE *file = fopen (filename, "rb");
    if (file == NULL) {
      return NULL;
    }

    unsigned char buffer[1024] ;
    size_t len;
    while ((len = fread( buffer, 1, sizeof (buffer), file )) > 0)
      Update( buffer, (unsigned int)len ) ;

    // fread() returns 0 both at end of file and on error; tell them apart
    // so that a failed read is not reported as a valid digest.
    const bool readFailed = (ferror( file ) != 0);
    fclose( file );
    if (readFailed)
      return NULL;

    Final();
    return digestChars ;
  }

  /// Digests a byte-array already in memory.
  /// @return digestChars, or NULL when memchunk is NULL or len is negative.
  const char* digestMemory( const BYTE *memchunk, int len ) {
    // A negative length would turn into a huge unsigned count in Update().
    if (NULL == memchunk || len < 0)
      return NULL;

    Init() ;
    Update( memchunk, (unsigned int)len ) ;
    Final() ;

    return digestChars ;
  }

  /// Digests a NUL-terminated string (the terminator is not included).
  /// @return digestChars, or NULL when string is NULL.
  const char* digestString(const char *string ) {
    if (string == NULL)
      return NULL;

    Init() ;
    Update( (const unsigned char*)string, (unsigned int)strlen(string) ) ;
    Final() ;

    return digestChars ;
  }
};
375
+
376
+ inline bool md5String(const char* str, std::string& res) {
377
+ if (NULL == str) {
378
+ res = "";
379
+ return false;
380
+ }
381
+
382
+ MD5 md5;
383
+ const char *pRes = md5.digestString(str);
384
+ if (NULL == pRes) {
385
+ res = "";
386
+ return false;
387
+ }
388
+
389
+ res = pRes;
390
+ return true;
391
+ }
392
+
393
+ inline bool md5File(const char* filepath, std::string& res) {
394
+ if (NULL == filepath || strcmp(filepath, "") == 0) {
395
+ res = "";
396
+ return false;
397
+ }
398
+
399
+ MD5 md5;
400
+ const char *pRes = md5.digestFile(filepath);
401
+
402
+ if (NULL == pRes) {
403
+ res = "";
404
+ return false;
405
+ }
406
+
407
+ res = pRes;
408
+ return true;
409
+ }
410
+ }
411
+ #endif