cppjieba_rb 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +18 -0
  3. data/.gitmodules +3 -0
  4. data/.travis.yml +26 -0
  5. data/Gemfile +3 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +81 -0
  8. data/Rakefile +20 -0
  9. data/cppjieba_rb.gemspec +50 -0
  10. data/ext/cppjieba/.gitignore +17 -0
  11. data/ext/cppjieba/.travis.yml +22 -0
  12. data/ext/cppjieba/CMakeLists.txt +28 -0
  13. data/ext/cppjieba/ChangeLog.md +236 -0
  14. data/ext/cppjieba/README.md +285 -0
  15. data/ext/cppjieba/README_EN.md +111 -0
  16. data/ext/cppjieba/appveyor.yml +32 -0
  17. data/ext/cppjieba/deps/CMakeLists.txt +1 -0
  18. data/ext/cppjieba/deps/gtest/CMakeLists.txt +5 -0
  19. data/ext/cppjieba/deps/gtest/include/gtest/gtest-death-test.h +283 -0
  20. data/ext/cppjieba/deps/gtest/include/gtest/gtest-message.h +230 -0
  21. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h +1421 -0
  22. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h.pump +487 -0
  23. data/ext/cppjieba/deps/gtest/include/gtest/gtest-printers.h +796 -0
  24. data/ext/cppjieba/deps/gtest/include/gtest/gtest-spi.h +232 -0
  25. data/ext/cppjieba/deps/gtest/include/gtest/gtest-test-part.h +176 -0
  26. data/ext/cppjieba/deps/gtest/include/gtest/gtest-typed-test.h +259 -0
  27. data/ext/cppjieba/deps/gtest/include/gtest/gtest.h +2155 -0
  28. data/ext/cppjieba/deps/gtest/include/gtest/gtest_pred_impl.h +358 -0
  29. data/ext/cppjieba/deps/gtest/include/gtest/gtest_prod.h +58 -0
  30. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-death-test-internal.h +308 -0
  31. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-filepath.h +210 -0
  32. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-internal.h +1226 -0
  33. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-linked_ptr.h +233 -0
  34. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h +4822 -0
  35. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h.pump +301 -0
  36. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util.h +619 -0
  37. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-port.h +1788 -0
  38. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-string.h +350 -0
  39. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h +968 -0
  40. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h.pump +336 -0
  41. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h +3330 -0
  42. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h.pump +296 -0
  43. data/ext/cppjieba/deps/gtest/src/.deps/.dirstamp +0 -0
  44. data/ext/cppjieba/deps/gtest/src/.deps/gtest-all.Plo +681 -0
  45. data/ext/cppjieba/deps/gtest/src/.deps/gtest_main.Plo +509 -0
  46. data/ext/cppjieba/deps/gtest/src/.dirstamp +0 -0
  47. data/ext/cppjieba/deps/gtest/src/gtest-all.cc +48 -0
  48. data/ext/cppjieba/deps/gtest/src/gtest-death-test.cc +1234 -0
  49. data/ext/cppjieba/deps/gtest/src/gtest-filepath.cc +380 -0
  50. data/ext/cppjieba/deps/gtest/src/gtest-internal-inl.h +1038 -0
  51. data/ext/cppjieba/deps/gtest/src/gtest-port.cc +746 -0
  52. data/ext/cppjieba/deps/gtest/src/gtest-printers.cc +356 -0
  53. data/ext/cppjieba/deps/gtest/src/gtest-test-part.cc +110 -0
  54. data/ext/cppjieba/deps/gtest/src/gtest-typed-test.cc +110 -0
  55. data/ext/cppjieba/deps/gtest/src/gtest.cc +4898 -0
  56. data/ext/cppjieba/deps/gtest/src/gtest_main.cc +39 -0
  57. data/ext/cppjieba/deps/limonp/ArgvContext.hpp +70 -0
  58. data/ext/cppjieba/deps/limonp/BlockingQueue.hpp +49 -0
  59. data/ext/cppjieba/deps/limonp/BoundedBlockingQueue.hpp +67 -0
  60. data/ext/cppjieba/deps/limonp/BoundedQueue.hpp +65 -0
  61. data/ext/cppjieba/deps/limonp/Closure.hpp +206 -0
  62. data/ext/cppjieba/deps/limonp/Colors.hpp +31 -0
  63. data/ext/cppjieba/deps/limonp/Condition.hpp +38 -0
  64. data/ext/cppjieba/deps/limonp/Config.hpp +103 -0
  65. data/ext/cppjieba/deps/limonp/FileLock.hpp +74 -0
  66. data/ext/cppjieba/deps/limonp/ForcePublic.hpp +7 -0
  67. data/ext/cppjieba/deps/limonp/LocalVector.hpp +139 -0
  68. data/ext/cppjieba/deps/limonp/Logging.hpp +76 -0
  69. data/ext/cppjieba/deps/limonp/Md5.hpp +411 -0
  70. data/ext/cppjieba/deps/limonp/MutexLock.hpp +51 -0
  71. data/ext/cppjieba/deps/limonp/NonCopyable.hpp +21 -0
  72. data/ext/cppjieba/deps/limonp/StdExtension.hpp +159 -0
  73. data/ext/cppjieba/deps/limonp/StringUtil.hpp +365 -0
  74. data/ext/cppjieba/deps/limonp/Thread.hpp +44 -0
  75. data/ext/cppjieba/deps/limonp/ThreadPool.hpp +86 -0
  76. data/ext/cppjieba/dict/README.md +31 -0
  77. data/ext/cppjieba/dict/hmm_model.utf8 +34 -0
  78. data/ext/cppjieba/dict/idf.utf8 +258826 -0
  79. data/ext/cppjieba/dict/jieba.dict.utf8 +348982 -0
  80. data/ext/cppjieba/dict/pos_dict/char_state_tab.utf8 +6653 -0
  81. data/ext/cppjieba/dict/pos_dict/prob_emit.utf8 +166 -0
  82. data/ext/cppjieba/dict/pos_dict/prob_start.utf8 +259 -0
  83. data/ext/cppjieba/dict/pos_dict/prob_trans.utf8 +5222 -0
  84. data/ext/cppjieba/dict/stop_words.utf8 +1534 -0
  85. data/ext/cppjieba/dict/user.dict.utf8 +4 -0
  86. data/ext/cppjieba/include/cppjieba/DictTrie.hpp +227 -0
  87. data/ext/cppjieba/include/cppjieba/FullSegment.hpp +93 -0
  88. data/ext/cppjieba/include/cppjieba/HMMModel.hpp +129 -0
  89. data/ext/cppjieba/include/cppjieba/HMMSegment.hpp +190 -0
  90. data/ext/cppjieba/include/cppjieba/Jieba.hpp +108 -0
  91. data/ext/cppjieba/include/cppjieba/KeywordExtractor.hpp +153 -0
  92. data/ext/cppjieba/include/cppjieba/MPSegment.hpp +137 -0
  93. data/ext/cppjieba/include/cppjieba/MixSegment.hpp +109 -0
  94. data/ext/cppjieba/include/cppjieba/PosTagger.hpp +77 -0
  95. data/ext/cppjieba/include/cppjieba/PreFilter.hpp +54 -0
  96. data/ext/cppjieba/include/cppjieba/QuerySegment.hpp +90 -0
  97. data/ext/cppjieba/include/cppjieba/SegmentBase.hpp +46 -0
  98. data/ext/cppjieba/include/cppjieba/SegmentTagged.hpp +23 -0
  99. data/ext/cppjieba/include/cppjieba/TextRankExtractor.hpp +190 -0
  100. data/ext/cppjieba/include/cppjieba/Trie.hpp +174 -0
  101. data/ext/cppjieba/include/cppjieba/Unicode.hpp +215 -0
  102. data/ext/cppjieba/test/CMakeLists.txt +5 -0
  103. data/ext/cppjieba/test/demo.cpp +80 -0
  104. data/ext/cppjieba/test/load_test.cpp +54 -0
  105. data/ext/cppjieba/test/testdata/curl.res +1 -0
  106. data/ext/cppjieba/test/testdata/extra_dict/jieba.dict.small.utf8 +109750 -0
  107. data/ext/cppjieba/test/testdata/gbk_dict/hmm_model.gbk +34 -0
  108. data/ext/cppjieba/test/testdata/gbk_dict/jieba.dict.gbk +348982 -0
  109. data/ext/cppjieba/test/testdata/jieba.dict.0.1.utf8 +93 -0
  110. data/ext/cppjieba/test/testdata/jieba.dict.0.utf8 +93 -0
  111. data/ext/cppjieba/test/testdata/jieba.dict.1.utf8 +67 -0
  112. data/ext/cppjieba/test/testdata/jieba.dict.2.utf8 +64 -0
  113. data/ext/cppjieba/test/testdata/load_test.urls +2 -0
  114. data/ext/cppjieba/test/testdata/review.100 +100 -0
  115. data/ext/cppjieba/test/testdata/review.100.res +200 -0
  116. data/ext/cppjieba/test/testdata/server.conf +19 -0
  117. data/ext/cppjieba/test/testdata/testlines.gbk +9 -0
  118. data/ext/cppjieba/test/testdata/testlines.utf8 +8 -0
  119. data/ext/cppjieba/test/testdata/userdict.2.utf8 +1 -0
  120. data/ext/cppjieba/test/testdata/userdict.english +2 -0
  121. data/ext/cppjieba/test/testdata/userdict.utf8 +8 -0
  122. data/ext/cppjieba/test/testdata/weicheng.utf8 +247 -0
  123. data/ext/cppjieba/test/unittest/CMakeLists.txt +24 -0
  124. data/ext/cppjieba/test/unittest/gtest_main.cpp +39 -0
  125. data/ext/cppjieba/test/unittest/jieba_test.cpp +133 -0
  126. data/ext/cppjieba/test/unittest/keyword_extractor_test.cpp +79 -0
  127. data/ext/cppjieba/test/unittest/pos_tagger_test.cpp +41 -0
  128. data/ext/cppjieba/test/unittest/pre_filter_test.cpp +43 -0
  129. data/ext/cppjieba/test/unittest/segments_test.cpp +256 -0
  130. data/ext/cppjieba/test/unittest/textrank_test.cpp +86 -0
  131. data/ext/cppjieba/test/unittest/trie_test.cpp +177 -0
  132. data/ext/cppjieba/test/unittest/unicode_test.cpp +43 -0
  133. data/ext/cppjieba_rb/cppjieba_rb.c +10 -0
  134. data/ext/cppjieba_rb/extconf.rb +26 -0
  135. data/ext/cppjieba_rb/internal.cc +148 -0
  136. data/lib/cppjieba_rb/segment.rb +20 -0
  137. data/lib/cppjieba_rb/version.rb +3 -0
  138. data/lib/cppjieba_rb.rb +34 -0
  139. data/test/test_keyword.rb +17 -0
  140. data/test/test_segment.rb +24 -0
  141. data/test/test_tagging.rb +19 -0
  142. metadata +244 -0
@@ -0,0 +1,746 @@
1
+ // Copyright 2008, Google Inc.
2
+ // All rights reserved.
3
+ //
4
+ // Redistribution and use in source and binary forms, with or without
5
+ // modification, are permitted provided that the following conditions are
6
+ // met:
7
+ //
8
+ // * Redistributions of source code must retain the above copyright
9
+ // notice, this list of conditions and the following disclaimer.
10
+ // * Redistributions in binary form must reproduce the above
11
+ // copyright notice, this list of conditions and the following disclaimer
12
+ // in the documentation and/or other materials provided with the
13
+ // distribution.
14
+ // * Neither the name of Google Inc. nor the names of its
15
+ // contributors may be used to endorse or promote products derived from
16
+ // this software without specific prior written permission.
17
+ //
18
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+ //
30
+ // Author: wan@google.com (Zhanyong Wan)
31
+
32
+ #include "gtest/internal/gtest-port.h"
33
+
34
+ #include <limits.h>
35
+ #include <stdlib.h>
36
+ #include <stdio.h>
37
+ #include <string.h>
38
+
39
+ #if GTEST_OS_WINDOWS_MOBILE
40
+ # include <windows.h> // For TerminateProcess()
41
+ #elif GTEST_OS_WINDOWS
42
+ # include <io.h>
43
+ # include <sys/stat.h>
44
+ #else
45
+ # include <unistd.h>
46
+ #endif // GTEST_OS_WINDOWS_MOBILE
47
+
48
+ #if GTEST_OS_MAC
49
+ # include <mach/mach_init.h>
50
+ # include <mach/task.h>
51
+ # include <mach/vm_map.h>
52
+ #endif // GTEST_OS_MAC
53
+
54
+ #include "gtest/gtest-spi.h"
55
+ #include "gtest/gtest-message.h"
56
+ #include "gtest/internal/gtest-internal.h"
57
+ #include "gtest/internal/gtest-string.h"
58
+
59
+ // Indicates that this translation unit is part of Google Test's
60
+ // implementation. It must come before gtest-internal-inl.h is
61
+ // included, or there will be a compiler error. This trick is to
62
+ // prevent a user from accidentally including gtest-internal-inl.h in
63
+ // his code.
64
+ #define GTEST_IMPLEMENTATION_ 1
65
+ #include "src/gtest-internal-inl.h"
66
+ #undef GTEST_IMPLEMENTATION_
67
+
68
+ namespace testing {
69
+ namespace internal {
70
+
71
+ #if defined(_MSC_VER) || defined(__BORLANDC__)
72
+ // MSVC and C++Builder do not provide a definition of STDERR_FILENO.
73
+ const int kStdOutFileno = 1;
74
+ const int kStdErrFileno = 2;
75
+ #else
76
+ const int kStdOutFileno = STDOUT_FILENO;
77
+ const int kStdErrFileno = STDERR_FILENO;
78
+ #endif // _MSC_VER
79
+
80
+ #if GTEST_OS_MAC
81
+
82
+ // Returns the number of threads running in the process, or 0 to indicate that
83
+ // we cannot detect it.
84
+ size_t GetThreadCount() {
85
+ const task_t task = mach_task_self();
86
+ mach_msg_type_number_t thread_count;
87
+ thread_act_array_t thread_list;
88
+ const kern_return_t status = task_threads(task, &thread_list, &thread_count);
89
+ if (status == KERN_SUCCESS) {
90
+ // task_threads allocates resources in thread_list and we need to free them
91
+ // to avoid leaks.
92
+ vm_deallocate(task,
93
+ reinterpret_cast<vm_address_t>(thread_list),
94
+ sizeof(thread_t) * thread_count);
95
+ return static_cast<size_t>(thread_count);
96
+ } else {
97
+ return 0;
98
+ }
99
+ }
100
+
101
+ #else
102
+
103
// Fallback used when no thread-enumeration API is available: there is no
// portable way to count threads, so this always reports 0 ("unknown").
size_t GetThreadCount() {
  const size_t kUnknownThreadCount = 0;
  return kUnknownThreadCount;
}
108
+
109
+ #endif // GTEST_OS_MAC
110
+
111
+ #if GTEST_USES_POSIX_RE
112
+
113
+ // Implements RE. Currently only needed for death tests.
114
+
115
+ RE::~RE() {
116
+ if (is_valid_) {
117
+ // regfree'ing an invalid regex might crash because the content
118
+ // of the regex is undefined. Since the regex's are essentially
119
+ // the same, one cannot be valid (or invalid) without the other
120
+ // being so too.
121
+ regfree(&partial_regex_);
122
+ regfree(&full_regex_);
123
+ }
124
+ free(const_cast<char*>(pattern_));
125
+ }
126
+
127
+ // Returns true iff regular expression re matches the entire str.
128
+ bool RE::FullMatch(const char* str, const RE& re) {
129
+ if (!re.is_valid_) return false;
130
+
131
+ regmatch_t match;
132
+ return regexec(&re.full_regex_, str, 1, &match, 0) == 0;
133
+ }
134
+
135
+ // Returns true iff regular expression re matches a substring of str
136
+ // (including str itself).
137
+ bool RE::PartialMatch(const char* str, const RE& re) {
138
+ if (!re.is_valid_) return false;
139
+
140
+ regmatch_t match;
141
+ return regexec(&re.partial_regex_, str, 1, &match, 0) == 0;
142
+ }
143
+
144
+ // Initializes an RE from its string representation.
145
+ void RE::Init(const char* regex) {
146
+ pattern_ = posix::StrDup(regex);
147
+
148
+ // Reserves enough bytes to hold the regular expression used for a
149
+ // full match.
150
+ const size_t full_regex_len = strlen(regex) + 10;
151
+ char* const full_pattern = new char[full_regex_len];
152
+
153
+ snprintf(full_pattern, full_regex_len, "^(%s)$", regex);
154
+ is_valid_ = regcomp(&full_regex_, full_pattern, REG_EXTENDED) == 0;
155
+ // We want to call regcomp(&partial_regex_, ...) even if the
156
+ // previous expression returns false. Otherwise partial_regex_ may
157
+ // not be properly initialized can may cause trouble when it's
158
+ // freed.
159
+ //
160
+ // Some implementation of POSIX regex (e.g. on at least some
161
+ // versions of Cygwin) doesn't accept the empty string as a valid
162
+ // regex. We change it to an equivalent form "()" to be safe.
163
+ if (is_valid_) {
164
+ const char* const partial_regex = (*regex == '\0') ? "()" : regex;
165
+ is_valid_ = regcomp(&partial_regex_, partial_regex, REG_EXTENDED) == 0;
166
+ }
167
+ EXPECT_TRUE(is_valid_)
168
+ << "Regular expression \"" << regex
169
+ << "\" is not a valid POSIX Extended regular expression.";
170
+
171
+ delete[] full_pattern;
172
+ }
173
+
174
+ #elif GTEST_USES_SIMPLE_RE
175
+
176
// Reports whether ch occurs in the NUL-terminated string str. The
// terminator itself is never considered a member.
bool IsInSet(char ch, const char* str) {
  if (ch == '\0')
    return false;  // strchr() would otherwise "find" the terminator.
  return strchr(str, ch) != NULL;
}

// Character classification predicates. Unlike their <ctype.h>
// counterparts, none of these depend on the current locale.

// Decimal digit '0'..'9'.
bool IsAsciiDigit(char ch) { return ch >= '0' && ch <= '9'; }

// One of the ASCII punctuation characters listed below.
bool IsAsciiPunct(char ch) {
  const char* const punctuation = "^-!\"#$%&'()*+,./:;<=>?@[\\]_`{|}~";
  return IsInSet(ch, punctuation);
}

// One of the repetition meta characters: '?', '*' or '+'.
bool IsRepeat(char ch) {
  const char* const repeaters = "?*+";
  return IsInSet(ch, repeaters);
}

// Space, form feed, newline, carriage return, tab or vertical tab.
bool IsAsciiWhiteSpace(char ch) {
  const char* const blanks = " \f\n\r\t\v";
  return IsInSet(ch, blanks);
}

// ASCII letter, decimal digit or underscore.
bool IsAsciiWordChar(char ch) {
  const bool is_lower = ch >= 'a' && ch <= 'z';
  const bool is_upper = ch >= 'A' && ch <= 'Z';
  const bool is_digit = ch >= '0' && ch <= '9';
  return is_lower || is_upper || is_digit || ch == '_';
}
195
+
196
+ // Returns true iff "\\c" is a supported escape sequence.
197
+ bool IsValidEscape(char c) {
198
+ return (IsAsciiPunct(c) || IsInSet(c, "dDfnrsStvwW"));
199
+ }
200
+
201
+ // Returns true iff the given atom (specified by escaped and pattern)
202
+ // matches ch. The result is undefined if the atom is invalid.
203
+ bool AtomMatchesChar(bool escaped, char pattern_char, char ch) {
204
+ if (escaped) { // "\\p" where p is pattern_char.
205
+ switch (pattern_char) {
206
+ case 'd': return IsAsciiDigit(ch);
207
+ case 'D': return !IsAsciiDigit(ch);
208
+ case 'f': return ch == '\f';
209
+ case 'n': return ch == '\n';
210
+ case 'r': return ch == '\r';
211
+ case 's': return IsAsciiWhiteSpace(ch);
212
+ case 'S': return !IsAsciiWhiteSpace(ch);
213
+ case 't': return ch == '\t';
214
+ case 'v': return ch == '\v';
215
+ case 'w': return IsAsciiWordChar(ch);
216
+ case 'W': return !IsAsciiWordChar(ch);
217
+ }
218
+ return IsAsciiPunct(pattern_char) && pattern_char == ch;
219
+ }
220
+
221
+ return (pattern_char == '.' && ch != '\n') || pattern_char == ch;
222
+ }
223
+
224
+ // Helper function used by ValidateRegex() to format error messages.
225
+ String FormatRegexSyntaxError(const char* regex, int index) {
226
+ return (Message() << "Syntax error at index " << index
227
+ << " in simple regular expression \"" << regex << "\": ").GetString();
228
+ }
229
+
230
+ // Generates non-fatal failures and returns false if regex is invalid;
231
+ // otherwise returns true.
232
+ bool ValidateRegex(const char* regex) {
233
+ if (regex == NULL) {
234
+ // TODO(wan@google.com): fix the source file location in the
235
+ // assertion failures to match where the regex is used in user
236
+ // code.
237
+ ADD_FAILURE() << "NULL is not a valid simple regular expression.";
238
+ return false;
239
+ }
240
+
241
+ bool is_valid = true;
242
+
243
+ // True iff ?, *, or + can follow the previous atom.
244
+ bool prev_repeatable = false;
245
+ for (int i = 0; regex[i]; i++) {
246
+ if (regex[i] == '\\') { // An escape sequence
247
+ i++;
248
+ if (regex[i] == '\0') {
249
+ ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1)
250
+ << "'\\' cannot appear at the end.";
251
+ return false;
252
+ }
253
+
254
+ if (!IsValidEscape(regex[i])) {
255
+ ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1)
256
+ << "invalid escape sequence \"\\" << regex[i] << "\".";
257
+ is_valid = false;
258
+ }
259
+ prev_repeatable = true;
260
+ } else { // Not an escape sequence.
261
+ const char ch = regex[i];
262
+
263
+ if (ch == '^' && i > 0) {
264
+ ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
265
+ << "'^' can only appear at the beginning.";
266
+ is_valid = false;
267
+ } else if (ch == '$' && regex[i + 1] != '\0') {
268
+ ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
269
+ << "'$' can only appear at the end.";
270
+ is_valid = false;
271
+ } else if (IsInSet(ch, "()[]{}|")) {
272
+ ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
273
+ << "'" << ch << "' is unsupported.";
274
+ is_valid = false;
275
+ } else if (IsRepeat(ch) && !prev_repeatable) {
276
+ ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
277
+ << "'" << ch << "' can only follow a repeatable token.";
278
+ is_valid = false;
279
+ }
280
+
281
+ prev_repeatable = !IsInSet(ch, "^$?*+");
282
+ }
283
+ }
284
+
285
+ return is_valid;
286
+ }
287
+
288
+ // Matches a repeated regex atom followed by a valid simple regular
289
+ // expression. The regex atom is defined as c if escaped is false,
290
+ // or \c otherwise. repeat is the repetition meta character (?, *,
291
+ // or +). The behavior is undefined if str contains too many
292
+ // characters to be indexable by size_t, in which case the test will
293
+ // probably time out anyway. We are fine with this limitation as
294
+ // std::string has it too.
295
+ bool MatchRepetitionAndRegexAtHead(
296
+ bool escaped, char c, char repeat, const char* regex,
297
+ const char* str) {
298
+ const size_t min_count = (repeat == '+') ? 1 : 0;
299
+ const size_t max_count = (repeat == '?') ? 1 :
300
+ static_cast<size_t>(-1) - 1;
301
+ // We cannot call numeric_limits::max() as it conflicts with the
302
+ // max() macro on Windows.
303
+
304
+ for (size_t i = 0; i <= max_count; ++i) {
305
+ // We know that the atom matches each of the first i characters in str.
306
+ if (i >= min_count && MatchRegexAtHead(regex, str + i)) {
307
+ // We have enough matches at the head, and the tail matches too.
308
+ // Since we only care about *whether* the pattern matches str
309
+ // (as opposed to *how* it matches), there is no need to find a
310
+ // greedy match.
311
+ return true;
312
+ }
313
+ if (str[i] == '\0' || !AtomMatchesChar(escaped, c, str[i]))
314
+ return false;
315
+ }
316
+ return false;
317
+ }
318
+
319
+ // Returns true iff regex matches a prefix of str. regex must be a
320
+ // valid simple regular expression and not start with "^", or the
321
+ // result is undefined.
322
+ bool MatchRegexAtHead(const char* regex, const char* str) {
323
+ if (*regex == '\0') // An empty regex matches a prefix of anything.
324
+ return true;
325
+
326
+ // "$" only matches the end of a string. Note that regex being
327
+ // valid guarantees that there's nothing after "$" in it.
328
+ if (*regex == '$')
329
+ return *str == '\0';
330
+
331
+ // Is the first thing in regex an escape sequence?
332
+ const bool escaped = *regex == '\\';
333
+ if (escaped)
334
+ ++regex;
335
+ if (IsRepeat(regex[1])) {
336
+ // MatchRepetitionAndRegexAtHead() calls MatchRegexAtHead(), so
337
+ // here's an indirect recursion. It terminates as the regex gets
338
+ // shorter in each recursion.
339
+ return MatchRepetitionAndRegexAtHead(
340
+ escaped, regex[0], regex[1], regex + 2, str);
341
+ } else {
342
+ // regex isn't empty, isn't "$", and doesn't start with a
343
+ // repetition. We match the first atom of regex with the first
344
+ // character of str and recurse.
345
+ return (*str != '\0') && AtomMatchesChar(escaped, *regex, *str) &&
346
+ MatchRegexAtHead(regex + 1, str + 1);
347
+ }
348
+ }
349
+
350
+ // Returns true iff regex matches any substring of str. regex must be
351
+ // a valid simple regular expression, or the result is undefined.
352
+ //
353
+ // The algorithm is recursive, but the recursion depth doesn't exceed
354
+ // the regex length, so we won't need to worry about running out of
355
+ // stack space normally. In rare cases the time complexity can be
356
+ // exponential with respect to the regex length + the string length,
357
+ // but usually it's must faster (often close to linear).
358
+ bool MatchRegexAnywhere(const char* regex, const char* str) {
359
+ if (regex == NULL || str == NULL)
360
+ return false;
361
+
362
+ if (*regex == '^')
363
+ return MatchRegexAtHead(regex + 1, str);
364
+
365
+ // A successful match can be anywhere in str.
366
+ do {
367
+ if (MatchRegexAtHead(regex, str))
368
+ return true;
369
+ } while (*str++ != '\0');
370
+ return false;
371
+ }
372
+
373
+ // Implements the RE class.
374
+
375
+ RE::~RE() {
376
+ free(const_cast<char*>(pattern_));
377
+ free(const_cast<char*>(full_pattern_));
378
+ }
379
+
380
+ // Returns true iff regular expression re matches the entire str.
381
+ bool RE::FullMatch(const char* str, const RE& re) {
382
+ return re.is_valid_ && MatchRegexAnywhere(re.full_pattern_, str);
383
+ }
384
+
385
+ // Returns true iff regular expression re matches a substring of str
386
+ // (including str itself).
387
+ bool RE::PartialMatch(const char* str, const RE& re) {
388
+ return re.is_valid_ && MatchRegexAnywhere(re.pattern_, str);
389
+ }
390
+
391
+ // Initializes an RE from its string representation.
392
+ void RE::Init(const char* regex) {
393
+ pattern_ = full_pattern_ = NULL;
394
+ if (regex != NULL) {
395
+ pattern_ = posix::StrDup(regex);
396
+ }
397
+
398
+ is_valid_ = ValidateRegex(regex);
399
+ if (!is_valid_) {
400
+ // No need to calculate the full pattern when the regex is invalid.
401
+ return;
402
+ }
403
+
404
+ const size_t len = strlen(regex);
405
+ // Reserves enough bytes to hold the regular expression used for a
406
+ // full match: we need space to prepend a '^', append a '$', and
407
+ // terminate the string with '\0'.
408
+ char* buffer = static_cast<char*>(malloc(len + 3));
409
+ full_pattern_ = buffer;
410
+
411
+ if (*regex != '^')
412
+ *buffer++ = '^'; // Makes sure full_pattern_ starts with '^'.
413
+
414
+ // We don't use snprintf or strncpy, as they trigger a warning when
415
+ // compiled with VC++ 8.0.
416
+ memcpy(buffer, regex, len);
417
+ buffer += len;
418
+
419
+ if (len == 0 || regex[len - 1] != '$')
420
+ *buffer++ = '$'; // Makes sure full_pattern_ ends with '$'.
421
+
422
+ *buffer = '\0';
423
+ }
424
+
425
+ #endif // GTEST_USES_POSIX_RE
426
+
427
+ const char kUnknownFile[] = "unknown file";
428
+
429
+ // Formats a source file path and a line number as they would appear
430
+ // in an error message from the compiler used to compile this code.
431
+ GTEST_API_ ::std::string FormatFileLocation(const char* file, int line) {
432
+ const char* const file_name = file == NULL ? kUnknownFile : file;
433
+
434
+ if (line < 0) {
435
+ return String::Format("%s:", file_name).c_str();
436
+ }
437
+ #ifdef _MSC_VER
438
+ return String::Format("%s(%d):", file_name, line).c_str();
439
+ #else
440
+ return String::Format("%s:%d:", file_name, line).c_str();
441
+ #endif // _MSC_VER
442
+ }
443
+
444
+ // Formats a file location for compiler-independent XML output.
445
+ // Although this function is not platform dependent, we put it next to
446
+ // FormatFileLocation in order to contrast the two functions.
447
+ // Note that FormatCompilerIndependentFileLocation() does NOT append colon
448
+ // to the file location it produces, unlike FormatFileLocation().
449
+ GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(
450
+ const char* file, int line) {
451
+ const char* const file_name = file == NULL ? kUnknownFile : file;
452
+
453
+ if (line < 0)
454
+ return file_name;
455
+ else
456
+ return String::Format("%s:%d", file_name, line).c_str();
457
+ }
458
+
459
+
460
+ GTestLog::GTestLog(GTestLogSeverity severity, const char* file, int line)
461
+ : severity_(severity) {
462
+ const char* const marker =
463
+ severity == GTEST_INFO ? "[ INFO ]" :
464
+ severity == GTEST_WARNING ? "[WARNING]" :
465
+ severity == GTEST_ERROR ? "[ ERROR ]" : "[ FATAL ]";
466
+ GetStream() << ::std::endl << marker << " "
467
+ << FormatFileLocation(file, line).c_str() << ": ";
468
+ }
469
+
470
+ // Flushes the buffers and, if severity is GTEST_FATAL, aborts the program.
471
+ GTestLog::~GTestLog() {
472
+ GetStream() << ::std::endl;
473
+ if (severity_ == GTEST_FATAL) {
474
+ fflush(stderr);
475
+ posix::Abort();
476
+ }
477
+ }
478
+ // Disable Microsoft deprecation warnings for POSIX functions called from
479
+ // this class (creat, dup, dup2, and close)
480
+ #ifdef _MSC_VER
481
+ # pragma warning(push)
482
+ # pragma warning(disable: 4996)
483
+ #endif // _MSC_VER
484
+
485
+ #if GTEST_HAS_STREAM_REDIRECTION
486
+
487
+ // Object that captures an output stream (stdout/stderr).
488
+ class CapturedStream {
489
+ public:
490
+ // The ctor redirects the stream to a temporary file.
491
+ CapturedStream(int fd) : fd_(fd), uncaptured_fd_(dup(fd)) {
492
+
493
+ # if GTEST_OS_WINDOWS
494
+ char temp_dir_path[MAX_PATH + 1] = { '\0' }; // NOLINT
495
+ char temp_file_path[MAX_PATH + 1] = { '\0' }; // NOLINT
496
+
497
+ ::GetTempPathA(sizeof(temp_dir_path), temp_dir_path);
498
+ const UINT success = ::GetTempFileNameA(temp_dir_path,
499
+ "gtest_redir",
500
+ 0, // Generate unique file name.
501
+ temp_file_path);
502
+ GTEST_CHECK_(success != 0)
503
+ << "Unable to create a temporary file in " << temp_dir_path;
504
+ const int captured_fd = creat(temp_file_path, _S_IREAD | _S_IWRITE);
505
+ GTEST_CHECK_(captured_fd != -1) << "Unable to open temporary file "
506
+ << temp_file_path;
507
+ filename_ = temp_file_path;
508
+ # else
509
+ // There's no guarantee that a test has write access to the
510
+ // current directory, so we create the temporary file in the /tmp
511
+ // directory instead.
512
+ char name_template[] = "/tmp/captured_stream.XXXXXX";
513
+ const int captured_fd = mkstemp(name_template);
514
+ filename_ = name_template;
515
+ # endif // GTEST_OS_WINDOWS
516
+ fflush(NULL);
517
+ dup2(captured_fd, fd_);
518
+ close(captured_fd);
519
+ }
520
+
521
+ ~CapturedStream() {
522
+ remove(filename_.c_str());
523
+ }
524
+
525
+ String GetCapturedString() {
526
+ if (uncaptured_fd_ != -1) {
527
+ // Restores the original stream.
528
+ fflush(NULL);
529
+ dup2(uncaptured_fd_, fd_);
530
+ close(uncaptured_fd_);
531
+ uncaptured_fd_ = -1;
532
+ }
533
+
534
+ FILE* const file = posix::FOpen(filename_.c_str(), "r");
535
+ const String content = ReadEntireFile(file);
536
+ posix::FClose(file);
537
+ return content;
538
+ }
539
+
540
+ private:
541
+ // Reads the entire content of a file as a String.
542
+ static String ReadEntireFile(FILE* file);
543
+
544
+ // Returns the size (in bytes) of a file.
545
+ static size_t GetFileSize(FILE* file);
546
+
547
+ const int fd_; // A stream to capture.
548
+ int uncaptured_fd_;
549
+ // Name of the temporary file holding the stderr output.
550
+ ::std::string filename_;
551
+
552
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(CapturedStream);
553
+ };
554
+
555
+ // Returns the size (in bytes) of a file.
556
+ size_t CapturedStream::GetFileSize(FILE* file) {
557
+ fseek(file, 0, SEEK_END);
558
+ return static_cast<size_t>(ftell(file));
559
+ }
560
+
561
+ // Reads the entire content of a file as a string.
562
+ String CapturedStream::ReadEntireFile(FILE* file) {
563
+ const size_t file_size = GetFileSize(file);
564
+ char* const buffer = new char[file_size];
565
+
566
+ size_t bytes_last_read = 0; // # of bytes read in the last fread()
567
+ size_t bytes_read = 0; // # of bytes read so far
568
+
569
+ fseek(file, 0, SEEK_SET);
570
+
571
+ // Keeps reading the file until we cannot read further or the
572
+ // pre-determined file size is reached.
573
+ do {
574
+ bytes_last_read = fread(buffer+bytes_read, 1, file_size-bytes_read, file);
575
+ bytes_read += bytes_last_read;
576
+ } while (bytes_last_read > 0 && bytes_read < file_size);
577
+
578
+ const String content(buffer, bytes_read);
579
+ delete[] buffer;
580
+
581
+ return content;
582
+ }
583
+
584
+ # ifdef _MSC_VER
585
+ # pragma warning(pop)
586
+ # endif // _MSC_VER
587
+
588
+ static CapturedStream* g_captured_stderr = NULL;
589
+ static CapturedStream* g_captured_stdout = NULL;
590
+
591
+ // Starts capturing an output stream (stdout/stderr).
592
+ void CaptureStream(int fd, const char* stream_name, CapturedStream** stream) {
593
+ if (*stream != NULL) {
594
+ GTEST_LOG_(FATAL) << "Only one " << stream_name
595
+ << " capturer can exist at a time.";
596
+ }
597
+ *stream = new CapturedStream(fd);
598
+ }
599
+
600
+ // Stops capturing the output stream and returns the captured string.
601
+ String GetCapturedStream(CapturedStream** captured_stream) {
602
+ const String content = (*captured_stream)->GetCapturedString();
603
+
604
+ delete *captured_stream;
605
+ *captured_stream = NULL;
606
+
607
+ return content;
608
+ }
609
+
610
+ // Starts capturing stdout.
611
+ void CaptureStdout() {
612
+ CaptureStream(kStdOutFileno, "stdout", &g_captured_stdout);
613
+ }
614
+
615
+ // Starts capturing stderr.
616
+ void CaptureStderr() {
617
+ CaptureStream(kStdErrFileno, "stderr", &g_captured_stderr);
618
+ }
619
+
620
+ // Stops capturing stdout and returns the captured string.
621
+ String GetCapturedStdout() { return GetCapturedStream(&g_captured_stdout); }
622
+
623
+ // Stops capturing stderr and returns the captured string.
624
+ String GetCapturedStderr() { return GetCapturedStream(&g_captured_stderr); }
625
+
626
+ #endif // GTEST_HAS_STREAM_REDIRECTION
627
+
628
+ #if GTEST_HAS_DEATH_TEST
629
+
630
+ // A copy of all command line arguments. Set by InitGoogleTest().
631
+ ::std::vector<String> g_argvs;
632
+
633
+ // Returns the command line as a vector of strings.
634
+ const ::std::vector<String>& GetArgvs() { return g_argvs; }
635
+
636
+ #endif // GTEST_HAS_DEATH_TEST
637
+
638
+ #if GTEST_OS_WINDOWS_MOBILE
639
+ namespace posix {
640
+ void Abort() {
641
+ DebugBreak();
642
+ TerminateProcess(GetCurrentProcess(), 1);
643
+ }
644
+ } // namespace posix
645
+ #endif // GTEST_OS_WINDOWS_MOBILE
646
+
647
+ // Returns the name of the environment variable corresponding to the
648
+ // given flag. For example, FlagToEnvVar("foo") will return
649
+ // "GTEST_FOO" in the open-source version.
650
+ static String FlagToEnvVar(const char* flag) {
651
+ const String full_flag =
652
+ (Message() << GTEST_FLAG_PREFIX_ << flag).GetString();
653
+
654
+ Message env_var;
655
+ for (size_t i = 0; i != full_flag.length(); i++) {
656
+ env_var << ToUpper(full_flag.c_str()[i]);
657
+ }
658
+
659
+ return env_var.GetString();
660
+ }
661
+
662
+ // Parses 'str' for a 32-bit signed integer. If successful, writes
663
+ // the result to *value and returns true; otherwise leaves *value
664
+ // unchanged and returns false.
665
+ bool ParseInt32(const Message& src_text, const char* str, Int32* value) {
666
+ // Parses the environment variable as a decimal integer.
667
+ char* end = NULL;
668
+ const long long_value = strtol(str, &end, 10); // NOLINT
669
+
670
+ // Has strtol() consumed all characters in the string?
671
+ if (*end != '\0') {
672
+ // No - an invalid character was encountered.
673
+ Message msg;
674
+ msg << "WARNING: " << src_text
675
+ << " is expected to be a 32-bit integer, but actually"
676
+ << " has value \"" << str << "\".\n";
677
+ printf("%s", msg.GetString().c_str());
678
+ fflush(stdout);
679
+ return false;
680
+ }
681
+
682
+ // Is the parsed value in the range of an Int32?
683
+ const Int32 result = static_cast<Int32>(long_value);
684
+ if (long_value == LONG_MAX || long_value == LONG_MIN ||
685
+ // The parsed value overflows as a long. (strtol() returns
686
+ // LONG_MAX or LONG_MIN when the input overflows.)
687
+ result != long_value
688
+ // The parsed value overflows as an Int32.
689
+ ) {
690
+ Message msg;
691
+ msg << "WARNING: " << src_text
692
+ << " is expected to be a 32-bit integer, but actually"
693
+ << " has value " << str << ", which overflows.\n";
694
+ printf("%s", msg.GetString().c_str());
695
+ fflush(stdout);
696
+ return false;
697
+ }
698
+
699
+ *value = result;
700
+ return true;
701
+ }
702
+
703
+ // Reads and returns the Boolean environment variable corresponding to
704
+ // the given flag; if it's not set, returns default_value.
705
+ //
706
+ // The value is considered true iff it's not "0".
707
+ bool BoolFromGTestEnv(const char* flag, bool default_value) {
708
+ const String env_var = FlagToEnvVar(flag);
709
+ const char* const string_value = posix::GetEnv(env_var.c_str());
710
+ return string_value == NULL ?
711
+ default_value : strcmp(string_value, "0") != 0;
712
+ }
713
+
714
+ // Reads and returns a 32-bit integer stored in the environment
715
+ // variable corresponding to the given flag; if it isn't set or
716
+ // doesn't represent a valid 32-bit integer, returns default_value.
717
+ Int32 Int32FromGTestEnv(const char* flag, Int32 default_value) {
718
+ const String env_var = FlagToEnvVar(flag);
719
+ const char* const string_value = posix::GetEnv(env_var.c_str());
720
+ if (string_value == NULL) {
721
+ // The environment variable is not set.
722
+ return default_value;
723
+ }
724
+
725
+ Int32 result = default_value;
726
+ if (!ParseInt32(Message() << "Environment variable " << env_var,
727
+ string_value, &result)) {
728
+ printf("The default value %s is used.\n",
729
+ (Message() << default_value).GetString().c_str());
730
+ fflush(stdout);
731
+ return default_value;
732
+ }
733
+
734
+ return result;
735
+ }
736
+
737
+ // Reads and returns the string environment variable corresponding to
738
+ // the given flag; if it's not set, returns default_value.
739
+ const char* StringFromGTestEnv(const char* flag, const char* default_value) {
740
+ const String env_var = FlagToEnvVar(flag);
741
+ const char* const value = posix::GetEnv(env_var.c_str());
742
+ return value == NULL ? default_value : value;
743
+ }
744
+
745
+ } // namespace internal
746
+ } // namespace testing