cppjieba_rb 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (142) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +18 -0
  3. data/.gitmodules +3 -0
  4. data/.travis.yml +26 -0
  5. data/Gemfile +3 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +81 -0
  8. data/Rakefile +20 -0
  9. data/cppjieba_rb.gemspec +50 -0
  10. data/ext/cppjieba/.gitignore +17 -0
  11. data/ext/cppjieba/.travis.yml +22 -0
  12. data/ext/cppjieba/CMakeLists.txt +28 -0
  13. data/ext/cppjieba/ChangeLog.md +236 -0
  14. data/ext/cppjieba/README.md +285 -0
  15. data/ext/cppjieba/README_EN.md +111 -0
  16. data/ext/cppjieba/appveyor.yml +32 -0
  17. data/ext/cppjieba/deps/CMakeLists.txt +1 -0
  18. data/ext/cppjieba/deps/gtest/CMakeLists.txt +5 -0
  19. data/ext/cppjieba/deps/gtest/include/gtest/gtest-death-test.h +283 -0
  20. data/ext/cppjieba/deps/gtest/include/gtest/gtest-message.h +230 -0
  21. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h +1421 -0
  22. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h.pump +487 -0
  23. data/ext/cppjieba/deps/gtest/include/gtest/gtest-printers.h +796 -0
  24. data/ext/cppjieba/deps/gtest/include/gtest/gtest-spi.h +232 -0
  25. data/ext/cppjieba/deps/gtest/include/gtest/gtest-test-part.h +176 -0
  26. data/ext/cppjieba/deps/gtest/include/gtest/gtest-typed-test.h +259 -0
  27. data/ext/cppjieba/deps/gtest/include/gtest/gtest.h +2155 -0
  28. data/ext/cppjieba/deps/gtest/include/gtest/gtest_pred_impl.h +358 -0
  29. data/ext/cppjieba/deps/gtest/include/gtest/gtest_prod.h +58 -0
  30. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-death-test-internal.h +308 -0
  31. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-filepath.h +210 -0
  32. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-internal.h +1226 -0
  33. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-linked_ptr.h +233 -0
  34. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h +4822 -0
  35. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h.pump +301 -0
  36. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util.h +619 -0
  37. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-port.h +1788 -0
  38. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-string.h +350 -0
  39. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h +968 -0
  40. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h.pump +336 -0
  41. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h +3330 -0
  42. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h.pump +296 -0
  43. data/ext/cppjieba/deps/gtest/src/.deps/.dirstamp +0 -0
  44. data/ext/cppjieba/deps/gtest/src/.deps/gtest-all.Plo +681 -0
  45. data/ext/cppjieba/deps/gtest/src/.deps/gtest_main.Plo +509 -0
  46. data/ext/cppjieba/deps/gtest/src/.dirstamp +0 -0
  47. data/ext/cppjieba/deps/gtest/src/gtest-all.cc +48 -0
  48. data/ext/cppjieba/deps/gtest/src/gtest-death-test.cc +1234 -0
  49. data/ext/cppjieba/deps/gtest/src/gtest-filepath.cc +380 -0
  50. data/ext/cppjieba/deps/gtest/src/gtest-internal-inl.h +1038 -0
  51. data/ext/cppjieba/deps/gtest/src/gtest-port.cc +746 -0
  52. data/ext/cppjieba/deps/gtest/src/gtest-printers.cc +356 -0
  53. data/ext/cppjieba/deps/gtest/src/gtest-test-part.cc +110 -0
  54. data/ext/cppjieba/deps/gtest/src/gtest-typed-test.cc +110 -0
  55. data/ext/cppjieba/deps/gtest/src/gtest.cc +4898 -0
  56. data/ext/cppjieba/deps/gtest/src/gtest_main.cc +39 -0
  57. data/ext/cppjieba/deps/limonp/ArgvContext.hpp +70 -0
  58. data/ext/cppjieba/deps/limonp/BlockingQueue.hpp +49 -0
  59. data/ext/cppjieba/deps/limonp/BoundedBlockingQueue.hpp +67 -0
  60. data/ext/cppjieba/deps/limonp/BoundedQueue.hpp +65 -0
  61. data/ext/cppjieba/deps/limonp/Closure.hpp +206 -0
  62. data/ext/cppjieba/deps/limonp/Colors.hpp +31 -0
  63. data/ext/cppjieba/deps/limonp/Condition.hpp +38 -0
  64. data/ext/cppjieba/deps/limonp/Config.hpp +103 -0
  65. data/ext/cppjieba/deps/limonp/FileLock.hpp +74 -0
  66. data/ext/cppjieba/deps/limonp/ForcePublic.hpp +7 -0
  67. data/ext/cppjieba/deps/limonp/LocalVector.hpp +139 -0
  68. data/ext/cppjieba/deps/limonp/Logging.hpp +76 -0
  69. data/ext/cppjieba/deps/limonp/Md5.hpp +411 -0
  70. data/ext/cppjieba/deps/limonp/MutexLock.hpp +51 -0
  71. data/ext/cppjieba/deps/limonp/NonCopyable.hpp +21 -0
  72. data/ext/cppjieba/deps/limonp/StdExtension.hpp +159 -0
  73. data/ext/cppjieba/deps/limonp/StringUtil.hpp +365 -0
  74. data/ext/cppjieba/deps/limonp/Thread.hpp +44 -0
  75. data/ext/cppjieba/deps/limonp/ThreadPool.hpp +86 -0
  76. data/ext/cppjieba/dict/README.md +31 -0
  77. data/ext/cppjieba/dict/hmm_model.utf8 +34 -0
  78. data/ext/cppjieba/dict/idf.utf8 +258826 -0
  79. data/ext/cppjieba/dict/jieba.dict.utf8 +348982 -0
  80. data/ext/cppjieba/dict/pos_dict/char_state_tab.utf8 +6653 -0
  81. data/ext/cppjieba/dict/pos_dict/prob_emit.utf8 +166 -0
  82. data/ext/cppjieba/dict/pos_dict/prob_start.utf8 +259 -0
  83. data/ext/cppjieba/dict/pos_dict/prob_trans.utf8 +5222 -0
  84. data/ext/cppjieba/dict/stop_words.utf8 +1534 -0
  85. data/ext/cppjieba/dict/user.dict.utf8 +4 -0
  86. data/ext/cppjieba/include/cppjieba/DictTrie.hpp +227 -0
  87. data/ext/cppjieba/include/cppjieba/FullSegment.hpp +93 -0
  88. data/ext/cppjieba/include/cppjieba/HMMModel.hpp +129 -0
  89. data/ext/cppjieba/include/cppjieba/HMMSegment.hpp +190 -0
  90. data/ext/cppjieba/include/cppjieba/Jieba.hpp +108 -0
  91. data/ext/cppjieba/include/cppjieba/KeywordExtractor.hpp +153 -0
  92. data/ext/cppjieba/include/cppjieba/MPSegment.hpp +137 -0
  93. data/ext/cppjieba/include/cppjieba/MixSegment.hpp +109 -0
  94. data/ext/cppjieba/include/cppjieba/PosTagger.hpp +77 -0
  95. data/ext/cppjieba/include/cppjieba/PreFilter.hpp +54 -0
  96. data/ext/cppjieba/include/cppjieba/QuerySegment.hpp +90 -0
  97. data/ext/cppjieba/include/cppjieba/SegmentBase.hpp +46 -0
  98. data/ext/cppjieba/include/cppjieba/SegmentTagged.hpp +23 -0
  99. data/ext/cppjieba/include/cppjieba/TextRankExtractor.hpp +190 -0
  100. data/ext/cppjieba/include/cppjieba/Trie.hpp +174 -0
  101. data/ext/cppjieba/include/cppjieba/Unicode.hpp +215 -0
  102. data/ext/cppjieba/test/CMakeLists.txt +5 -0
  103. data/ext/cppjieba/test/demo.cpp +80 -0
  104. data/ext/cppjieba/test/load_test.cpp +54 -0
  105. data/ext/cppjieba/test/testdata/curl.res +1 -0
  106. data/ext/cppjieba/test/testdata/extra_dict/jieba.dict.small.utf8 +109750 -0
  107. data/ext/cppjieba/test/testdata/gbk_dict/hmm_model.gbk +34 -0
  108. data/ext/cppjieba/test/testdata/gbk_dict/jieba.dict.gbk +348982 -0
  109. data/ext/cppjieba/test/testdata/jieba.dict.0.1.utf8 +93 -0
  110. data/ext/cppjieba/test/testdata/jieba.dict.0.utf8 +93 -0
  111. data/ext/cppjieba/test/testdata/jieba.dict.1.utf8 +67 -0
  112. data/ext/cppjieba/test/testdata/jieba.dict.2.utf8 +64 -0
  113. data/ext/cppjieba/test/testdata/load_test.urls +2 -0
  114. data/ext/cppjieba/test/testdata/review.100 +100 -0
  115. data/ext/cppjieba/test/testdata/review.100.res +200 -0
  116. data/ext/cppjieba/test/testdata/server.conf +19 -0
  117. data/ext/cppjieba/test/testdata/testlines.gbk +9 -0
  118. data/ext/cppjieba/test/testdata/testlines.utf8 +8 -0
  119. data/ext/cppjieba/test/testdata/userdict.2.utf8 +1 -0
  120. data/ext/cppjieba/test/testdata/userdict.english +2 -0
  121. data/ext/cppjieba/test/testdata/userdict.utf8 +8 -0
  122. data/ext/cppjieba/test/testdata/weicheng.utf8 +247 -0
  123. data/ext/cppjieba/test/unittest/CMakeLists.txt +24 -0
  124. data/ext/cppjieba/test/unittest/gtest_main.cpp +39 -0
  125. data/ext/cppjieba/test/unittest/jieba_test.cpp +133 -0
  126. data/ext/cppjieba/test/unittest/keyword_extractor_test.cpp +79 -0
  127. data/ext/cppjieba/test/unittest/pos_tagger_test.cpp +41 -0
  128. data/ext/cppjieba/test/unittest/pre_filter_test.cpp +43 -0
  129. data/ext/cppjieba/test/unittest/segments_test.cpp +256 -0
  130. data/ext/cppjieba/test/unittest/textrank_test.cpp +86 -0
  131. data/ext/cppjieba/test/unittest/trie_test.cpp +177 -0
  132. data/ext/cppjieba/test/unittest/unicode_test.cpp +43 -0
  133. data/ext/cppjieba_rb/cppjieba_rb.c +10 -0
  134. data/ext/cppjieba_rb/extconf.rb +26 -0
  135. data/ext/cppjieba_rb/internal.cc +148 -0
  136. data/lib/cppjieba_rb/segment.rb +20 -0
  137. data/lib/cppjieba_rb/version.rb +3 -0
  138. data/lib/cppjieba_rb.rb +34 -0
  139. data/test/test_keyword.rb +17 -0
  140. data/test/test_segment.rb +24 -0
  141. data/test/test_tagging.rb +19 -0
  142. metadata +244 -0
@@ -0,0 +1,746 @@
1
+ // Copyright 2008, Google Inc.
2
+ // All rights reserved.
3
+ //
4
+ // Redistribution and use in source and binary forms, with or without
5
+ // modification, are permitted provided that the following conditions are
6
+ // met:
7
+ //
8
+ // * Redistributions of source code must retain the above copyright
9
+ // notice, this list of conditions and the following disclaimer.
10
+ // * Redistributions in binary form must reproduce the above
11
+ // copyright notice, this list of conditions and the following disclaimer
12
+ // in the documentation and/or other materials provided with the
13
+ // distribution.
14
+ // * Neither the name of Google Inc. nor the names of its
15
+ // contributors may be used to endorse or promote products derived from
16
+ // this software without specific prior written permission.
17
+ //
18
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+ //
30
+ // Author: wan@google.com (Zhanyong Wan)
31
+
32
+ #include "gtest/internal/gtest-port.h"
33
+
34
+ #include <limits.h>
35
+ #include <stdlib.h>
36
+ #include <stdio.h>
37
+ #include <string.h>
38
+
39
+ #if GTEST_OS_WINDOWS_MOBILE
40
+ # include <windows.h> // For TerminateProcess()
41
+ #elif GTEST_OS_WINDOWS
42
+ # include <io.h>
43
+ # include <sys/stat.h>
44
+ #else
45
+ # include <unistd.h>
46
+ #endif // GTEST_OS_WINDOWS_MOBILE
47
+
48
+ #if GTEST_OS_MAC
49
+ # include <mach/mach_init.h>
50
+ # include <mach/task.h>
51
+ # include <mach/vm_map.h>
52
+ #endif // GTEST_OS_MAC
53
+
54
+ #include "gtest/gtest-spi.h"
55
+ #include "gtest/gtest-message.h"
56
+ #include "gtest/internal/gtest-internal.h"
57
+ #include "gtest/internal/gtest-string.h"
58
+
59
+ // Indicates that this translation unit is part of Google Test's
60
+ // implementation. It must come before gtest-internal-inl.h is
61
+ // included, or there will be a compiler error. This trick is to
62
+ // prevent a user from accidentally including gtest-internal-inl.h in
63
+ // his code.
64
+ #define GTEST_IMPLEMENTATION_ 1
65
+ #include "src/gtest-internal-inl.h"
66
+ #undef GTEST_IMPLEMENTATION_
67
+
68
+ namespace testing {
69
+ namespace internal {
70
+
71
+ #if defined(_MSC_VER) || defined(__BORLANDC__)
72
+ // MSVC and C++Builder do not provide a definition of STDERR_FILENO.
73
+ const int kStdOutFileno = 1;
74
+ const int kStdErrFileno = 2;
75
+ #else
76
+ const int kStdOutFileno = STDOUT_FILENO;
77
+ const int kStdErrFileno = STDERR_FILENO;
78
+ #endif // _MSC_VER
79
+
80
+ #if GTEST_OS_MAC
81
+
82
+ // Returns the number of threads running in the process, or 0 to indicate that
83
+ // we cannot detect it.
84
+ size_t GetThreadCount() {
85
+ const task_t task = mach_task_self();
86
+ mach_msg_type_number_t thread_count;
87
+ thread_act_array_t thread_list;
88
+ const kern_return_t status = task_threads(task, &thread_list, &thread_count);
89
+ if (status == KERN_SUCCESS) {
90
+ // task_threads allocates resources in thread_list and we need to free them
91
+ // to avoid leaks.
92
+ vm_deallocate(task,
93
+ reinterpret_cast<vm_address_t>(thread_list),
94
+ sizeof(thread_t) * thread_count);
95
+ return static_cast<size_t>(thread_count);
96
+ } else {
97
+ return 0;
98
+ }
99
+ }
100
+
101
+ #else
102
+
103
// Fallback used when no thread-counting facility is available: there is no
// portable way to count threads, so 0 is returned to indicate that the
// number cannot be detected.
size_t GetThreadCount() {
  const size_t kCannotDetect = 0;
  return kCannotDetect;
}
108
+
109
+ #endif // GTEST_OS_MAC
110
+
111
+ #if GTEST_USES_POSIX_RE
112
+
113
+ // Implements RE. Currently only needed for death tests.
114
+
115
+ RE::~RE() {
116
+ if (is_valid_) {
117
+ // regfree'ing an invalid regex might crash because the content
118
+ // of the regex is undefined. Since the regex's are essentially
119
+ // the same, one cannot be valid (or invalid) without the other
120
+ // being so too.
121
+ regfree(&partial_regex_);
122
+ regfree(&full_regex_);
123
+ }
124
+ free(const_cast<char*>(pattern_));
125
+ }
126
+
127
+ // Returns true iff regular expression re matches the entire str.
128
+ bool RE::FullMatch(const char* str, const RE& re) {
129
+ if (!re.is_valid_) return false;
130
+
131
+ regmatch_t match;
132
+ return regexec(&re.full_regex_, str, 1, &match, 0) == 0;
133
+ }
134
+
135
+ // Returns true iff regular expression re matches a substring of str
136
+ // (including str itself).
137
+ bool RE::PartialMatch(const char* str, const RE& re) {
138
+ if (!re.is_valid_) return false;
139
+
140
+ regmatch_t match;
141
+ return regexec(&re.partial_regex_, str, 1, &match, 0) == 0;
142
+ }
143
+
144
+ // Initializes an RE from its string representation.
145
+ void RE::Init(const char* regex) {
146
+ pattern_ = posix::StrDup(regex);
147
+
148
+ // Reserves enough bytes to hold the regular expression used for a
149
+ // full match.
150
+ const size_t full_regex_len = strlen(regex) + 10;
151
+ char* const full_pattern = new char[full_regex_len];
152
+
153
+ snprintf(full_pattern, full_regex_len, "^(%s)$", regex);
154
+ is_valid_ = regcomp(&full_regex_, full_pattern, REG_EXTENDED) == 0;
155
+ // We want to call regcomp(&partial_regex_, ...) even if the
156
+ // previous expression returns false. Otherwise partial_regex_ may
157
+ // not be properly initialized can may cause trouble when it's
158
+ // freed.
159
+ //
160
+ // Some implementation of POSIX regex (e.g. on at least some
161
+ // versions of Cygwin) doesn't accept the empty string as a valid
162
+ // regex. We change it to an equivalent form "()" to be safe.
163
+ if (is_valid_) {
164
+ const char* const partial_regex = (*regex == '\0') ? "()" : regex;
165
+ is_valid_ = regcomp(&partial_regex_, partial_regex, REG_EXTENDED) == 0;
166
+ }
167
+ EXPECT_TRUE(is_valid_)
168
+ << "Regular expression \"" << regex
169
+ << "\" is not a valid POSIX Extended regular expression.";
170
+
171
+ delete[] full_pattern;
172
+ }
173
+
174
+ #elif GTEST_USES_SIMPLE_RE
175
+
176
// Reports whether ch is one of the characters of str. The terminating
// '\0' of str is not considered a member, so a '\0' argument is never
// in the set.
bool IsInSet(char ch, const char* str) {
  if (ch == '\0') {
    return false;
  }
  return strchr(str, ch) != NULL;
}
181
+
182
+ // Returns true iff ch belongs to the given classification. Unlike
183
+ // similar functions in <ctype.h>, these aren't affected by the
184
+ // current locale.
185
// True for the ASCII decimal digits '0' through '9' only.
bool IsAsciiDigit(char ch) {
  return ch >= '0' && ch <= '9';
}
186
// True when ch is one of the ASCII punctuation characters listed below.
// '\0' is never punctuation.
bool IsAsciiPunct(char ch) {
  static const char kPunctuation[] = "^-!\"#$%&'()*+,./:;<=>?@[\\]_`{|}~";
  return ch != '\0' && strchr(kPunctuation, ch) != NULL;
}
189
// True when ch is one of the repetition meta characters '?', '*' or '+'.
bool IsRepeat(char ch) {
  return ch != '\0' && strchr("?*+", ch) != NULL;
}
190
// True when ch is a space, form feed, newline, carriage return, horizontal
// tab or vertical tab.
bool IsAsciiWhiteSpace(char ch) {
  return ch != '\0' && strchr(" \f\n\r\t\v", ch) != NULL;
}
191
// True when ch is an ASCII letter, an ASCII digit, or an underscore.
bool IsAsciiWordChar(char ch) {
  if (ch >= 'a' && ch <= 'z') return true;
  if (ch >= 'A' && ch <= 'Z') return true;
  if (ch >= '0' && ch <= '9') return true;
  return ch == '_';
}
195
+
196
+ // Returns true iff "\\c" is a supported escape sequence.
197
+ bool IsValidEscape(char c) {
198
+ return (IsAsciiPunct(c) || IsInSet(c, "dDfnrsStvwW"));
199
+ }
200
+
201
+ // Returns true iff the given atom (specified by escaped and pattern)
202
+ // matches ch. The result is undefined if the atom is invalid.
203
+ bool AtomMatchesChar(bool escaped, char pattern_char, char ch) {
204
+ if (escaped) { // "\\p" where p is pattern_char.
205
+ switch (pattern_char) {
206
+ case 'd': return IsAsciiDigit(ch);
207
+ case 'D': return !IsAsciiDigit(ch);
208
+ case 'f': return ch == '\f';
209
+ case 'n': return ch == '\n';
210
+ case 'r': return ch == '\r';
211
+ case 's': return IsAsciiWhiteSpace(ch);
212
+ case 'S': return !IsAsciiWhiteSpace(ch);
213
+ case 't': return ch == '\t';
214
+ case 'v': return ch == '\v';
215
+ case 'w': return IsAsciiWordChar(ch);
216
+ case 'W': return !IsAsciiWordChar(ch);
217
+ }
218
+ return IsAsciiPunct(pattern_char) && pattern_char == ch;
219
+ }
220
+
221
+ return (pattern_char == '.' && ch != '\n') || pattern_char == ch;
222
+ }
223
+
224
+ // Helper function used by ValidateRegex() to format error messages.
225
+ String FormatRegexSyntaxError(const char* regex, int index) {
226
+ return (Message() << "Syntax error at index " << index
227
+ << " in simple regular expression \"" << regex << "\": ").GetString();
228
+ }
229
+
230
+ // Generates non-fatal failures and returns false if regex is invalid;
231
+ // otherwise returns true.
232
+ bool ValidateRegex(const char* regex) {
233
+ if (regex == NULL) {
234
+ // TODO(wan@google.com): fix the source file location in the
235
+ // assertion failures to match where the regex is used in user
236
+ // code.
237
+ ADD_FAILURE() << "NULL is not a valid simple regular expression.";
238
+ return false;
239
+ }
240
+
241
+ bool is_valid = true;
242
+
243
+ // True iff ?, *, or + can follow the previous atom.
244
+ bool prev_repeatable = false;
245
+ for (int i = 0; regex[i]; i++) {
246
+ if (regex[i] == '\\') { // An escape sequence
247
+ i++;
248
+ if (regex[i] == '\0') {
249
+ ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1)
250
+ << "'\\' cannot appear at the end.";
251
+ return false;
252
+ }
253
+
254
+ if (!IsValidEscape(regex[i])) {
255
+ ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1)
256
+ << "invalid escape sequence \"\\" << regex[i] << "\".";
257
+ is_valid = false;
258
+ }
259
+ prev_repeatable = true;
260
+ } else { // Not an escape sequence.
261
+ const char ch = regex[i];
262
+
263
+ if (ch == '^' && i > 0) {
264
+ ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
265
+ << "'^' can only appear at the beginning.";
266
+ is_valid = false;
267
+ } else if (ch == '$' && regex[i + 1] != '\0') {
268
+ ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
269
+ << "'$' can only appear at the end.";
270
+ is_valid = false;
271
+ } else if (IsInSet(ch, "()[]{}|")) {
272
+ ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
273
+ << "'" << ch << "' is unsupported.";
274
+ is_valid = false;
275
+ } else if (IsRepeat(ch) && !prev_repeatable) {
276
+ ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
277
+ << "'" << ch << "' can only follow a repeatable token.";
278
+ is_valid = false;
279
+ }
280
+
281
+ prev_repeatable = !IsInSet(ch, "^$?*+");
282
+ }
283
+ }
284
+
285
+ return is_valid;
286
+ }
287
+
288
+ // Matches a repeated regex atom followed by a valid simple regular
289
+ // expression. The regex atom is defined as c if escaped is false,
290
+ // or \c otherwise. repeat is the repetition meta character (?, *,
291
+ // or +). The behavior is undefined if str contains too many
292
+ // characters to be indexable by size_t, in which case the test will
293
+ // probably time out anyway. We are fine with this limitation as
294
+ // std::string has it too.
295
+ bool MatchRepetitionAndRegexAtHead(
296
+ bool escaped, char c, char repeat, const char* regex,
297
+ const char* str) {
298
+ const size_t min_count = (repeat == '+') ? 1 : 0;
299
+ const size_t max_count = (repeat == '?') ? 1 :
300
+ static_cast<size_t>(-1) - 1;
301
+ // We cannot call numeric_limits::max() as it conflicts with the
302
+ // max() macro on Windows.
303
+
304
+ for (size_t i = 0; i <= max_count; ++i) {
305
+ // We know that the atom matches each of the first i characters in str.
306
+ if (i >= min_count && MatchRegexAtHead(regex, str + i)) {
307
+ // We have enough matches at the head, and the tail matches too.
308
+ // Since we only care about *whether* the pattern matches str
309
+ // (as opposed to *how* it matches), there is no need to find a
310
+ // greedy match.
311
+ return true;
312
+ }
313
+ if (str[i] == '\0' || !AtomMatchesChar(escaped, c, str[i]))
314
+ return false;
315
+ }
316
+ return false;
317
+ }
318
+
319
+ // Returns true iff regex matches a prefix of str. regex must be a
320
+ // valid simple regular expression and not start with "^", or the
321
+ // result is undefined.
322
+ bool MatchRegexAtHead(const char* regex, const char* str) {
323
+ if (*regex == '\0') // An empty regex matches a prefix of anything.
324
+ return true;
325
+
326
+ // "$" only matches the end of a string. Note that regex being
327
+ // valid guarantees that there's nothing after "$" in it.
328
+ if (*regex == '$')
329
+ return *str == '\0';
330
+
331
+ // Is the first thing in regex an escape sequence?
332
+ const bool escaped = *regex == '\\';
333
+ if (escaped)
334
+ ++regex;
335
+ if (IsRepeat(regex[1])) {
336
+ // MatchRepetitionAndRegexAtHead() calls MatchRegexAtHead(), so
337
+ // here's an indirect recursion. It terminates as the regex gets
338
+ // shorter in each recursion.
339
+ return MatchRepetitionAndRegexAtHead(
340
+ escaped, regex[0], regex[1], regex + 2, str);
341
+ } else {
342
+ // regex isn't empty, isn't "$", and doesn't start with a
343
+ // repetition. We match the first atom of regex with the first
344
+ // character of str and recurse.
345
+ return (*str != '\0') && AtomMatchesChar(escaped, *regex, *str) &&
346
+ MatchRegexAtHead(regex + 1, str + 1);
347
+ }
348
+ }
349
+
350
+ // Returns true iff regex matches any substring of str. regex must be
351
+ // a valid simple regular expression, or the result is undefined.
352
+ //
353
+ // The algorithm is recursive, but the recursion depth doesn't exceed
354
+ // the regex length, so we won't need to worry about running out of
355
+ // stack space normally. In rare cases the time complexity can be
356
+ // exponential with respect to the regex length + the string length,
357
+ // but usually it's must faster (often close to linear).
358
+ bool MatchRegexAnywhere(const char* regex, const char* str) {
359
+ if (regex == NULL || str == NULL)
360
+ return false;
361
+
362
+ if (*regex == '^')
363
+ return MatchRegexAtHead(regex + 1, str);
364
+
365
+ // A successful match can be anywhere in str.
366
+ do {
367
+ if (MatchRegexAtHead(regex, str))
368
+ return true;
369
+ } while (*str++ != '\0');
370
+ return false;
371
+ }
372
+
373
+ // Implements the RE class.
374
+
375
+ RE::~RE() {
376
+ free(const_cast<char*>(pattern_));
377
+ free(const_cast<char*>(full_pattern_));
378
+ }
379
+
380
+ // Returns true iff regular expression re matches the entire str.
381
+ bool RE::FullMatch(const char* str, const RE& re) {
382
+ return re.is_valid_ && MatchRegexAnywhere(re.full_pattern_, str);
383
+ }
384
+
385
+ // Returns true iff regular expression re matches a substring of str
386
+ // (including str itself).
387
+ bool RE::PartialMatch(const char* str, const RE& re) {
388
+ return re.is_valid_ && MatchRegexAnywhere(re.pattern_, str);
389
+ }
390
+
391
+ // Initializes an RE from its string representation.
392
+ void RE::Init(const char* regex) {
393
+ pattern_ = full_pattern_ = NULL;
394
+ if (regex != NULL) {
395
+ pattern_ = posix::StrDup(regex);
396
+ }
397
+
398
+ is_valid_ = ValidateRegex(regex);
399
+ if (!is_valid_) {
400
+ // No need to calculate the full pattern when the regex is invalid.
401
+ return;
402
+ }
403
+
404
+ const size_t len = strlen(regex);
405
+ // Reserves enough bytes to hold the regular expression used for a
406
+ // full match: we need space to prepend a '^', append a '$', and
407
+ // terminate the string with '\0'.
408
+ char* buffer = static_cast<char*>(malloc(len + 3));
409
+ full_pattern_ = buffer;
410
+
411
+ if (*regex != '^')
412
+ *buffer++ = '^'; // Makes sure full_pattern_ starts with '^'.
413
+
414
+ // We don't use snprintf or strncpy, as they trigger a warning when
415
+ // compiled with VC++ 8.0.
416
+ memcpy(buffer, regex, len);
417
+ buffer += len;
418
+
419
+ if (len == 0 || regex[len - 1] != '$')
420
+ *buffer++ = '$'; // Makes sure full_pattern_ ends with '$'.
421
+
422
+ *buffer = '\0';
423
+ }
424
+
425
+ #endif // GTEST_USES_POSIX_RE
426
+
427
+ const char kUnknownFile[] = "unknown file";
428
+
429
+ // Formats a source file path and a line number as they would appear
430
+ // in an error message from the compiler used to compile this code.
431
+ GTEST_API_ ::std::string FormatFileLocation(const char* file, int line) {
432
+ const char* const file_name = file == NULL ? kUnknownFile : file;
433
+
434
+ if (line < 0) {
435
+ return String::Format("%s:", file_name).c_str();
436
+ }
437
+ #ifdef _MSC_VER
438
+ return String::Format("%s(%d):", file_name, line).c_str();
439
+ #else
440
+ return String::Format("%s:%d:", file_name, line).c_str();
441
+ #endif // _MSC_VER
442
+ }
443
+
444
+ // Formats a file location for compiler-independent XML output.
445
+ // Although this function is not platform dependent, we put it next to
446
+ // FormatFileLocation in order to contrast the two functions.
447
+ // Note that FormatCompilerIndependentFileLocation() does NOT append colon
448
+ // to the file location it produces, unlike FormatFileLocation().
449
+ GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(
450
+ const char* file, int line) {
451
+ const char* const file_name = file == NULL ? kUnknownFile : file;
452
+
453
+ if (line < 0)
454
+ return file_name;
455
+ else
456
+ return String::Format("%s:%d", file_name, line).c_str();
457
+ }
458
+
459
+
460
+ GTestLog::GTestLog(GTestLogSeverity severity, const char* file, int line)
461
+ : severity_(severity) {
462
+ const char* const marker =
463
+ severity == GTEST_INFO ? "[ INFO ]" :
464
+ severity == GTEST_WARNING ? "[WARNING]" :
465
+ severity == GTEST_ERROR ? "[ ERROR ]" : "[ FATAL ]";
466
+ GetStream() << ::std::endl << marker << " "
467
+ << FormatFileLocation(file, line).c_str() << ": ";
468
+ }
469
+
470
+ // Flushes the buffers and, if severity is GTEST_FATAL, aborts the program.
471
+ GTestLog::~GTestLog() {
472
+ GetStream() << ::std::endl;
473
+ if (severity_ == GTEST_FATAL) {
474
+ fflush(stderr);
475
+ posix::Abort();
476
+ }
477
+ }
478
+ // Disable Microsoft deprecation warnings for POSIX functions called from
479
+ // this class (creat, dup, dup2, and close)
480
+ #ifdef _MSC_VER
481
+ # pragma warning(push)
482
+ # pragma warning(disable: 4996)
483
+ #endif // _MSC_VER
484
+
485
+ #if GTEST_HAS_STREAM_REDIRECTION
486
+
487
+ // Object that captures an output stream (stdout/stderr).
488
+ class CapturedStream {
489
+ public:
490
+ // The ctor redirects the stream to a temporary file.
491
+ CapturedStream(int fd) : fd_(fd), uncaptured_fd_(dup(fd)) {
492
+
493
+ # if GTEST_OS_WINDOWS
494
+ char temp_dir_path[MAX_PATH + 1] = { '\0' }; // NOLINT
495
+ char temp_file_path[MAX_PATH + 1] = { '\0' }; // NOLINT
496
+
497
+ ::GetTempPathA(sizeof(temp_dir_path), temp_dir_path);
498
+ const UINT success = ::GetTempFileNameA(temp_dir_path,
499
+ "gtest_redir",
500
+ 0, // Generate unique file name.
501
+ temp_file_path);
502
+ GTEST_CHECK_(success != 0)
503
+ << "Unable to create a temporary file in " << temp_dir_path;
504
+ const int captured_fd = creat(temp_file_path, _S_IREAD | _S_IWRITE);
505
+ GTEST_CHECK_(captured_fd != -1) << "Unable to open temporary file "
506
+ << temp_file_path;
507
+ filename_ = temp_file_path;
508
+ # else
509
+ // There's no guarantee that a test has write access to the
510
+ // current directory, so we create the temporary file in the /tmp
511
+ // directory instead.
512
+ char name_template[] = "/tmp/captured_stream.XXXXXX";
513
+ const int captured_fd = mkstemp(name_template);
514
+ filename_ = name_template;
515
+ # endif // GTEST_OS_WINDOWS
516
+ fflush(NULL);
517
+ dup2(captured_fd, fd_);
518
+ close(captured_fd);
519
+ }
520
+
521
+ ~CapturedStream() {
522
+ remove(filename_.c_str());
523
+ }
524
+
525
+ String GetCapturedString() {
526
+ if (uncaptured_fd_ != -1) {
527
+ // Restores the original stream.
528
+ fflush(NULL);
529
+ dup2(uncaptured_fd_, fd_);
530
+ close(uncaptured_fd_);
531
+ uncaptured_fd_ = -1;
532
+ }
533
+
534
+ FILE* const file = posix::FOpen(filename_.c_str(), "r");
535
+ const String content = ReadEntireFile(file);
536
+ posix::FClose(file);
537
+ return content;
538
+ }
539
+
540
+ private:
541
+ // Reads the entire content of a file as a String.
542
+ static String ReadEntireFile(FILE* file);
543
+
544
+ // Returns the size (in bytes) of a file.
545
+ static size_t GetFileSize(FILE* file);
546
+
547
+ const int fd_; // A stream to capture.
548
+ int uncaptured_fd_;
549
+ // Name of the temporary file holding the stderr output.
550
+ ::std::string filename_;
551
+
552
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(CapturedStream);
553
+ };
554
+
555
+ // Returns the size (in bytes) of a file.
556
+ size_t CapturedStream::GetFileSize(FILE* file) {
557
+ fseek(file, 0, SEEK_END);
558
+ return static_cast<size_t>(ftell(file));
559
+ }
560
+
561
+ // Reads the entire content of a file as a string.
562
+ String CapturedStream::ReadEntireFile(FILE* file) {
563
+ const size_t file_size = GetFileSize(file);
564
+ char* const buffer = new char[file_size];
565
+
566
+ size_t bytes_last_read = 0; // # of bytes read in the last fread()
567
+ size_t bytes_read = 0; // # of bytes read so far
568
+
569
+ fseek(file, 0, SEEK_SET);
570
+
571
+ // Keeps reading the file until we cannot read further or the
572
+ // pre-determined file size is reached.
573
+ do {
574
+ bytes_last_read = fread(buffer+bytes_read, 1, file_size-bytes_read, file);
575
+ bytes_read += bytes_last_read;
576
+ } while (bytes_last_read > 0 && bytes_read < file_size);
577
+
578
+ const String content(buffer, bytes_read);
579
+ delete[] buffer;
580
+
581
+ return content;
582
+ }
583
+
584
+ # ifdef _MSC_VER
585
+ # pragma warning(pop)
586
+ # endif // _MSC_VER
587
+
588
+ static CapturedStream* g_captured_stderr = NULL;
589
+ static CapturedStream* g_captured_stdout = NULL;
590
+
591
+ // Starts capturing an output stream (stdout/stderr).
592
+ void CaptureStream(int fd, const char* stream_name, CapturedStream** stream) {
593
+ if (*stream != NULL) {
594
+ GTEST_LOG_(FATAL) << "Only one " << stream_name
595
+ << " capturer can exist at a time.";
596
+ }
597
+ *stream = new CapturedStream(fd);
598
+ }
599
+
600
+ // Stops capturing the output stream and returns the captured string.
601
+ String GetCapturedStream(CapturedStream** captured_stream) {
602
+ const String content = (*captured_stream)->GetCapturedString();
603
+
604
+ delete *captured_stream;
605
+ *captured_stream = NULL;
606
+
607
+ return content;
608
+ }
609
+
610
+ // Starts capturing stdout.
611
+ void CaptureStdout() {
612
+ CaptureStream(kStdOutFileno, "stdout", &g_captured_stdout);
613
+ }
614
+
615
+ // Starts capturing stderr.
616
+ void CaptureStderr() {
617
+ CaptureStream(kStdErrFileno, "stderr", &g_captured_stderr);
618
+ }
619
+
620
+ // Stops capturing stdout and returns the captured string.
621
+ String GetCapturedStdout() { return GetCapturedStream(&g_captured_stdout); }
622
+
623
+ // Stops capturing stderr and returns the captured string.
624
+ String GetCapturedStderr() { return GetCapturedStream(&g_captured_stderr); }
625
+
626
+ #endif // GTEST_HAS_STREAM_REDIRECTION
627
+
628
+ #if GTEST_HAS_DEATH_TEST
629
+
630
+ // A copy of all command line arguments. Set by InitGoogleTest().
631
+ ::std::vector<String> g_argvs;
632
+
633
+ // Returns the command line as a vector of strings.
634
+ const ::std::vector<String>& GetArgvs() { return g_argvs; }
635
+
636
+ #endif // GTEST_HAS_DEATH_TEST
637
+
638
+ #if GTEST_OS_WINDOWS_MOBILE
639
+ namespace posix {
640
+ void Abort() {
641
+ DebugBreak();
642
+ TerminateProcess(GetCurrentProcess(), 1);
643
+ }
644
+ } // namespace posix
645
+ #endif // GTEST_OS_WINDOWS_MOBILE
646
+
647
+ // Returns the name of the environment variable corresponding to the
648
+ // given flag. For example, FlagToEnvVar("foo") will return
649
+ // "GTEST_FOO" in the open-source version.
650
+ static String FlagToEnvVar(const char* flag) {
651
+ const String full_flag =
652
+ (Message() << GTEST_FLAG_PREFIX_ << flag).GetString();
653
+
654
+ Message env_var;
655
+ for (size_t i = 0; i != full_flag.length(); i++) {
656
+ env_var << ToUpper(full_flag.c_str()[i]);
657
+ }
658
+
659
+ return env_var.GetString();
660
+ }
661
+
662
+ // Parses 'str' for a 32-bit signed integer. If successful, writes
663
+ // the result to *value and returns true; otherwise leaves *value
664
+ // unchanged and returns false.
665
+ bool ParseInt32(const Message& src_text, const char* str, Int32* value) {
666
+ // Parses the environment variable as a decimal integer.
667
+ char* end = NULL;
668
+ const long long_value = strtol(str, &end, 10); // NOLINT
669
+
670
+ // Has strtol() consumed all characters in the string?
671
+ if (*end != '\0') {
672
+ // No - an invalid character was encountered.
673
+ Message msg;
674
+ msg << "WARNING: " << src_text
675
+ << " is expected to be a 32-bit integer, but actually"
676
+ << " has value \"" << str << "\".\n";
677
+ printf("%s", msg.GetString().c_str());
678
+ fflush(stdout);
679
+ return false;
680
+ }
681
+
682
+ // Is the parsed value in the range of an Int32?
683
+ const Int32 result = static_cast<Int32>(long_value);
684
+ if (long_value == LONG_MAX || long_value == LONG_MIN ||
685
+ // The parsed value overflows as a long. (strtol() returns
686
+ // LONG_MAX or LONG_MIN when the input overflows.)
687
+ result != long_value
688
+ // The parsed value overflows as an Int32.
689
+ ) {
690
+ Message msg;
691
+ msg << "WARNING: " << src_text
692
+ << " is expected to be a 32-bit integer, but actually"
693
+ << " has value " << str << ", which overflows.\n";
694
+ printf("%s", msg.GetString().c_str());
695
+ fflush(stdout);
696
+ return false;
697
+ }
698
+
699
+ *value = result;
700
+ return true;
701
+ }
702
+
703
+ // Reads and returns the Boolean environment variable corresponding to
704
+ // the given flag; if it's not set, returns default_value.
705
+ //
706
+ // The value is considered true iff it's not "0".
707
+ bool BoolFromGTestEnv(const char* flag, bool default_value) {
708
+ const String env_var = FlagToEnvVar(flag);
709
+ const char* const string_value = posix::GetEnv(env_var.c_str());
710
+ return string_value == NULL ?
711
+ default_value : strcmp(string_value, "0") != 0;
712
+ }
713
+
714
+ // Reads and returns a 32-bit integer stored in the environment
715
+ // variable corresponding to the given flag; if it isn't set or
716
+ // doesn't represent a valid 32-bit integer, returns default_value.
717
+ Int32 Int32FromGTestEnv(const char* flag, Int32 default_value) {
718
+ const String env_var = FlagToEnvVar(flag);
719
+ const char* const string_value = posix::GetEnv(env_var.c_str());
720
+ if (string_value == NULL) {
721
+ // The environment variable is not set.
722
+ return default_value;
723
+ }
724
+
725
+ Int32 result = default_value;
726
+ if (!ParseInt32(Message() << "Environment variable " << env_var,
727
+ string_value, &result)) {
728
+ printf("The default value %s is used.\n",
729
+ (Message() << default_value).GetString().c_str());
730
+ fflush(stdout);
731
+ return default_value;
732
+ }
733
+
734
+ return result;
735
+ }
736
+
737
+ // Reads and returns the string environment variable corresponding to
738
+ // the given flag; if it's not set, returns default_value.
739
+ const char* StringFromGTestEnv(const char* flag, const char* default_value) {
740
+ const String env_var = FlagToEnvVar(flag);
741
+ const char* const value = posix::GetEnv(env_var.c_str());
742
+ return value == NULL ? default_value : value;
743
+ }
744
+
745
+ } // namespace internal
746
+ } // namespace testing