cppjieba_rb 0.3.3 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (130) hide show
  1. checksums.yaml +5 -5
  2. data/.travis.yml +3 -0
  3. data/README.md +1 -1
  4. data/Rakefile +2 -2
  5. data/cppjieba_rb.gemspec +4 -4
  6. data/lib/cppjieba_rb/version.rb +1 -1
  7. metadata +17 -135
  8. data/ext/cppjieba/.gitignore +0 -17
  9. data/ext/cppjieba/.travis.yml +0 -21
  10. data/ext/cppjieba/CMakeLists.txt +0 -28
  11. data/ext/cppjieba/ChangeLog.md +0 -236
  12. data/ext/cppjieba/README.md +0 -292
  13. data/ext/cppjieba/README_EN.md +0 -113
  14. data/ext/cppjieba/appveyor.yml +0 -32
  15. data/ext/cppjieba/deps/CMakeLists.txt +0 -1
  16. data/ext/cppjieba/deps/gtest/CMakeLists.txt +0 -5
  17. data/ext/cppjieba/deps/gtest/include/gtest/gtest-death-test.h +0 -283
  18. data/ext/cppjieba/deps/gtest/include/gtest/gtest-message.h +0 -230
  19. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h +0 -1421
  20. data/ext/cppjieba/deps/gtest/include/gtest/gtest-param-test.h.pump +0 -487
  21. data/ext/cppjieba/deps/gtest/include/gtest/gtest-printers.h +0 -796
  22. data/ext/cppjieba/deps/gtest/include/gtest/gtest-spi.h +0 -232
  23. data/ext/cppjieba/deps/gtest/include/gtest/gtest-test-part.h +0 -176
  24. data/ext/cppjieba/deps/gtest/include/gtest/gtest-typed-test.h +0 -259
  25. data/ext/cppjieba/deps/gtest/include/gtest/gtest.h +0 -2155
  26. data/ext/cppjieba/deps/gtest/include/gtest/gtest_pred_impl.h +0 -358
  27. data/ext/cppjieba/deps/gtest/include/gtest/gtest_prod.h +0 -58
  28. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-death-test-internal.h +0 -308
  29. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-filepath.h +0 -210
  30. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-internal.h +0 -1226
  31. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-linked_ptr.h +0 -233
  32. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h +0 -4822
  33. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util-generated.h.pump +0 -301
  34. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-param-util.h +0 -619
  35. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-port.h +0 -1788
  36. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-string.h +0 -350
  37. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h +0 -968
  38. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-tuple.h.pump +0 -336
  39. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h +0 -3330
  40. data/ext/cppjieba/deps/gtest/include/gtest/internal/gtest-type-util.h.pump +0 -296
  41. data/ext/cppjieba/deps/gtest/src/.deps/.dirstamp +0 -0
  42. data/ext/cppjieba/deps/gtest/src/.deps/gtest-all.Plo +0 -681
  43. data/ext/cppjieba/deps/gtest/src/.deps/gtest_main.Plo +0 -509
  44. data/ext/cppjieba/deps/gtest/src/.dirstamp +0 -0
  45. data/ext/cppjieba/deps/gtest/src/gtest-all.cc +0 -48
  46. data/ext/cppjieba/deps/gtest/src/gtest-death-test.cc +0 -1234
  47. data/ext/cppjieba/deps/gtest/src/gtest-filepath.cc +0 -380
  48. data/ext/cppjieba/deps/gtest/src/gtest-internal-inl.h +0 -1038
  49. data/ext/cppjieba/deps/gtest/src/gtest-port.cc +0 -746
  50. data/ext/cppjieba/deps/gtest/src/gtest-printers.cc +0 -356
  51. data/ext/cppjieba/deps/gtest/src/gtest-test-part.cc +0 -110
  52. data/ext/cppjieba/deps/gtest/src/gtest-typed-test.cc +0 -110
  53. data/ext/cppjieba/deps/gtest/src/gtest.cc +0 -4898
  54. data/ext/cppjieba/deps/gtest/src/gtest_main.cc +0 -39
  55. data/ext/cppjieba/deps/limonp/ArgvContext.hpp +0 -70
  56. data/ext/cppjieba/deps/limonp/BlockingQueue.hpp +0 -49
  57. data/ext/cppjieba/deps/limonp/BoundedBlockingQueue.hpp +0 -67
  58. data/ext/cppjieba/deps/limonp/BoundedQueue.hpp +0 -65
  59. data/ext/cppjieba/deps/limonp/Closure.hpp +0 -206
  60. data/ext/cppjieba/deps/limonp/Colors.hpp +0 -31
  61. data/ext/cppjieba/deps/limonp/Condition.hpp +0 -38
  62. data/ext/cppjieba/deps/limonp/Config.hpp +0 -103
  63. data/ext/cppjieba/deps/limonp/FileLock.hpp +0 -74
  64. data/ext/cppjieba/deps/limonp/ForcePublic.hpp +0 -7
  65. data/ext/cppjieba/deps/limonp/LocalVector.hpp +0 -139
  66. data/ext/cppjieba/deps/limonp/Logging.hpp +0 -76
  67. data/ext/cppjieba/deps/limonp/Md5.hpp +0 -411
  68. data/ext/cppjieba/deps/limonp/MutexLock.hpp +0 -51
  69. data/ext/cppjieba/deps/limonp/NonCopyable.hpp +0 -21
  70. data/ext/cppjieba/deps/limonp/StdExtension.hpp +0 -159
  71. data/ext/cppjieba/deps/limonp/StringUtil.hpp +0 -365
  72. data/ext/cppjieba/deps/limonp/Thread.hpp +0 -44
  73. data/ext/cppjieba/deps/limonp/ThreadPool.hpp +0 -86
  74. data/ext/cppjieba/dict/README.md +0 -31
  75. data/ext/cppjieba/dict/hmm_model.utf8 +0 -34
  76. data/ext/cppjieba/dict/idf.utf8 +0 -258826
  77. data/ext/cppjieba/dict/jieba.dict.utf8 +0 -348982
  78. data/ext/cppjieba/dict/pos_dict/char_state_tab.utf8 +0 -6653
  79. data/ext/cppjieba/dict/pos_dict/prob_emit.utf8 +0 -166
  80. data/ext/cppjieba/dict/pos_dict/prob_start.utf8 +0 -259
  81. data/ext/cppjieba/dict/pos_dict/prob_trans.utf8 +0 -5222
  82. data/ext/cppjieba/dict/stop_words.utf8 +0 -1534
  83. data/ext/cppjieba/dict/user.dict.utf8 +0 -4
  84. data/ext/cppjieba/include/cppjieba/DictTrie.hpp +0 -277
  85. data/ext/cppjieba/include/cppjieba/FullSegment.hpp +0 -93
  86. data/ext/cppjieba/include/cppjieba/HMMModel.hpp +0 -129
  87. data/ext/cppjieba/include/cppjieba/HMMSegment.hpp +0 -190
  88. data/ext/cppjieba/include/cppjieba/Jieba.hpp +0 -130
  89. data/ext/cppjieba/include/cppjieba/KeywordExtractor.hpp +0 -153
  90. data/ext/cppjieba/include/cppjieba/MPSegment.hpp +0 -137
  91. data/ext/cppjieba/include/cppjieba/MixSegment.hpp +0 -109
  92. data/ext/cppjieba/include/cppjieba/PosTagger.hpp +0 -77
  93. data/ext/cppjieba/include/cppjieba/PreFilter.hpp +0 -54
  94. data/ext/cppjieba/include/cppjieba/QuerySegment.hpp +0 -90
  95. data/ext/cppjieba/include/cppjieba/SegmentBase.hpp +0 -46
  96. data/ext/cppjieba/include/cppjieba/SegmentTagged.hpp +0 -23
  97. data/ext/cppjieba/include/cppjieba/TextRankExtractor.hpp +0 -190
  98. data/ext/cppjieba/include/cppjieba/Trie.hpp +0 -174
  99. data/ext/cppjieba/include/cppjieba/Unicode.hpp +0 -227
  100. data/ext/cppjieba/test/CMakeLists.txt +0 -5
  101. data/ext/cppjieba/test/demo.cpp +0 -80
  102. data/ext/cppjieba/test/load_test.cpp +0 -54
  103. data/ext/cppjieba/test/testdata/curl.res +0 -1
  104. data/ext/cppjieba/test/testdata/extra_dict/jieba.dict.small.utf8 +0 -109750
  105. data/ext/cppjieba/test/testdata/gbk_dict/hmm_model.gbk +0 -34
  106. data/ext/cppjieba/test/testdata/gbk_dict/jieba.dict.gbk +0 -348982
  107. data/ext/cppjieba/test/testdata/jieba.dict.0.1.utf8 +0 -93
  108. data/ext/cppjieba/test/testdata/jieba.dict.0.utf8 +0 -93
  109. data/ext/cppjieba/test/testdata/jieba.dict.1.utf8 +0 -67
  110. data/ext/cppjieba/test/testdata/jieba.dict.2.utf8 +0 -64
  111. data/ext/cppjieba/test/testdata/load_test.urls +0 -2
  112. data/ext/cppjieba/test/testdata/review.100 +0 -100
  113. data/ext/cppjieba/test/testdata/review.100.res +0 -200
  114. data/ext/cppjieba/test/testdata/server.conf +0 -19
  115. data/ext/cppjieba/test/testdata/testlines.gbk +0 -9
  116. data/ext/cppjieba/test/testdata/testlines.utf8 +0 -8
  117. data/ext/cppjieba/test/testdata/userdict.2.utf8 +0 -1
  118. data/ext/cppjieba/test/testdata/userdict.english +0 -2
  119. data/ext/cppjieba/test/testdata/userdict.utf8 +0 -8
  120. data/ext/cppjieba/test/testdata/weicheng.utf8 +0 -247
  121. data/ext/cppjieba/test/unittest/CMakeLists.txt +0 -24
  122. data/ext/cppjieba/test/unittest/gtest_main.cpp +0 -39
  123. data/ext/cppjieba/test/unittest/jieba_test.cpp +0 -133
  124. data/ext/cppjieba/test/unittest/keyword_extractor_test.cpp +0 -79
  125. data/ext/cppjieba/test/unittest/pos_tagger_test.cpp +0 -41
  126. data/ext/cppjieba/test/unittest/pre_filter_test.cpp +0 -43
  127. data/ext/cppjieba/test/unittest/segments_test.cpp +0 -256
  128. data/ext/cppjieba/test/unittest/textrank_test.cpp +0 -86
  129. data/ext/cppjieba/test/unittest/trie_test.cpp +0 -177
  130. data/ext/cppjieba/test/unittest/unicode_test.cpp +0 -43
@@ -1,746 +0,0 @@
1
- // Copyright 2008, Google Inc.
2
- // All rights reserved.
3
- //
4
- // Redistribution and use in source and binary forms, with or without
5
- // modification, are permitted provided that the following conditions are
6
- // met:
7
- //
8
- // * Redistributions of source code must retain the above copyright
9
- // notice, this list of conditions and the following disclaimer.
10
- // * Redistributions in binary form must reproduce the above
11
- // copyright notice, this list of conditions and the following disclaimer
12
- // in the documentation and/or other materials provided with the
13
- // distribution.
14
- // * Neither the name of Google Inc. nor the names of its
15
- // contributors may be used to endorse or promote products derived from
16
- // this software without specific prior written permission.
17
- //
18
- // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
- // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
- // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
- // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
- // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
- // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
- // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
- // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
- // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
- // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
- // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
- //
30
- // Author: wan@google.com (Zhanyong Wan)
31
-
32
- #include "gtest/internal/gtest-port.h"
33
-
34
- #include <limits.h>
35
- #include <stdlib.h>
36
- #include <stdio.h>
37
- #include <string.h>
38
-
39
- #if GTEST_OS_WINDOWS_MOBILE
40
- # include <windows.h> // For TerminateProcess()
41
- #elif GTEST_OS_WINDOWS
42
- # include <io.h>
43
- # include <sys/stat.h>
44
- #else
45
- # include <unistd.h>
46
- #endif // GTEST_OS_WINDOWS_MOBILE
47
-
48
- #if GTEST_OS_MAC
49
- # include <mach/mach_init.h>
50
- # include <mach/task.h>
51
- # include <mach/vm_map.h>
52
- #endif // GTEST_OS_MAC
53
-
54
- #include "gtest/gtest-spi.h"
55
- #include "gtest/gtest-message.h"
56
- #include "gtest/internal/gtest-internal.h"
57
- #include "gtest/internal/gtest-string.h"
58
-
59
- // Indicates that this translation unit is part of Google Test's
60
- // implementation. It must come before gtest-internal-inl.h is
61
- // included, or there will be a compiler error. This trick is to
62
- // prevent a user from accidentally including gtest-internal-inl.h in
63
- // their code.
64
- #define GTEST_IMPLEMENTATION_ 1
65
- #include "src/gtest-internal-inl.h"
66
- #undef GTEST_IMPLEMENTATION_
67
-
68
- namespace testing {
69
- namespace internal {
70
-
71
- #if defined(_MSC_VER) || defined(__BORLANDC__)
72
- // MSVC and C++Builder do not provide definitions of STDOUT_FILENO and STDERR_FILENO.
73
- const int kStdOutFileno = 1;
74
- const int kStdErrFileno = 2;
75
- #else
76
- const int kStdOutFileno = STDOUT_FILENO;
77
- const int kStdErrFileno = STDERR_FILENO;
78
- #endif // _MSC_VER
79
-
80
- #if GTEST_OS_MAC
81
-
82
- // Returns the number of threads running in the process, or 0 to indicate that
83
- // we cannot detect it.
84
- size_t GetThreadCount() {
85
- const task_t task = mach_task_self();
86
- mach_msg_type_number_t thread_count;
87
- thread_act_array_t thread_list;
88
- const kern_return_t status = task_threads(task, &thread_list, &thread_count);
89
- if (status == KERN_SUCCESS) {
90
- // task_threads allocates resources in thread_list and we need to free them
91
- // to avoid leaks.
92
- vm_deallocate(task,
93
- reinterpret_cast<vm_address_t>(thread_list),
94
- sizeof(thread_t) * thread_count);
95
- return static_cast<size_t>(thread_count);
96
- } else {
97
- return 0;
98
- }
99
- }
100
-
101
- #else
102
-
103
- size_t GetThreadCount() {
104
- // There's no portable way to detect the number of threads, so we just
105
- // return 0 to indicate that we cannot detect it.
106
- return 0;
107
- }
108
-
109
- #endif // GTEST_OS_MAC
110
-
111
- #if GTEST_USES_POSIX_RE
112
-
113
- // Implements RE. Currently only needed for death tests.
114
-
115
- RE::~RE() {
116
- if (is_valid_) {
117
- // regfree'ing an invalid regex might crash because the content
118
- // of the regex is undefined. Since the regexes are essentially
119
- // the same, one cannot be valid (or invalid) without the other
120
- // being so too.
121
- regfree(&partial_regex_);
122
- regfree(&full_regex_);
123
- }
124
- free(const_cast<char*>(pattern_));
125
- }
126
-
127
- // Returns true iff regular expression re matches the entire str.
128
- bool RE::FullMatch(const char* str, const RE& re) {
129
- if (!re.is_valid_) return false;
130
-
131
- regmatch_t match;
132
- return regexec(&re.full_regex_, str, 1, &match, 0) == 0;
133
- }
134
-
135
- // Returns true iff regular expression re matches a substring of str
136
- // (including str itself).
137
- bool RE::PartialMatch(const char* str, const RE& re) {
138
- if (!re.is_valid_) return false;
139
-
140
- regmatch_t match;
141
- return regexec(&re.partial_regex_, str, 1, &match, 0) == 0;
142
- }
143
-
144
- // Initializes an RE from its string representation.
145
- void RE::Init(const char* regex) {
146
- pattern_ = posix::StrDup(regex);
147
-
148
- // Reserves enough bytes to hold the regular expression used for a
149
- // full match.
150
- const size_t full_regex_len = strlen(regex) + 10;
151
- char* const full_pattern = new char[full_regex_len];
152
-
153
- snprintf(full_pattern, full_regex_len, "^(%s)$", regex);
154
- is_valid_ = regcomp(&full_regex_, full_pattern, REG_EXTENDED) == 0;
155
- // We want to call regcomp(&partial_regex_, ...) even if the
156
- // previous expression returns false. Otherwise partial_regex_ may
157
- // not be properly initialized and may cause trouble when it's
158
- // freed.
159
- //
160
- // Some implementations of POSIX regex (e.g. on at least some
160
- // versions of Cygwin) don't accept the empty string as a valid
162
- // regex. We change it to an equivalent form "()" to be safe.
163
- if (is_valid_) {
164
- const char* const partial_regex = (*regex == '\0') ? "()" : regex;
165
- is_valid_ = regcomp(&partial_regex_, partial_regex, REG_EXTENDED) == 0;
166
- }
167
- EXPECT_TRUE(is_valid_)
168
- << "Regular expression \"" << regex
169
- << "\" is not a valid POSIX Extended regular expression.";
170
-
171
- delete[] full_pattern;
172
- }
173
-
174
- #elif GTEST_USES_SIMPLE_RE
175
-
176
- // Returns true iff ch appears anywhere in str (excluding the
177
- // terminating '\0' character).
178
- bool IsInSet(char ch, const char* str) {
179
- return ch != '\0' && strchr(str, ch) != NULL;
180
- }
181
-
182
- // Returns true iff ch belongs to the given classification. Unlike
183
- // similar functions in <ctype.h>, these aren't affected by the
184
- // current locale.
185
- bool IsAsciiDigit(char ch) { return '0' <= ch && ch <= '9'; }
186
- bool IsAsciiPunct(char ch) {
187
- return IsInSet(ch, "^-!\"#$%&'()*+,./:;<=>?@[\\]_`{|}~");
188
- }
189
- bool IsRepeat(char ch) { return IsInSet(ch, "?*+"); }
190
- bool IsAsciiWhiteSpace(char ch) { return IsInSet(ch, " \f\n\r\t\v"); }
191
- bool IsAsciiWordChar(char ch) {
192
- return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') ||
193
- ('0' <= ch && ch <= '9') || ch == '_';
194
- }
195
-
196
- // Returns true iff "\\c" is a supported escape sequence.
197
- bool IsValidEscape(char c) {
198
- return (IsAsciiPunct(c) || IsInSet(c, "dDfnrsStvwW"));
199
- }
200
-
201
- // Returns true iff the given atom (specified by escaped and pattern_char)
202
- // matches ch. The result is undefined if the atom is invalid.
203
- bool AtomMatchesChar(bool escaped, char pattern_char, char ch) {
204
- if (escaped) { // "\\p" where p is pattern_char.
205
- switch (pattern_char) {
206
- case 'd': return IsAsciiDigit(ch);
207
- case 'D': return !IsAsciiDigit(ch);
208
- case 'f': return ch == '\f';
209
- case 'n': return ch == '\n';
210
- case 'r': return ch == '\r';
211
- case 's': return IsAsciiWhiteSpace(ch);
212
- case 'S': return !IsAsciiWhiteSpace(ch);
213
- case 't': return ch == '\t';
214
- case 'v': return ch == '\v';
215
- case 'w': return IsAsciiWordChar(ch);
216
- case 'W': return !IsAsciiWordChar(ch);
217
- }
218
- return IsAsciiPunct(pattern_char) && pattern_char == ch;
219
- }
220
-
221
- return (pattern_char == '.' && ch != '\n') || pattern_char == ch;
222
- }
223
-
224
- // Helper function used by ValidateRegex() to format error messages.
225
- String FormatRegexSyntaxError(const char* regex, int index) {
226
- return (Message() << "Syntax error at index " << index
227
- << " in simple regular expression \"" << regex << "\": ").GetString();
228
- }
229
-
230
- // Generates non-fatal failures and returns false if regex is invalid;
231
- // otherwise returns true.
232
- bool ValidateRegex(const char* regex) {
233
- if (regex == NULL) {
234
- // TODO(wan@google.com): fix the source file location in the
235
- // assertion failures to match where the regex is used in user
236
- // code.
237
- ADD_FAILURE() << "NULL is not a valid simple regular expression.";
238
- return false;
239
- }
240
-
241
- bool is_valid = true;
242
-
243
- // True iff ?, *, or + can follow the previous atom.
244
- bool prev_repeatable = false;
245
- for (int i = 0; regex[i]; i++) {
246
- if (regex[i] == '\\') { // An escape sequence
247
- i++;
248
- if (regex[i] == '\0') {
249
- ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1)
250
- << "'\\' cannot appear at the end.";
251
- return false;
252
- }
253
-
254
- if (!IsValidEscape(regex[i])) {
255
- ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1)
256
- << "invalid escape sequence \"\\" << regex[i] << "\".";
257
- is_valid = false;
258
- }
259
- prev_repeatable = true;
260
- } else { // Not an escape sequence.
261
- const char ch = regex[i];
262
-
263
- if (ch == '^' && i > 0) {
264
- ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
265
- << "'^' can only appear at the beginning.";
266
- is_valid = false;
267
- } else if (ch == '$' && regex[i + 1] != '\0') {
268
- ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
269
- << "'$' can only appear at the end.";
270
- is_valid = false;
271
- } else if (IsInSet(ch, "()[]{}|")) {
272
- ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
273
- << "'" << ch << "' is unsupported.";
274
- is_valid = false;
275
- } else if (IsRepeat(ch) && !prev_repeatable) {
276
- ADD_FAILURE() << FormatRegexSyntaxError(regex, i)
277
- << "'" << ch << "' can only follow a repeatable token.";
278
- is_valid = false;
279
- }
280
-
281
- prev_repeatable = !IsInSet(ch, "^$?*+");
282
- }
283
- }
284
-
285
- return is_valid;
286
- }
287
-
288
- // Matches a repeated regex atom followed by a valid simple regular
289
- // expression. The regex atom is defined as c if escaped is false,
290
- // or \c otherwise. repeat is the repetition meta character (?, *,
291
- // or +). The behavior is undefined if str contains too many
292
- // characters to be indexable by size_t, in which case the test will
293
- // probably time out anyway. We are fine with this limitation as
294
- // std::string has it too.
295
- bool MatchRepetitionAndRegexAtHead(
296
- bool escaped, char c, char repeat, const char* regex,
297
- const char* str) {
298
- const size_t min_count = (repeat == '+') ? 1 : 0;
299
- const size_t max_count = (repeat == '?') ? 1 :
300
- static_cast<size_t>(-1) - 1;
301
- // We cannot call numeric_limits::max() as it conflicts with the
302
- // max() macro on Windows.
303
-
304
- for (size_t i = 0; i <= max_count; ++i) {
305
- // We know that the atom matches each of the first i characters in str.
306
- if (i >= min_count && MatchRegexAtHead(regex, str + i)) {
307
- // We have enough matches at the head, and the tail matches too.
308
- // Since we only care about *whether* the pattern matches str
309
- // (as opposed to *how* it matches), there is no need to find a
310
- // greedy match.
311
- return true;
312
- }
313
- if (str[i] == '\0' || !AtomMatchesChar(escaped, c, str[i]))
314
- return false;
315
- }
316
- return false;
317
- }
318
-
319
- // Returns true iff regex matches a prefix of str. regex must be a
320
- // valid simple regular expression and not start with "^", or the
321
- // result is undefined.
322
- bool MatchRegexAtHead(const char* regex, const char* str) {
323
- if (*regex == '\0') // An empty regex matches a prefix of anything.
324
- return true;
325
-
326
- // "$" only matches the end of a string. Note that regex being
327
- // valid guarantees that there's nothing after "$" in it.
328
- if (*regex == '$')
329
- return *str == '\0';
330
-
331
- // Is the first thing in regex an escape sequence?
332
- const bool escaped = *regex == '\\';
333
- if (escaped)
334
- ++regex;
335
- if (IsRepeat(regex[1])) {
336
- // MatchRepetitionAndRegexAtHead() calls MatchRegexAtHead(), so
337
- // here's an indirect recursion. It terminates as the regex gets
338
- // shorter in each recursion.
339
- return MatchRepetitionAndRegexAtHead(
340
- escaped, regex[0], regex[1], regex + 2, str);
341
- } else {
342
- // regex isn't empty, isn't "$", and doesn't start with a
343
- // repetition. We match the first atom of regex with the first
344
- // character of str and recurse.
345
- return (*str != '\0') && AtomMatchesChar(escaped, *regex, *str) &&
346
- MatchRegexAtHead(regex + 1, str + 1);
347
- }
348
- }
349
-
350
- // Returns true iff regex matches any substring of str. regex must be
351
- // a valid simple regular expression, or the result is undefined.
352
- //
353
- // The algorithm is recursive, but the recursion depth doesn't exceed
354
- // the regex length, so we won't need to worry about running out of
355
- // stack space normally. In rare cases the time complexity can be
356
- // exponential with respect to the regex length + the string length,
357
- // but usually it's much faster (often close to linear).
358
- bool MatchRegexAnywhere(const char* regex, const char* str) {
359
- if (regex == NULL || str == NULL)
360
- return false;
361
-
362
- if (*regex == '^')
363
- return MatchRegexAtHead(regex + 1, str);
364
-
365
- // A successful match can be anywhere in str.
366
- do {
367
- if (MatchRegexAtHead(regex, str))
368
- return true;
369
- } while (*str++ != '\0');
370
- return false;
371
- }
372
-
373
- // Implements the RE class.
374
-
375
- RE::~RE() {
376
- free(const_cast<char*>(pattern_));
377
- free(const_cast<char*>(full_pattern_));
378
- }
379
-
380
- // Returns true iff regular expression re matches the entire str.
381
- bool RE::FullMatch(const char* str, const RE& re) {
382
- return re.is_valid_ && MatchRegexAnywhere(re.full_pattern_, str);
383
- }
384
-
385
- // Returns true iff regular expression re matches a substring of str
386
- // (including str itself).
387
- bool RE::PartialMatch(const char* str, const RE& re) {
388
- return re.is_valid_ && MatchRegexAnywhere(re.pattern_, str);
389
- }
390
-
391
- // Initializes an RE from its string representation.
392
- void RE::Init(const char* regex) {
393
- pattern_ = full_pattern_ = NULL;
394
- if (regex != NULL) {
395
- pattern_ = posix::StrDup(regex);
396
- }
397
-
398
- is_valid_ = ValidateRegex(regex);
399
- if (!is_valid_) {
400
- // No need to calculate the full pattern when the regex is invalid.
401
- return;
402
- }
403
-
404
- const size_t len = strlen(regex);
405
- // Reserves enough bytes to hold the regular expression used for a
406
- // full match: we need space to prepend a '^', append a '$', and
407
- // terminate the string with '\0'.
408
- char* buffer = static_cast<char*>(malloc(len + 3));
409
- full_pattern_ = buffer;
410
-
411
- if (*regex != '^')
412
- *buffer++ = '^'; // Makes sure full_pattern_ starts with '^'.
413
-
414
- // We don't use snprintf or strncpy, as they trigger a warning when
415
- // compiled with VC++ 8.0.
416
- memcpy(buffer, regex, len);
417
- buffer += len;
418
-
419
- if (len == 0 || regex[len - 1] != '$')
420
- *buffer++ = '$'; // Makes sure full_pattern_ ends with '$'.
421
-
422
- *buffer = '\0';
423
- }
424
-
425
- #endif // GTEST_USES_POSIX_RE
426
-
427
- const char kUnknownFile[] = "unknown file";
428
-
429
- // Formats a source file path and a line number as they would appear
430
- // in an error message from the compiler used to compile this code.
431
- GTEST_API_ ::std::string FormatFileLocation(const char* file, int line) {
432
- const char* const file_name = file == NULL ? kUnknownFile : file;
433
-
434
- if (line < 0) {
435
- return String::Format("%s:", file_name).c_str();
436
- }
437
- #ifdef _MSC_VER
438
- return String::Format("%s(%d):", file_name, line).c_str();
439
- #else
440
- return String::Format("%s:%d:", file_name, line).c_str();
441
- #endif // _MSC_VER
442
- }
443
-
444
- // Formats a file location for compiler-independent XML output.
445
- // Although this function is not platform dependent, we put it next to
446
- // FormatFileLocation in order to contrast the two functions.
447
- // Note that FormatCompilerIndependentFileLocation() does NOT append colon
448
- // to the file location it produces, unlike FormatFileLocation().
449
- GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(
450
- const char* file, int line) {
451
- const char* const file_name = file == NULL ? kUnknownFile : file;
452
-
453
- if (line < 0)
454
- return file_name;
455
- else
456
- return String::Format("%s:%d", file_name, line).c_str();
457
- }
458
-
459
-
460
- GTestLog::GTestLog(GTestLogSeverity severity, const char* file, int line)
461
- : severity_(severity) {
462
- const char* const marker =
463
- severity == GTEST_INFO ? "[ INFO ]" :
464
- severity == GTEST_WARNING ? "[WARNING]" :
465
- severity == GTEST_ERROR ? "[ ERROR ]" : "[ FATAL ]";
466
- GetStream() << ::std::endl << marker << " "
467
- << FormatFileLocation(file, line).c_str() << ": ";
468
- }
469
-
470
- // Flushes the buffers and, if severity is GTEST_FATAL, aborts the program.
471
- GTestLog::~GTestLog() {
472
- GetStream() << ::std::endl;
473
- if (severity_ == GTEST_FATAL) {
474
- fflush(stderr);
475
- posix::Abort();
476
- }
477
- }
478
- // Disable Microsoft deprecation warnings for POSIX functions called from
479
- // this class (creat, dup, dup2, and close)
480
- #ifdef _MSC_VER
481
- # pragma warning(push)
482
- # pragma warning(disable: 4996)
483
- #endif // _MSC_VER
484
-
485
- #if GTEST_HAS_STREAM_REDIRECTION
486
-
487
- // Object that captures an output stream (stdout/stderr).
488
- class CapturedStream {
489
- public:
490
- // The ctor redirects the stream to a temporary file.
491
- CapturedStream(int fd) : fd_(fd), uncaptured_fd_(dup(fd)) {
492
-
493
- # if GTEST_OS_WINDOWS
494
- char temp_dir_path[MAX_PATH + 1] = { '\0' }; // NOLINT
495
- char temp_file_path[MAX_PATH + 1] = { '\0' }; // NOLINT
496
-
497
- ::GetTempPathA(sizeof(temp_dir_path), temp_dir_path);
498
- const UINT success = ::GetTempFileNameA(temp_dir_path,
499
- "gtest_redir",
500
- 0, // Generate unique file name.
501
- temp_file_path);
502
- GTEST_CHECK_(success != 0)
503
- << "Unable to create a temporary file in " << temp_dir_path;
504
- const int captured_fd = creat(temp_file_path, _S_IREAD | _S_IWRITE);
505
- GTEST_CHECK_(captured_fd != -1) << "Unable to open temporary file "
506
- << temp_file_path;
507
- filename_ = temp_file_path;
508
- # else
509
- // There's no guarantee that a test has write access to the
510
- // current directory, so we create the temporary file in the /tmp
511
- // directory instead.
512
- char name_template[] = "/tmp/captured_stream.XXXXXX";
513
- const int captured_fd = mkstemp(name_template);
514
- filename_ = name_template;
515
- # endif // GTEST_OS_WINDOWS
516
- fflush(NULL);
517
- dup2(captured_fd, fd_);
518
- close(captured_fd);
519
- }
520
-
521
- ~CapturedStream() {
522
- remove(filename_.c_str());
523
- }
524
-
525
- String GetCapturedString() {
526
- if (uncaptured_fd_ != -1) {
527
- // Restores the original stream.
528
- fflush(NULL);
529
- dup2(uncaptured_fd_, fd_);
530
- close(uncaptured_fd_);
531
- uncaptured_fd_ = -1;
532
- }
533
-
534
- FILE* const file = posix::FOpen(filename_.c_str(), "r");
535
- const String content = ReadEntireFile(file);
536
- posix::FClose(file);
537
- return content;
538
- }
539
-
540
- private:
541
- // Reads the entire content of a file as a String.
542
- static String ReadEntireFile(FILE* file);
543
-
544
- // Returns the size (in bytes) of a file.
545
- static size_t GetFileSize(FILE* file);
546
-
547
- const int fd_; // A stream to capture.
548
- int uncaptured_fd_;
549
- // Name of the temporary file holding the captured output.
550
- ::std::string filename_;
551
-
552
- GTEST_DISALLOW_COPY_AND_ASSIGN_(CapturedStream);
553
- };
554
-
555
- // Returns the size (in bytes) of a file.
556
- size_t CapturedStream::GetFileSize(FILE* file) {
557
- fseek(file, 0, SEEK_END);
558
- return static_cast<size_t>(ftell(file));
559
- }
560
-
561
- // Reads the entire content of a file as a string.
562
- String CapturedStream::ReadEntireFile(FILE* file) {
563
- const size_t file_size = GetFileSize(file);
564
- char* const buffer = new char[file_size];
565
-
566
- size_t bytes_last_read = 0; // # of bytes read in the last fread()
567
- size_t bytes_read = 0; // # of bytes read so far
568
-
569
- fseek(file, 0, SEEK_SET);
570
-
571
- // Keeps reading the file until we cannot read further or the
572
- // pre-determined file size is reached.
573
- do {
574
- bytes_last_read = fread(buffer+bytes_read, 1, file_size-bytes_read, file);
575
- bytes_read += bytes_last_read;
576
- } while (bytes_last_read > 0 && bytes_read < file_size);
577
-
578
- const String content(buffer, bytes_read);
579
- delete[] buffer;
580
-
581
- return content;
582
- }
583
-
584
- # ifdef _MSC_VER
585
- # pragma warning(pop)
586
- # endif // _MSC_VER
587
-
588
- static CapturedStream* g_captured_stderr = NULL;
589
- static CapturedStream* g_captured_stdout = NULL;
590
-
591
- // Starts capturing an output stream (stdout/stderr).
592
- void CaptureStream(int fd, const char* stream_name, CapturedStream** stream) {
593
- if (*stream != NULL) {
594
- GTEST_LOG_(FATAL) << "Only one " << stream_name
595
- << " capturer can exist at a time.";
596
- }
597
- *stream = new CapturedStream(fd);
598
- }
599
-
600
- // Stops capturing the output stream and returns the captured string.
601
- String GetCapturedStream(CapturedStream** captured_stream) {
602
- const String content = (*captured_stream)->GetCapturedString();
603
-
604
- delete *captured_stream;
605
- *captured_stream = NULL;
606
-
607
- return content;
608
- }
609
-
610
- // Starts capturing stdout.
611
- void CaptureStdout() {
612
- CaptureStream(kStdOutFileno, "stdout", &g_captured_stdout);
613
- }
614
-
615
- // Starts capturing stderr.
616
- void CaptureStderr() {
617
- CaptureStream(kStdErrFileno, "stderr", &g_captured_stderr);
618
- }
619
-
620
- // Stops capturing stdout and returns the captured string.
621
- String GetCapturedStdout() { return GetCapturedStream(&g_captured_stdout); }
622
-
623
- // Stops capturing stderr and returns the captured string.
624
- String GetCapturedStderr() { return GetCapturedStream(&g_captured_stderr); }
625
-
626
- #endif // GTEST_HAS_STREAM_REDIRECTION
627
-
628
- #if GTEST_HAS_DEATH_TEST
629
-
630
- // A copy of all command line arguments. Set by InitGoogleTest().
631
- ::std::vector<String> g_argvs;
632
-
633
- // Returns the command line as a vector of strings.
634
- const ::std::vector<String>& GetArgvs() { return g_argvs; }
635
-
636
- #endif // GTEST_HAS_DEATH_TEST
637
-
638
- #if GTEST_OS_WINDOWS_MOBILE
639
- namespace posix {
640
- void Abort() {
641
- DebugBreak();
642
- TerminateProcess(GetCurrentProcess(), 1);
643
- }
644
- } // namespace posix
645
- #endif // GTEST_OS_WINDOWS_MOBILE
646
-
647
- // Returns the name of the environment variable corresponding to the
648
- // given flag. For example, FlagToEnvVar("foo") will return
649
- // "GTEST_FOO" in the open-source version.
650
- static String FlagToEnvVar(const char* flag) {
651
- const String full_flag =
652
- (Message() << GTEST_FLAG_PREFIX_ << flag).GetString();
653
-
654
- Message env_var;
655
- for (size_t i = 0; i != full_flag.length(); i++) {
656
- env_var << ToUpper(full_flag.c_str()[i]);
657
- }
658
-
659
- return env_var.GetString();
660
- }
661
-
662
- // Parses 'str' for a 32-bit signed integer. If successful, writes
663
- // the result to *value and returns true; otherwise leaves *value
664
- // unchanged and returns false.
665
- bool ParseInt32(const Message& src_text, const char* str, Int32* value) {
666
- // Parses the environment variable as a decimal integer.
667
- char* end = NULL;
668
- const long long_value = strtol(str, &end, 10); // NOLINT
669
-
670
- // Has strtol() consumed all characters in the string?
671
- if (*end != '\0') {
672
- // No - an invalid character was encountered.
673
- Message msg;
674
- msg << "WARNING: " << src_text
675
- << " is expected to be a 32-bit integer, but actually"
676
- << " has value \"" << str << "\".\n";
677
- printf("%s", msg.GetString().c_str());
678
- fflush(stdout);
679
- return false;
680
- }
681
-
682
- // Is the parsed value in the range of an Int32?
683
- const Int32 result = static_cast<Int32>(long_value);
684
- if (long_value == LONG_MAX || long_value == LONG_MIN ||
685
- // The parsed value overflows as a long. (strtol() returns
686
- // LONG_MAX or LONG_MIN when the input overflows.)
687
- result != long_value
688
- // The parsed value overflows as an Int32.
689
- ) {
690
- Message msg;
691
- msg << "WARNING: " << src_text
692
- << " is expected to be a 32-bit integer, but actually"
693
- << " has value " << str << ", which overflows.\n";
694
- printf("%s", msg.GetString().c_str());
695
- fflush(stdout);
696
- return false;
697
- }
698
-
699
- *value = result;
700
- return true;
701
- }
702
-
703
- // Reads and returns the Boolean environment variable corresponding to
704
- // the given flag; if it's not set, returns default_value.
705
- //
706
- // The value is considered true iff it's not "0".
707
- bool BoolFromGTestEnv(const char* flag, bool default_value) {
708
- const String env_var = FlagToEnvVar(flag);
709
- const char* const string_value = posix::GetEnv(env_var.c_str());
710
- return string_value == NULL ?
711
- default_value : strcmp(string_value, "0") != 0;
712
- }
713
-
714
- // Reads and returns a 32-bit integer stored in the environment
715
- // variable corresponding to the given flag; if it isn't set or
716
- // doesn't represent a valid 32-bit integer, returns default_value.
717
- Int32 Int32FromGTestEnv(const char* flag, Int32 default_value) {
718
- const String env_var = FlagToEnvVar(flag);
719
- const char* const string_value = posix::GetEnv(env_var.c_str());
720
- if (string_value == NULL) {
721
- // The environment variable is not set.
722
- return default_value;
723
- }
724
-
725
- Int32 result = default_value;
726
- if (!ParseInt32(Message() << "Environment variable " << env_var,
727
- string_value, &result)) {
728
- printf("The default value %s is used.\n",
729
- (Message() << default_value).GetString().c_str());
730
- fflush(stdout);
731
- return default_value;
732
- }
733
-
734
- return result;
735
- }
736
-
737
- // Reads and returns the string environment variable corresponding to
738
- // the given flag; if it's not set, returns default_value.
739
- const char* StringFromGTestEnv(const char* flag, const char* default_value) {
740
- const String env_var = FlagToEnvVar(flag);
741
- const char* const value = posix::GetEnv(env_var.c_str());
742
- return value == NULL ? default_value : value;
743
- }
744
-
745
- } // namespace internal
746
- } // namespace testing