rapidjson 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/CODE_OF_CONDUCT.md +84 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +110 -0
  5. data/Rakefile +20 -0
  6. data/ext/rapidjson/buffer.hh +66 -0
  7. data/ext/rapidjson/cext.cc +77 -0
  8. data/ext/rapidjson/cext.hh +20 -0
  9. data/ext/rapidjson/encoder.hh +150 -0
  10. data/ext/rapidjson/extconf.rb +19 -0
  11. data/ext/rapidjson/parser.hh +149 -0
  12. data/ext/rapidjson/rapidjson/include/rapidjson/allocators.h +692 -0
  13. data/ext/rapidjson/rapidjson/include/rapidjson/cursorstreamwrapper.h +78 -0
  14. data/ext/rapidjson/rapidjson/include/rapidjson/document.h +3027 -0
  15. data/ext/rapidjson/rapidjson/include/rapidjson/encodedstream.h +299 -0
  16. data/ext/rapidjson/rapidjson/include/rapidjson/encodings.h +716 -0
  17. data/ext/rapidjson/rapidjson/include/rapidjson/error/en.h +122 -0
  18. data/ext/rapidjson/rapidjson/include/rapidjson/error/error.h +216 -0
  19. data/ext/rapidjson/rapidjson/include/rapidjson/filereadstream.h +99 -0
  20. data/ext/rapidjson/rapidjson/include/rapidjson/filewritestream.h +104 -0
  21. data/ext/rapidjson/rapidjson/include/rapidjson/fwd.h +151 -0
  22. data/ext/rapidjson/rapidjson/include/rapidjson/internal/biginteger.h +297 -0
  23. data/ext/rapidjson/rapidjson/include/rapidjson/internal/clzll.h +71 -0
  24. data/ext/rapidjson/rapidjson/include/rapidjson/internal/diyfp.h +261 -0
  25. data/ext/rapidjson/rapidjson/include/rapidjson/internal/dtoa.h +249 -0
  26. data/ext/rapidjson/rapidjson/include/rapidjson/internal/ieee754.h +78 -0
  27. data/ext/rapidjson/rapidjson/include/rapidjson/internal/itoa.h +308 -0
  28. data/ext/rapidjson/rapidjson/include/rapidjson/internal/meta.h +186 -0
  29. data/ext/rapidjson/rapidjson/include/rapidjson/internal/pow10.h +55 -0
  30. data/ext/rapidjson/rapidjson/include/rapidjson/internal/regex.h +739 -0
  31. data/ext/rapidjson/rapidjson/include/rapidjson/internal/stack.h +232 -0
  32. data/ext/rapidjson/rapidjson/include/rapidjson/internal/strfunc.h +83 -0
  33. data/ext/rapidjson/rapidjson/include/rapidjson/internal/strtod.h +293 -0
  34. data/ext/rapidjson/rapidjson/include/rapidjson/internal/swap.h +46 -0
  35. data/ext/rapidjson/rapidjson/include/rapidjson/istreamwrapper.h +128 -0
  36. data/ext/rapidjson/rapidjson/include/rapidjson/memorybuffer.h +70 -0
  37. data/ext/rapidjson/rapidjson/include/rapidjson/memorystream.h +71 -0
  38. data/ext/rapidjson/rapidjson/include/rapidjson/msinttypes/inttypes.h +316 -0
  39. data/ext/rapidjson/rapidjson/include/rapidjson/msinttypes/stdint.h +300 -0
  40. data/ext/rapidjson/rapidjson/include/rapidjson/ostreamwrapper.h +81 -0
  41. data/ext/rapidjson/rapidjson/include/rapidjson/pointer.h +1482 -0
  42. data/ext/rapidjson/rapidjson/include/rapidjson/prettywriter.h +277 -0
  43. data/ext/rapidjson/rapidjson/include/rapidjson/rapidjson.h +741 -0
  44. data/ext/rapidjson/rapidjson/include/rapidjson/reader.h +2246 -0
  45. data/ext/rapidjson/rapidjson/include/rapidjson/schema.h +2795 -0
  46. data/ext/rapidjson/rapidjson/include/rapidjson/stream.h +223 -0
  47. data/ext/rapidjson/rapidjson/include/rapidjson/stringbuffer.h +121 -0
  48. data/ext/rapidjson/rapidjson/include/rapidjson/uri.h +481 -0
  49. data/ext/rapidjson/rapidjson/include/rapidjson/writer.h +710 -0
  50. data/lib/rapidjson/json_gem.rb +36 -0
  51. data/lib/rapidjson/version.rb +5 -0
  52. data/lib/rapidjson.rb +9 -0
  53. metadata +98 -0
@@ -0,0 +1,739 @@
1
+ // Tencent is pleased to support the open source community by making RapidJSON available.
2
+ //
3
+ // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
4
+ //
5
+ // Licensed under the MIT License (the "License"); you may not use this file except
6
+ // in compliance with the License. You may obtain a copy of the License at
7
+ //
8
+ // http://opensource.org/licenses/MIT
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software distributed
11
+ // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12
+ // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13
+ // specific language governing permissions and limitations under the License.
14
+
15
+ #ifndef RAPIDJSON_INTERNAL_REGEX_H_
16
+ #define RAPIDJSON_INTERNAL_REGEX_H_
17
+
18
+ #include "../allocators.h"
19
+ #include "../stream.h"
20
+ #include "stack.h"
21
+
22
+ #ifdef __clang__
23
+ RAPIDJSON_DIAG_PUSH
24
+ RAPIDJSON_DIAG_OFF(padded)
25
+ RAPIDJSON_DIAG_OFF(switch-enum)
26
+ #elif defined(_MSC_VER)
27
+ RAPIDJSON_DIAG_PUSH
28
+ RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
29
+ #endif
30
+
31
+ #ifdef __GNUC__
32
+ RAPIDJSON_DIAG_PUSH
33
+ RAPIDJSON_DIAG_OFF(effc++)
34
+ #endif
35
+
36
+ #ifndef RAPIDJSON_REGEX_VERBOSE
37
+ #define RAPIDJSON_REGEX_VERBOSE 0
38
+ #endif
39
+
40
+ RAPIDJSON_NAMESPACE_BEGIN
41
+ namespace internal {
42
+
43
+ ///////////////////////////////////////////////////////////////////////////////
44
+ // DecodedStream
45
+
46
//! Presents an encoded source stream as a sequence of decoded code points.
/*!
    The stream always holds one code point of look-ahead: the constructor decodes
    the first one, and every successful Take() decodes the next. A code point of 0
    marks the end; it is also reported when Encoding::Decode() fails.

    \tparam SourceStream Stream type accepted by Encoding::Decode().
    \tparam Encoding     Encoding providing a static Decode(stream, unsigned*) function.
*/
template <typename SourceStream, typename Encoding>
class DecodedStream {
public:
    DecodedStream(SourceStream& ss) : ss_(ss), codepoint_() { Decode(); }

    //! Returns the current code point without consuming it.
    unsigned Peek() { return codepoint_; }

    //! Returns the current code point and advances, unless the stream already ended.
    unsigned Take() {
        const unsigned taken = codepoint_;
        if (taken != 0) // Once 0 has been reached nothing more is decoded
            Decode();
        return taken;
    }

private:
    //! Loads the next code point into the look-ahead slot; 0 when decoding fails.
    void Decode() {
        const bool decoded = Encoding::Decode(ss_, &codepoint_);
        if (!decoded)
            codepoint_ = 0;
    }

    SourceStream& ss_;
    unsigned codepoint_;
};
67
+
68
+ ///////////////////////////////////////////////////////////////////////////////
69
+ // GenericRegex
70
+
71
+ static const SizeType kRegexInvalidState = ~SizeType(0); //!< Represents an invalid index in GenericRegex::State::out, out1
72
+ static const SizeType kRegexInvalidRange = ~SizeType(0);
73
+
74
+ template <typename Encoding, typename Allocator>
75
+ class GenericRegexSearch;
76
+
77
+ //! Regular expression engine with subset of ECMAscript grammar.
78
+ /*!
79
+ Supported regular expression syntax:
80
+ - \c ab Concatenation
81
+ - \c a|b Alternation
82
+ - \c a? Zero or one
83
+ - \c a* Zero or more
84
+ - \c a+ One or more
85
+ - \c a{3} Exactly 3 times
86
+ - \c a{3,} At least 3 times
87
+ - \c a{3,5} 3 to 5 times
88
+ - \c (ab) Grouping
89
+ - \c ^a At the beginning
90
+ - \c a$ At the end
91
+ - \c . Any character
92
+ - \c [abc] Character classes
93
+ - \c [a-c] Character class range
94
+ - \c [a-z0-9_] Character class combination
95
+ - \c [^abc] Negated character classes
96
+ - \c [^a-c] Negated character class range
97
+ - \c [\b] Backspace (U+0008)
98
+ - \c \\| \\\\ ... Escape characters
99
+ - \c \\f Form feed (U+000C)
100
+ - \c \\n Line feed (U+000A)
101
+ - \c \\r Carriage return (U+000D)
102
+ - \c \\t Tab (U+0009)
103
+ - \c \\v Vertical tab (U+000B)
104
+
105
+ \note This is a Thompson NFA engine, implemented with reference to
106
+ Cox, Russ. "Regular Expression Matching Can Be Simple And Fast (but is slow in Java, Perl, PHP, Python, Ruby,...).",
107
+ https://swtch.com/~rsc/regexp/regexp1.html
108
+ */
109
+ template <typename Encoding, typename Allocator = CrtAllocator>
110
+ class GenericRegex {
111
+ public:
112
+ typedef Encoding EncodingType;
113
+ typedef typename Encoding::Ch Ch;
114
+ template <typename, typename> friend class GenericRegexSearch;
115
+
116
+ GenericRegex(const Ch* source, Allocator* allocator = 0) :
117
+ ownAllocator_(allocator ? 0 : RAPIDJSON_NEW(Allocator)()), allocator_(allocator ? allocator : ownAllocator_),
118
+ states_(allocator_, 256), ranges_(allocator_, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(),
119
+ anchorBegin_(), anchorEnd_()
120
+ {
121
+ GenericStringStream<Encoding> ss(source);
122
+ DecodedStream<GenericStringStream<Encoding>, Encoding> ds(ss);
123
+ Parse(ds);
124
+ }
125
+
126
+ ~GenericRegex()
127
+ {
128
+ RAPIDJSON_DELETE(ownAllocator_);
129
+ }
130
+
131
+ bool IsValid() const {
132
+ return root_ != kRegexInvalidState;
133
+ }
134
+
135
+ private:
136
+ enum Operator {
137
+ kZeroOrOne,
138
+ kZeroOrMore,
139
+ kOneOrMore,
140
+ kConcatenation,
141
+ kAlternation,
142
+ kLeftParenthesis
143
+ };
144
+
145
+ static const unsigned kAnyCharacterClass = 0xFFFFFFFF; //!< For '.'
146
+ static const unsigned kRangeCharacterClass = 0xFFFFFFFE;
147
+ static const unsigned kRangeNegationFlag = 0x80000000;
148
+
149
+ struct Range {
150
+ unsigned start; //
151
+ unsigned end;
152
+ SizeType next;
153
+ };
154
+
155
+ struct State {
156
+ SizeType out; //!< Equals to kInvalid for matching state
157
+ SizeType out1; //!< Equals to non-kInvalid for split
158
+ SizeType rangeStart;
159
+ unsigned codepoint;
160
+ };
161
+
162
+ struct Frag {
163
+ Frag(SizeType s, SizeType o, SizeType m) : start(s), out(o), minIndex(m) {}
164
+ SizeType start;
165
+ SizeType out; //!< link-list of all output states
166
+ SizeType minIndex;
167
+ };
168
+
169
+ State& GetState(SizeType index) {
170
+ RAPIDJSON_ASSERT(index < stateCount_);
171
+ return states_.template Bottom<State>()[index];
172
+ }
173
+
174
+ const State& GetState(SizeType index) const {
175
+ RAPIDJSON_ASSERT(index < stateCount_);
176
+ return states_.template Bottom<State>()[index];
177
+ }
178
+
179
+ Range& GetRange(SizeType index) {
180
+ RAPIDJSON_ASSERT(index < rangeCount_);
181
+ return ranges_.template Bottom<Range>()[index];
182
+ }
183
+
184
+ const Range& GetRange(SizeType index) const {
185
+ RAPIDJSON_ASSERT(index < rangeCount_);
186
+ return ranges_.template Bottom<Range>()[index];
187
+ }
188
+
189
+ template <typename InputStream>
190
+ void Parse(DecodedStream<InputStream, Encoding>& ds) {
191
+ Stack<Allocator> operandStack(allocator_, 256); // Frag
192
+ Stack<Allocator> operatorStack(allocator_, 256); // Operator
193
+ Stack<Allocator> atomCountStack(allocator_, 256); // unsigned (Atom per parenthesis)
194
+
195
+ *atomCountStack.template Push<unsigned>() = 0;
196
+
197
+ unsigned codepoint;
198
+ while (ds.Peek() != 0) {
199
+ switch (codepoint = ds.Take()) {
200
+ case '^':
201
+ anchorBegin_ = true;
202
+ break;
203
+
204
+ case '$':
205
+ anchorEnd_ = true;
206
+ break;
207
+
208
+ case '|':
209
+ while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() < kAlternation)
210
+ if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
211
+ return;
212
+ *operatorStack.template Push<Operator>() = kAlternation;
213
+ *atomCountStack.template Top<unsigned>() = 0;
214
+ break;
215
+
216
+ case '(':
217
+ *operatorStack.template Push<Operator>() = kLeftParenthesis;
218
+ *atomCountStack.template Push<unsigned>() = 0;
219
+ break;
220
+
221
+ case ')':
222
+ while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() != kLeftParenthesis)
223
+ if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
224
+ return;
225
+ if (operatorStack.Empty())
226
+ return;
227
+ operatorStack.template Pop<Operator>(1);
228
+ atomCountStack.template Pop<unsigned>(1);
229
+ ImplicitConcatenation(atomCountStack, operatorStack);
230
+ break;
231
+
232
+ case '?':
233
+ if (!Eval(operandStack, kZeroOrOne))
234
+ return;
235
+ break;
236
+
237
+ case '*':
238
+ if (!Eval(operandStack, kZeroOrMore))
239
+ return;
240
+ break;
241
+
242
+ case '+':
243
+ if (!Eval(operandStack, kOneOrMore))
244
+ return;
245
+ break;
246
+
247
+ case '{':
248
+ {
249
+ unsigned n, m;
250
+ if (!ParseUnsigned(ds, &n))
251
+ return;
252
+
253
+ if (ds.Peek() == ',') {
254
+ ds.Take();
255
+ if (ds.Peek() == '}')
256
+ m = kInfinityQuantifier;
257
+ else if (!ParseUnsigned(ds, &m) || m < n)
258
+ return;
259
+ }
260
+ else
261
+ m = n;
262
+
263
+ if (!EvalQuantifier(operandStack, n, m) || ds.Peek() != '}')
264
+ return;
265
+ ds.Take();
266
+ }
267
+ break;
268
+
269
+ case '.':
270
+ PushOperand(operandStack, kAnyCharacterClass);
271
+ ImplicitConcatenation(atomCountStack, operatorStack);
272
+ break;
273
+
274
+ case '[':
275
+ {
276
+ SizeType range;
277
+ if (!ParseRange(ds, &range))
278
+ return;
279
+ SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, kRangeCharacterClass);
280
+ GetState(s).rangeStart = range;
281
+ *operandStack.template Push<Frag>() = Frag(s, s, s);
282
+ }
283
+ ImplicitConcatenation(atomCountStack, operatorStack);
284
+ break;
285
+
286
+ case '\\': // Escape character
287
+ if (!CharacterEscape(ds, &codepoint))
288
+ return; // Unsupported escape character
289
+ // fall through to default
290
+ RAPIDJSON_DELIBERATE_FALLTHROUGH;
291
+
292
+ default: // Pattern character
293
+ PushOperand(operandStack, codepoint);
294
+ ImplicitConcatenation(atomCountStack, operatorStack);
295
+ }
296
+ }
297
+
298
+ while (!operatorStack.Empty())
299
+ if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
300
+ return;
301
+
302
+ // Link the operand to matching state.
303
+ if (operandStack.GetSize() == sizeof(Frag)) {
304
+ Frag* e = operandStack.template Pop<Frag>(1);
305
+ Patch(e->out, NewState(kRegexInvalidState, kRegexInvalidState, 0));
306
+ root_ = e->start;
307
+
308
+ #if RAPIDJSON_REGEX_VERBOSE
309
+ printf("root: %d\n", root_);
310
+ for (SizeType i = 0; i < stateCount_ ; i++) {
311
+ State& s = GetState(i);
312
+ printf("[%2d] out: %2d out1: %2d c: '%c'\n", i, s.out, s.out1, (char)s.codepoint);
313
+ }
314
+ printf("\n");
315
+ #endif
316
+ }
317
+ }
318
+
319
+ SizeType NewState(SizeType out, SizeType out1, unsigned codepoint) {
320
+ State* s = states_.template Push<State>();
321
+ s->out = out;
322
+ s->out1 = out1;
323
+ s->codepoint = codepoint;
324
+ s->rangeStart = kRegexInvalidRange;
325
+ return stateCount_++;
326
+ }
327
+
328
+ void PushOperand(Stack<Allocator>& operandStack, unsigned codepoint) {
329
+ SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, codepoint);
330
+ *operandStack.template Push<Frag>() = Frag(s, s, s);
331
+ }
332
+
333
+ void ImplicitConcatenation(Stack<Allocator>& atomCountStack, Stack<Allocator>& operatorStack) {
334
+ if (*atomCountStack.template Top<unsigned>())
335
+ *operatorStack.template Push<Operator>() = kConcatenation;
336
+ (*atomCountStack.template Top<unsigned>())++;
337
+ }
338
+
339
+ SizeType Append(SizeType l1, SizeType l2) {
340
+ SizeType old = l1;
341
+ while (GetState(l1).out != kRegexInvalidState)
342
+ l1 = GetState(l1).out;
343
+ GetState(l1).out = l2;
344
+ return old;
345
+ }
346
+
347
+ void Patch(SizeType l, SizeType s) {
348
+ for (SizeType next; l != kRegexInvalidState; l = next) {
349
+ next = GetState(l).out;
350
+ GetState(l).out = s;
351
+ }
352
+ }
353
+
354
+ bool Eval(Stack<Allocator>& operandStack, Operator op) {
355
+ switch (op) {
356
+ case kConcatenation:
357
+ RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag) * 2);
358
+ {
359
+ Frag e2 = *operandStack.template Pop<Frag>(1);
360
+ Frag e1 = *operandStack.template Pop<Frag>(1);
361
+ Patch(e1.out, e2.start);
362
+ *operandStack.template Push<Frag>() = Frag(e1.start, e2.out, Min(e1.minIndex, e2.minIndex));
363
+ }
364
+ return true;
365
+
366
+ case kAlternation:
367
+ if (operandStack.GetSize() >= sizeof(Frag) * 2) {
368
+ Frag e2 = *operandStack.template Pop<Frag>(1);
369
+ Frag e1 = *operandStack.template Pop<Frag>(1);
370
+ SizeType s = NewState(e1.start, e2.start, 0);
371
+ *operandStack.template Push<Frag>() = Frag(s, Append(e1.out, e2.out), Min(e1.minIndex, e2.minIndex));
372
+ return true;
373
+ }
374
+ return false;
375
+
376
+ case kZeroOrOne:
377
+ if (operandStack.GetSize() >= sizeof(Frag)) {
378
+ Frag e = *operandStack.template Pop<Frag>(1);
379
+ SizeType s = NewState(kRegexInvalidState, e.start, 0);
380
+ *operandStack.template Push<Frag>() = Frag(s, Append(e.out, s), e.minIndex);
381
+ return true;
382
+ }
383
+ return false;
384
+
385
+ case kZeroOrMore:
386
+ if (operandStack.GetSize() >= sizeof(Frag)) {
387
+ Frag e = *operandStack.template Pop<Frag>(1);
388
+ SizeType s = NewState(kRegexInvalidState, e.start, 0);
389
+ Patch(e.out, s);
390
+ *operandStack.template Push<Frag>() = Frag(s, s, e.minIndex);
391
+ return true;
392
+ }
393
+ return false;
394
+
395
+ case kOneOrMore:
396
+ if (operandStack.GetSize() >= sizeof(Frag)) {
397
+ Frag e = *operandStack.template Pop<Frag>(1);
398
+ SizeType s = NewState(kRegexInvalidState, e.start, 0);
399
+ Patch(e.out, s);
400
+ *operandStack.template Push<Frag>() = Frag(e.start, s, e.minIndex);
401
+ return true;
402
+ }
403
+ return false;
404
+
405
+ default:
406
+ // syntax error (e.g. unclosed kLeftParenthesis)
407
+ return false;
408
+ }
409
+ }
410
+
411
+ bool EvalQuantifier(Stack<Allocator>& operandStack, unsigned n, unsigned m) {
412
+ RAPIDJSON_ASSERT(n <= m);
413
+ RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag));
414
+
415
+ if (n == 0) {
416
+ if (m == 0) // a{0} not support
417
+ return false;
418
+ else if (m == kInfinityQuantifier)
419
+ Eval(operandStack, kZeroOrMore); // a{0,} -> a*
420
+ else {
421
+ Eval(operandStack, kZeroOrOne); // a{0,5} -> a?
422
+ for (unsigned i = 0; i < m - 1; i++)
423
+ CloneTopOperand(operandStack); // a{0,5} -> a? a? a? a? a?
424
+ for (unsigned i = 0; i < m - 1; i++)
425
+ Eval(operandStack, kConcatenation); // a{0,5} -> a?a?a?a?a?
426
+ }
427
+ return true;
428
+ }
429
+
430
+ for (unsigned i = 0; i < n - 1; i++) // a{3} -> a a a
431
+ CloneTopOperand(operandStack);
432
+
433
+ if (m == kInfinityQuantifier)
434
+ Eval(operandStack, kOneOrMore); // a{3,} -> a a a+
435
+ else if (m > n) {
436
+ CloneTopOperand(operandStack); // a{3,5} -> a a a a
437
+ Eval(operandStack, kZeroOrOne); // a{3,5} -> a a a a?
438
+ for (unsigned i = n; i < m - 1; i++)
439
+ CloneTopOperand(operandStack); // a{3,5} -> a a a a? a?
440
+ for (unsigned i = n; i < m; i++)
441
+ Eval(operandStack, kConcatenation); // a{3,5} -> a a aa?a?
442
+ }
443
+
444
+ for (unsigned i = 0; i < n - 1; i++)
445
+ Eval(operandStack, kConcatenation); // a{3} -> aaa, a{3,} -> aaa+, a{3.5} -> aaaa?a?
446
+
447
+ return true;
448
+ }
449
+
450
+ static SizeType Min(SizeType a, SizeType b) { return a < b ? a : b; }
451
+
452
+ void CloneTopOperand(Stack<Allocator>& operandStack) {
453
+ const Frag src = *operandStack.template Top<Frag>(); // Copy constructor to prevent invalidation
454
+ SizeType count = stateCount_ - src.minIndex; // Assumes top operand contains states in [src->minIndex, stateCount_)
455
+ State* s = states_.template Push<State>(count);
456
+ memcpy(s, &GetState(src.minIndex), count * sizeof(State));
457
+ for (SizeType j = 0; j < count; j++) {
458
+ if (s[j].out != kRegexInvalidState)
459
+ s[j].out += count;
460
+ if (s[j].out1 != kRegexInvalidState)
461
+ s[j].out1 += count;
462
+ }
463
+ *operandStack.template Push<Frag>() = Frag(src.start + count, src.out + count, src.minIndex + count);
464
+ stateCount_ += count;
465
+ }
466
+
467
+ template <typename InputStream>
468
+ bool ParseUnsigned(DecodedStream<InputStream, Encoding>& ds, unsigned* u) {
469
+ unsigned r = 0;
470
+ if (ds.Peek() < '0' || ds.Peek() > '9')
471
+ return false;
472
+ while (ds.Peek() >= '0' && ds.Peek() <= '9') {
473
+ if (r >= 429496729 && ds.Peek() > '5') // 2^32 - 1 = 4294967295
474
+ return false; // overflow
475
+ r = r * 10 + (ds.Take() - '0');
476
+ }
477
+ *u = r;
478
+ return true;
479
+ }
480
+
481
+ template <typename InputStream>
482
+ bool ParseRange(DecodedStream<InputStream, Encoding>& ds, SizeType* range) {
483
+ bool isBegin = true;
484
+ bool negate = false;
485
+ int step = 0;
486
+ SizeType start = kRegexInvalidRange;
487
+ SizeType current = kRegexInvalidRange;
488
+ unsigned codepoint;
489
+ while ((codepoint = ds.Take()) != 0) {
490
+ if (isBegin) {
491
+ isBegin = false;
492
+ if (codepoint == '^') {
493
+ negate = true;
494
+ continue;
495
+ }
496
+ }
497
+
498
+ switch (codepoint) {
499
+ case ']':
500
+ if (start == kRegexInvalidRange)
501
+ return false; // Error: nothing inside []
502
+ if (step == 2) { // Add trailing '-'
503
+ SizeType r = NewRange('-');
504
+ RAPIDJSON_ASSERT(current != kRegexInvalidRange);
505
+ GetRange(current).next = r;
506
+ }
507
+ if (negate)
508
+ GetRange(start).start |= kRangeNegationFlag;
509
+ *range = start;
510
+ return true;
511
+
512
+ case '\\':
513
+ if (ds.Peek() == 'b') {
514
+ ds.Take();
515
+ codepoint = 0x0008; // Escape backspace character
516
+ }
517
+ else if (!CharacterEscape(ds, &codepoint))
518
+ return false;
519
+ // fall through to default
520
+ RAPIDJSON_DELIBERATE_FALLTHROUGH;
521
+
522
+ default:
523
+ switch (step) {
524
+ case 1:
525
+ if (codepoint == '-') {
526
+ step++;
527
+ break;
528
+ }
529
+ // fall through to step 0 for other characters
530
+ RAPIDJSON_DELIBERATE_FALLTHROUGH;
531
+
532
+ case 0:
533
+ {
534
+ SizeType r = NewRange(codepoint);
535
+ if (current != kRegexInvalidRange)
536
+ GetRange(current).next = r;
537
+ if (start == kRegexInvalidRange)
538
+ start = r;
539
+ current = r;
540
+ }
541
+ step = 1;
542
+ break;
543
+
544
+ default:
545
+ RAPIDJSON_ASSERT(step == 2);
546
+ GetRange(current).end = codepoint;
547
+ step = 0;
548
+ }
549
+ }
550
+ }
551
+ return false;
552
+ }
553
+
554
+ SizeType NewRange(unsigned codepoint) {
555
+ Range* r = ranges_.template Push<Range>();
556
+ r->start = r->end = codepoint;
557
+ r->next = kRegexInvalidRange;
558
+ return rangeCount_++;
559
+ }
560
+
561
+ template <typename InputStream>
562
+ bool CharacterEscape(DecodedStream<InputStream, Encoding>& ds, unsigned* escapedCodepoint) {
563
+ unsigned codepoint;
564
+ switch (codepoint = ds.Take()) {
565
+ case '^':
566
+ case '$':
567
+ case '|':
568
+ case '(':
569
+ case ')':
570
+ case '?':
571
+ case '*':
572
+ case '+':
573
+ case '.':
574
+ case '[':
575
+ case ']':
576
+ case '{':
577
+ case '}':
578
+ case '\\':
579
+ *escapedCodepoint = codepoint; return true;
580
+ case 'f': *escapedCodepoint = 0x000C; return true;
581
+ case 'n': *escapedCodepoint = 0x000A; return true;
582
+ case 'r': *escapedCodepoint = 0x000D; return true;
583
+ case 't': *escapedCodepoint = 0x0009; return true;
584
+ case 'v': *escapedCodepoint = 0x000B; return true;
585
+ default:
586
+ return false; // Unsupported escape character
587
+ }
588
+ }
589
+
590
+ Allocator* ownAllocator_;
591
+ Allocator* allocator_;
592
+ Stack<Allocator> states_;
593
+ Stack<Allocator> ranges_;
594
+ SizeType root_;
595
+ SizeType stateCount_;
596
+ SizeType rangeCount_;
597
+
598
+ static const unsigned kInfinityQuantifier = ~0u;
599
+
600
+ // For SearchWithAnchoring()
601
+ bool anchorBegin_;
602
+ bool anchorEnd_;
603
+ };
604
+
605
+ template <typename RegexType, typename Allocator = CrtAllocator>
606
+ class GenericRegexSearch {
607
+ public:
608
+ typedef typename RegexType::EncodingType Encoding;
609
+ typedef typename Encoding::Ch Ch;
610
+
611
+ GenericRegexSearch(const RegexType& regex, Allocator* allocator = 0) :
612
+ regex_(regex), allocator_(allocator), ownAllocator_(0),
613
+ state0_(allocator, 0), state1_(allocator, 0), stateSet_()
614
+ {
615
+ RAPIDJSON_ASSERT(regex_.IsValid());
616
+ if (!allocator_)
617
+ ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
618
+ stateSet_ = static_cast<unsigned*>(allocator_->Malloc(GetStateSetSize()));
619
+ state0_.template Reserve<SizeType>(regex_.stateCount_);
620
+ state1_.template Reserve<SizeType>(regex_.stateCount_);
621
+ }
622
+
623
+ ~GenericRegexSearch() {
624
+ Allocator::Free(stateSet_);
625
+ RAPIDJSON_DELETE(ownAllocator_);
626
+ }
627
+
628
+ template <typename InputStream>
629
+ bool Match(InputStream& is) {
630
+ return SearchWithAnchoring(is, true, true);
631
+ }
632
+
633
+ bool Match(const Ch* s) {
634
+ GenericStringStream<Encoding> is(s);
635
+ return Match(is);
636
+ }
637
+
638
+ template <typename InputStream>
639
+ bool Search(InputStream& is) {
640
+ return SearchWithAnchoring(is, regex_.anchorBegin_, regex_.anchorEnd_);
641
+ }
642
+
643
+ bool Search(const Ch* s) {
644
+ GenericStringStream<Encoding> is(s);
645
+ return Search(is);
646
+ }
647
+
648
+ private:
649
+ typedef typename RegexType::State State;
650
+ typedef typename RegexType::Range Range;
651
+
652
+ template <typename InputStream>
653
+ bool SearchWithAnchoring(InputStream& is, bool anchorBegin, bool anchorEnd) {
654
+ DecodedStream<InputStream, Encoding> ds(is);
655
+
656
+ state0_.Clear();
657
+ Stack<Allocator> *current = &state0_, *next = &state1_;
658
+ const size_t stateSetSize = GetStateSetSize();
659
+ std::memset(stateSet_, 0, stateSetSize);
660
+
661
+ bool matched = AddState(*current, regex_.root_);
662
+ unsigned codepoint;
663
+ while (!current->Empty() && (codepoint = ds.Take()) != 0) {
664
+ std::memset(stateSet_, 0, stateSetSize);
665
+ next->Clear();
666
+ matched = false;
667
+ for (const SizeType* s = current->template Bottom<SizeType>(); s != current->template End<SizeType>(); ++s) {
668
+ const State& sr = regex_.GetState(*s);
669
+ if (sr.codepoint == codepoint ||
670
+ sr.codepoint == RegexType::kAnyCharacterClass ||
671
+ (sr.codepoint == RegexType::kRangeCharacterClass && MatchRange(sr.rangeStart, codepoint)))
672
+ {
673
+ matched = AddState(*next, sr.out) || matched;
674
+ if (!anchorEnd && matched)
675
+ return true;
676
+ }
677
+ if (!anchorBegin)
678
+ AddState(*next, regex_.root_);
679
+ }
680
+ internal::Swap(current, next);
681
+ }
682
+
683
+ return matched;
684
+ }
685
+
686
+ size_t GetStateSetSize() const {
687
+ return (regex_.stateCount_ + 31) / 32 * 4;
688
+ }
689
+
690
+ // Return whether the added states is a match state
691
+ bool AddState(Stack<Allocator>& l, SizeType index) {
692
+ RAPIDJSON_ASSERT(index != kRegexInvalidState);
693
+
694
+ const State& s = regex_.GetState(index);
695
+ if (s.out1 != kRegexInvalidState) { // Split
696
+ bool matched = AddState(l, s.out);
697
+ return AddState(l, s.out1) || matched;
698
+ }
699
+ else if (!(stateSet_[index >> 5] & (1u << (index & 31)))) {
700
+ stateSet_[index >> 5] |= (1u << (index & 31));
701
+ *l.template PushUnsafe<SizeType>() = index;
702
+ }
703
+ return s.out == kRegexInvalidState; // by using PushUnsafe() above, we can ensure s is not validated due to reallocation.
704
+ }
705
+
706
+ bool MatchRange(SizeType rangeIndex, unsigned codepoint) const {
707
+ bool yes = (regex_.GetRange(rangeIndex).start & RegexType::kRangeNegationFlag) == 0;
708
+ while (rangeIndex != kRegexInvalidRange) {
709
+ const Range& r = regex_.GetRange(rangeIndex);
710
+ if (codepoint >= (r.start & ~RegexType::kRangeNegationFlag) && codepoint <= r.end)
711
+ return yes;
712
+ rangeIndex = r.next;
713
+ }
714
+ return !yes;
715
+ }
716
+
717
+ const RegexType& regex_;
718
+ Allocator* allocator_;
719
+ Allocator* ownAllocator_;
720
+ Stack<Allocator> state0_;
721
+ Stack<Allocator> state1_;
722
+ uint32_t* stateSet_;
723
+ };
724
+
725
+ typedef GenericRegex<UTF8<> > Regex;
726
+ typedef GenericRegexSearch<Regex> RegexSearch;
727
+
728
+ } // namespace internal
729
+ RAPIDJSON_NAMESPACE_END
730
+
731
+ #ifdef __GNUC__
732
+ RAPIDJSON_DIAG_POP
733
+ #endif
734
+
735
+ #if defined(__clang__) || defined(_MSC_VER)
736
+ RAPIDJSON_DIAG_POP
737
+ #endif
738
+
739
+ #endif // RAPIDJSON_INTERNAL_REGEX_H_