sq_detailed_metrics 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/extconf.rb +26 -0
- data/include/half.hpp +4575 -0
- data/include/msgpack.h +24 -0
- data/include/msgpack/fbuffer.h +42 -0
- data/include/msgpack/gcc_atomic.h +25 -0
- data/include/msgpack/object.h +118 -0
- data/include/msgpack/pack.h +174 -0
- data/include/msgpack/pack_define.h +18 -0
- data/include/msgpack/pack_template.h +952 -0
- data/include/msgpack/sbuffer.h +115 -0
- data/include/msgpack/sysdep.h +221 -0
- data/include/msgpack/timestamp.h +58 -0
- data/include/msgpack/unpack.h +281 -0
- data/include/msgpack/unpack_define.h +89 -0
- data/include/msgpack/unpack_template.h +471 -0
- data/include/msgpack/util.h +15 -0
- data/include/msgpack/version.h +38 -0
- data/include/msgpack/version_master.h +3 -0
- data/include/msgpack/vrefbuffer.h +144 -0
- data/include/msgpack/zbuffer.h +205 -0
- data/include/msgpack/zone.h +163 -0
- data/include/rapidjson/allocators.h +271 -0
- data/include/rapidjson/document.h +2575 -0
- data/include/rapidjson/encodedstream.h +299 -0
- data/include/rapidjson/encodings.h +716 -0
- data/include/rapidjson/error/en.h +74 -0
- data/include/rapidjson/error/error.h +155 -0
- data/include/rapidjson/filereadstream.h +99 -0
- data/include/rapidjson/filewritestream.h +104 -0
- data/include/rapidjson/fwd.h +151 -0
- data/include/rapidjson/internal/biginteger.h +290 -0
- data/include/rapidjson/internal/diyfp.h +258 -0
- data/include/rapidjson/internal/dtoa.h +245 -0
- data/include/rapidjson/internal/ieee754.h +78 -0
- data/include/rapidjson/internal/itoa.h +304 -0
- data/include/rapidjson/internal/meta.h +181 -0
- data/include/rapidjson/internal/pow10.h +55 -0
- data/include/rapidjson/internal/regex.h +701 -0
- data/include/rapidjson/internal/stack.h +230 -0
- data/include/rapidjson/internal/strfunc.h +55 -0
- data/include/rapidjson/internal/strtod.h +269 -0
- data/include/rapidjson/internal/swap.h +46 -0
- data/include/rapidjson/istreamwrapper.h +115 -0
- data/include/rapidjson/memorybuffer.h +70 -0
- data/include/rapidjson/memorystream.h +71 -0
- data/include/rapidjson/msinttypes/inttypes.h +316 -0
- data/include/rapidjson/msinttypes/stdint.h +300 -0
- data/include/rapidjson/ostreamwrapper.h +81 -0
- data/include/rapidjson/pointer.h +1358 -0
- data/include/rapidjson/prettywriter.h +255 -0
- data/include/rapidjson/rapidjson.h +615 -0
- data/include/rapidjson/reader.h +1879 -0
- data/include/rapidjson/schema.h +2006 -0
- data/include/rapidjson/stream.h +179 -0
- data/include/rapidjson/stringbuffer.h +117 -0
- data/include/rapidjson/writer.h +610 -0
- data/include/xxhash.h +328 -0
- data/json_conv.cpp +284 -0
- data/json_conv.hpp +17 -0
- data/metrics.cpp +239 -0
- data/metrics.hpp +84 -0
- data/msgpack/objectc.c +482 -0
- data/msgpack/unpack.c +703 -0
- data/msgpack/version.c +22 -0
- data/msgpack/vrefbuffer.c +250 -0
- data/msgpack/zone.c +222 -0
- data/sq_detailed_metrics.cpp +248 -0
- metadata +199 -0
@@ -0,0 +1,181 @@
|
|
1
|
+
// Tencent is pleased to support the open source community by making RapidJSON available.
|
2
|
+
//
|
3
|
+
// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
|
4
|
+
//
|
5
|
+
// Licensed under the MIT License (the "License"); you may not use this file except
|
6
|
+
// in compliance with the License. You may obtain a copy of the License at
|
7
|
+
//
|
8
|
+
// http://opensource.org/licenses/MIT
|
9
|
+
//
|
10
|
+
// Unless required by applicable law or agreed to in writing, software distributed
|
11
|
+
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
12
|
+
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
13
|
+
// specific language governing permissions and limitations under the License.
|
14
|
+
|
15
|
+
#ifndef RAPIDJSON_INTERNAL_META_H_
|
16
|
+
#define RAPIDJSON_INTERNAL_META_H_
|
17
|
+
|
18
|
+
#include "../rapidjson.h"
|
19
|
+
|
20
|
+
#ifdef __GNUC__
|
21
|
+
RAPIDJSON_DIAG_PUSH
|
22
|
+
RAPIDJSON_DIAG_OFF(effc++)
|
23
|
+
#endif
|
24
|
+
#if defined(_MSC_VER)
|
25
|
+
RAPIDJSON_DIAG_PUSH
|
26
|
+
RAPIDJSON_DIAG_OFF(6334)
|
27
|
+
#endif
|
28
|
+
|
29
|
+
#if RAPIDJSON_HAS_CXX11_TYPETRAITS
|
30
|
+
#include <type_traits>
|
31
|
+
#endif
|
32
|
+
|
33
|
+
//@cond RAPIDJSON_INTERNAL
|
34
|
+
RAPIDJSON_NAMESPACE_BEGIN
|
35
|
+
namespace internal {
|
36
|
+
|
37
|
+
// Helper to wrap/convert arbitrary types to void, useful for arbitrary type matching
|
38
|
+
//! Maps any type \c T to \c void through the nested \c Type typedef.
template <typename T>
struct Void {
    typedef void Type;
};
|
39
|
+
|
40
|
+
///////////////////////////////////////////////////////////////////////////////
|
41
|
+
// BoolType, TrueType, FalseType
|
42
|
+
//
|
43
|
+
//! Compile-time boolean constant.
/*! \c Value holds the boolean; \c Type names the instantiation itself so the
    struct can be used wherever a nested \c ::Type is expected.
*/
template <bool Flag>
struct BoolType {
    typedef BoolType Type;
    static const bool Value = Flag;
};

//! The two possible instantiations of BoolType.
typedef BoolType<false> FalseType;
typedef BoolType<true> TrueType;
|
49
|
+
|
50
|
+
|
51
|
+
///////////////////////////////////////////////////////////////////////////////
|
52
|
+
// SelectIf, BoolExpr, NotExpr, AndExpr, OrExpr
|
53
|
+
//
|
54
|
+
|
55
|
+
//! Primary template (condition is true): Apply<T1, T2>::Type is the first type.
template <bool Pick>
struct SelectIfImpl {
    template <typename OnTrue, typename OnFalse>
    struct Apply {
        typedef OnTrue Type;
    };
};

//! Specialization for a false condition: Apply<T1, T2>::Type is the second type.
template <>
struct SelectIfImpl<false> {
    template <typename OnTrue, typename OnFalse>
    struct Apply {
        typedef OnFalse Type;
    };
};

//! Selects between two types from a raw \c bool condition.
template <bool Pick, typename OnTrue, typename OnFalse>
struct SelectIfCond : SelectIfImpl<Pick>::template Apply<OnTrue, OnFalse> {};

//! Selects between two types from a condition type exposing a static \c Value.
template <typename Cond, typename OnTrue, typename OnFalse>
struct SelectIf : SelectIfCond<Cond::Value, OnTrue, OnFalse> {};
|
59
|
+
|
60
|
+
//! Logical AND of two raw booleans: false unless both arguments are true.
template <bool Lhs, bool Rhs>
struct AndExprCond : FalseType {};
template <>
struct AndExprCond<true, true> : TrueType {};

//! Logical OR of two raw booleans: true unless both arguments are false.
template <bool Lhs, bool Rhs>
struct OrExprCond : TrueType {};
template <>
struct OrExprCond<false, false> : FalseType {};
|
64
|
+
|
65
|
+
//! Normalizes any condition type with a static \c Value to TrueType / FalseType.
template <typename Cond>
struct BoolExpr : SelectIf<Cond, TrueType, FalseType>::Type {};

//! Logical negation of a condition type.
template <typename Cond>
struct NotExpr : SelectIf<Cond, FalseType, TrueType>::Type {};

//! Logical AND of two condition types.
template <typename Lhs, typename Rhs>
struct AndExpr : AndExprCond<Lhs::Value, Rhs::Value>::Type {};

//! Logical OR of two condition types.
template <typename Lhs, typename Rhs>
struct OrExpr : OrExprCond<Lhs::Value, Rhs::Value>::Type {};
|
69
|
+
|
70
|
+
|
71
|
+
///////////////////////////////////////////////////////////////////////////////
|
72
|
+
// AddConst, MaybeAddConst, RemoveConst
|
73
|
+
template <typename T> struct AddConst { typedef const T Type; };
|
74
|
+
template <bool Constify, typename T> struct MaybeAddConst : SelectIfCond<Constify, const T, T> {};
|
75
|
+
template <typename T> struct RemoveConst { typedef T Type; };
|
76
|
+
template <typename T> struct RemoveConst<const T> { typedef T Type; };
|
77
|
+
|
78
|
+
|
79
|
+
///////////////////////////////////////////////////////////////////////////////
|
80
|
+
// IsSame, IsConst, IsMoreConst, IsPointer
|
81
|
+
//
|
82
|
+
//! True only when both template arguments are exactly the same type.
template <typename Lhs, typename Rhs>
struct IsSame : FalseType {};
template <typename Same>
struct IsSame<Same, Same> : TrueType {};
|
84
|
+
|
85
|
+
//! True when the type carries a top-level \c const qualifier.
template <typename T>
struct IsConst : FalseType {};
template <typename T>
struct IsConst<const T> : TrueType {};
|
87
|
+
|
88
|
+
template <typename CT, typename T>
|
89
|
+
struct IsMoreConst
|
90
|
+
: AndExpr<IsSame<typename RemoveConst<CT>::Type, typename RemoveConst<T>::Type>,
|
91
|
+
BoolType<IsConst<CT>::Value >= IsConst<T>::Value> >::Type {};
|
92
|
+
|
93
|
+
//! True for types of the form \c T*; false for everything else matched by the
//! primary template.
template <typename T>
struct IsPointer : FalseType {};
template <typename Pointee>
struct IsPointer<Pointee*> : TrueType {};
|
95
|
+
|
96
|
+
///////////////////////////////////////////////////////////////////////////////
|
97
|
+
// IsBaseOf
|
98
|
+
//
|
99
|
+
#if RAPIDJSON_HAS_CXX11_TYPETRAITS
|
100
|
+
|
101
|
+
template <typename B, typename D> struct IsBaseOf
|
102
|
+
: BoolType< ::std::is_base_of<B,D>::value> {};
|
103
|
+
|
104
|
+
#else // simplified version adopted from Boost
|
105
|
+
|
106
|
+
template<typename B, typename D> struct IsBaseOfImpl {
|
107
|
+
RAPIDJSON_STATIC_ASSERT(sizeof(B) != 0);
|
108
|
+
RAPIDJSON_STATIC_ASSERT(sizeof(D) != 0);
|
109
|
+
|
110
|
+
typedef char (&Yes)[1];
|
111
|
+
typedef char (&No) [2];
|
112
|
+
|
113
|
+
template <typename T>
|
114
|
+
static Yes Check(const D*, T);
|
115
|
+
static No Check(const B*, int);
|
116
|
+
|
117
|
+
struct Host {
|
118
|
+
operator const B*() const;
|
119
|
+
operator const D*();
|
120
|
+
};
|
121
|
+
|
122
|
+
enum { Value = (sizeof(Check(Host(), 0)) == sizeof(Yes)) };
|
123
|
+
};
|
124
|
+
|
125
|
+
template <typename B, typename D> struct IsBaseOf
|
126
|
+
: OrExpr<IsSame<B, D>, BoolExpr<IsBaseOfImpl<B, D> > >::Type {};
|
127
|
+
|
128
|
+
#endif // RAPIDJSON_HAS_CXX11_TYPETRAITS
|
129
|
+
|
130
|
+
|
131
|
+
//////////////////////////////////////////////////////////////////////////
|
132
|
+
// EnableIf / DisableIf
|
133
|
+
//
|
134
|
+
//! Exposes \c Type (defaulting to \c void) only when the raw condition is true.
template <bool Condition, typename T = void>
struct EnableIfCond {
    typedef T Type;
};
template <typename T>
struct EnableIfCond<false, T> {
    // deliberately has no Type member
};

//! Exposes \c Type (defaulting to \c void) only when the raw condition is false.
template <bool Condition, typename T = void>
struct DisableIfCond {
    typedef T Type;
};
template <typename T>
struct DisableIfCond<true, T> {
    // deliberately has no Type member
};

//! Same as EnableIfCond, but driven by a condition type with a static \c Value.
template <typename Condition, typename T = void>
struct EnableIf : EnableIfCond<Condition::Value, T> {};

//! Same as DisableIfCond, but driven by a condition type with a static \c Value.
template <typename Condition, typename T = void>
struct DisableIf : DisableIfCond<Condition::Value, T> {};
|
145
|
+
|
146
|
+
// SFINAE helpers
|
147
|
+
//! Empty tag used as the return type of the function-pointer wrapper below.
struct SfinaeTag {};

//! Recovers \c T from the function-pointer type <tt>SfinaeTag&(*)(T)</tt>.
/*! Only the specialization is defined; any other argument is an incomplete type. */
template <typename T>
struct RemoveSfinaeTag;

template <typename Wrapped>
struct RemoveSfinaeTag<SfinaeTag& (*)(Wrapped)> {
    typedef Wrapped Type;
};
|
150
|
+
|
151
|
+
// Turns a parenthesized type argument "(T)" back into T by wrapping it in the
// function-pointer type SfinaeTag&(*)(T) and unwrapping it with RemoveSfinaeTag.
// The blank before "::" after '<' is required: "<::" would be read as a digraph.
#define RAPIDJSON_REMOVEFPTR_(type)                                   \
    typename ::RAPIDJSON_NAMESPACE::internal::RemoveSfinaeTag         \
        < ::RAPIDJSON_NAMESPACE::internal::SfinaeTag&(*) type>::Type

// Extra defaulted pointer parameter that only exists when cond holds.
#define RAPIDJSON_ENABLEIF(cond)                                      \
    typename ::RAPIDJSON_NAMESPACE::internal::EnableIf                \
        <RAPIDJSON_REMOVEFPTR_(cond)>::Type * = NULL

// Extra defaulted pointer parameter that only exists when cond does not hold.
#define RAPIDJSON_DISABLEIF(cond)                                     \
    typename ::RAPIDJSON_NAMESPACE::internal::DisableIf               \
        <RAPIDJSON_REMOVEFPTR_(cond)>::Type * = NULL

// Return type equal to returntype that only exists when cond holds.
#define RAPIDJSON_ENABLEIF_RETURN(cond,returntype)                    \
    typename ::RAPIDJSON_NAMESPACE::internal::EnableIf                \
        <RAPIDJSON_REMOVEFPTR_(cond),                                 \
         RAPIDJSON_REMOVEFPTR_(returntype)>::Type

// Return type equal to returntype that only exists when cond does not hold.
#define RAPIDJSON_DISABLEIF_RETURN(cond,returntype)                   \
    typename ::RAPIDJSON_NAMESPACE::internal::DisableIf               \
        <RAPIDJSON_REMOVEFPTR_(cond),                                 \
         RAPIDJSON_REMOVEFPTR_(returntype)>::Type
|
172
|
+
|
173
|
+
} // namespace internal
|
174
|
+
RAPIDJSON_NAMESPACE_END
|
175
|
+
//@endcond
|
176
|
+
|
177
|
+
#if defined(__GNUC__) || defined(_MSC_VER)
|
178
|
+
RAPIDJSON_DIAG_POP
|
179
|
+
#endif
|
180
|
+
|
181
|
+
#endif // RAPIDJSON_INTERNAL_META_H_
|
@@ -0,0 +1,55 @@
|
|
1
|
+
// Tencent is pleased to support the open source community by making RapidJSON available.
|
2
|
+
//
|
3
|
+
// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
|
4
|
+
//
|
5
|
+
// Licensed under the MIT License (the "License"); you may not use this file except
|
6
|
+
// in compliance with the License. You may obtain a copy of the License at
|
7
|
+
//
|
8
|
+
// http://opensource.org/licenses/MIT
|
9
|
+
//
|
10
|
+
// Unless required by applicable law or agreed to in writing, software distributed
|
11
|
+
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
12
|
+
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
13
|
+
// specific language governing permissions and limitations under the License.
|
14
|
+
|
15
|
+
#ifndef RAPIDJSON_POW10_
|
16
|
+
#define RAPIDJSON_POW10_
|
17
|
+
|
18
|
+
#include "../rapidjson.h"
|
19
|
+
|
20
|
+
RAPIDJSON_NAMESPACE_BEGIN
|
21
|
+
namespace internal {
|
22
|
+
|
23
|
+
//! Computes integer powers of 10 in double (10.0^n).
|
24
|
+
/*! This function uses lookup table for fast and accurate results.
|
25
|
+
\param n non-negative exponent. Must be <= 308.
|
26
|
+
\return 10.0^n
|
27
|
+
*/
|
28
|
+
inline double Pow10(int n) {
    // The table below only covers exponents 0..308; anything else is a caller error.
    RAPIDJSON_ASSERT(n >= 0 && n <= 308);

    // 1e0 ... 1e308: 309 entries (2472 bytes with an 8-byte double).
    static const double kPowersOfTen[] = {
        1e+0,
        1e+1, 1e+2, 1e+3, 1e+4, 1e+5, 1e+6, 1e+7, 1e+8, 1e+9, 1e+10, 1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17, 1e+18, 1e+19, 1e+20,
        1e+21, 1e+22, 1e+23, 1e+24, 1e+25, 1e+26, 1e+27, 1e+28, 1e+29, 1e+30, 1e+31, 1e+32, 1e+33, 1e+34, 1e+35, 1e+36, 1e+37, 1e+38, 1e+39, 1e+40,
        1e+41, 1e+42, 1e+43, 1e+44, 1e+45, 1e+46, 1e+47, 1e+48, 1e+49, 1e+50, 1e+51, 1e+52, 1e+53, 1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60,
        1e+61, 1e+62, 1e+63, 1e+64, 1e+65, 1e+66, 1e+67, 1e+68, 1e+69, 1e+70, 1e+71, 1e+72, 1e+73, 1e+74, 1e+75, 1e+76, 1e+77, 1e+78, 1e+79, 1e+80,
        1e+81, 1e+82, 1e+83, 1e+84, 1e+85, 1e+86, 1e+87, 1e+88, 1e+89, 1e+90, 1e+91, 1e+92, 1e+93, 1e+94, 1e+95, 1e+96, 1e+97, 1e+98, 1e+99, 1e+100,
        1e+101,1e+102,1e+103,1e+104,1e+105,1e+106,1e+107,1e+108,1e+109,1e+110,1e+111,1e+112,1e+113,1e+114,1e+115,1e+116,1e+117,1e+118,1e+119,1e+120,
        1e+121,1e+122,1e+123,1e+124,1e+125,1e+126,1e+127,1e+128,1e+129,1e+130,1e+131,1e+132,1e+133,1e+134,1e+135,1e+136,1e+137,1e+138,1e+139,1e+140,
        1e+141,1e+142,1e+143,1e+144,1e+145,1e+146,1e+147,1e+148,1e+149,1e+150,1e+151,1e+152,1e+153,1e+154,1e+155,1e+156,1e+157,1e+158,1e+159,1e+160,
        1e+161,1e+162,1e+163,1e+164,1e+165,1e+166,1e+167,1e+168,1e+169,1e+170,1e+171,1e+172,1e+173,1e+174,1e+175,1e+176,1e+177,1e+178,1e+179,1e+180,
        1e+181,1e+182,1e+183,1e+184,1e+185,1e+186,1e+187,1e+188,1e+189,1e+190,1e+191,1e+192,1e+193,1e+194,1e+195,1e+196,1e+197,1e+198,1e+199,1e+200,
        1e+201,1e+202,1e+203,1e+204,1e+205,1e+206,1e+207,1e+208,1e+209,1e+210,1e+211,1e+212,1e+213,1e+214,1e+215,1e+216,1e+217,1e+218,1e+219,1e+220,
        1e+221,1e+222,1e+223,1e+224,1e+225,1e+226,1e+227,1e+228,1e+229,1e+230,1e+231,1e+232,1e+233,1e+234,1e+235,1e+236,1e+237,1e+238,1e+239,1e+240,
        1e+241,1e+242,1e+243,1e+244,1e+245,1e+246,1e+247,1e+248,1e+249,1e+250,1e+251,1e+252,1e+253,1e+254,1e+255,1e+256,1e+257,1e+258,1e+259,1e+260,
        1e+261,1e+262,1e+263,1e+264,1e+265,1e+266,1e+267,1e+268,1e+269,1e+270,1e+271,1e+272,1e+273,1e+274,1e+275,1e+276,1e+277,1e+278,1e+279,1e+280,
        1e+281,1e+282,1e+283,1e+284,1e+285,1e+286,1e+287,1e+288,1e+289,1e+290,1e+291,1e+292,1e+293,1e+294,1e+295,1e+296,1e+297,1e+298,1e+299,1e+300,
        1e+301,1e+302,1e+303,1e+304,1e+305,1e+306,1e+307,1e+308
    };

    return kPowersOfTen[n];
}
|
51
|
+
|
52
|
+
} // namespace internal
|
53
|
+
RAPIDJSON_NAMESPACE_END
|
54
|
+
|
55
|
+
#endif // RAPIDJSON_POW10_
|
@@ -0,0 +1,701 @@
|
|
1
|
+
// Tencent is pleased to support the open source community by making RapidJSON available.
|
2
|
+
//
|
3
|
+
// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
|
4
|
+
//
|
5
|
+
// Licensed under the MIT License (the "License"); you may not use this file except
|
6
|
+
// in compliance with the License. You may obtain a copy of the License at
|
7
|
+
//
|
8
|
+
// http://opensource.org/licenses/MIT
|
9
|
+
//
|
10
|
+
// Unless required by applicable law or agreed to in writing, software distributed
|
11
|
+
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
12
|
+
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
13
|
+
// specific language governing permissions and limitations under the License.
|
14
|
+
|
15
|
+
#ifndef RAPIDJSON_INTERNAL_REGEX_H_
|
16
|
+
#define RAPIDJSON_INTERNAL_REGEX_H_
|
17
|
+
|
18
|
+
#include "../allocators.h"
|
19
|
+
#include "../stream.h"
|
20
|
+
#include "stack.h"
|
21
|
+
|
22
|
+
#ifdef __clang__
|
23
|
+
RAPIDJSON_DIAG_PUSH
|
24
|
+
RAPIDJSON_DIAG_OFF(padded)
|
25
|
+
RAPIDJSON_DIAG_OFF(switch-enum)
|
26
|
+
RAPIDJSON_DIAG_OFF(implicit-fallthrough)
|
27
|
+
#endif
|
28
|
+
|
29
|
+
#ifdef __GNUC__
|
30
|
+
RAPIDJSON_DIAG_PUSH
|
31
|
+
RAPIDJSON_DIAG_OFF(effc++)
|
32
|
+
#endif
|
33
|
+
|
34
|
+
#ifdef _MSC_VER
|
35
|
+
RAPIDJSON_DIAG_PUSH
|
36
|
+
RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
|
37
|
+
#endif
|
38
|
+
|
39
|
+
#ifndef RAPIDJSON_REGEX_VERBOSE
|
40
|
+
#define RAPIDJSON_REGEX_VERBOSE 0
|
41
|
+
#endif
|
42
|
+
|
43
|
+
RAPIDJSON_NAMESPACE_BEGIN
|
44
|
+
namespace internal {
|
45
|
+
|
46
|
+
///////////////////////////////////////////////////////////////////////////////
|
47
|
+
// GenericRegex
|
48
|
+
|
49
|
+
//! Sentinel index (all bits set) meaning "no state"; used in GenericRegex::State::out and out1.
static const SizeType kRegexInvalidState = ~SizeType(0);

//! Sentinel index (all bits set) meaning "no range"; used for State::rangeStart.
static const SizeType kRegexInvalidRange = ~SizeType(0);
|
51
|
+
|
52
|
+
//! Regular expression engine supporting a subset of the ECMAScript grammar.
|
53
|
+
/*!
|
54
|
+
Supported regular expression syntax:
|
55
|
+
- \c ab Concatenation
|
56
|
+
- \c a|b Alternation
|
57
|
+
- \c a? Zero or one
|
58
|
+
- \c a* Zero or more
|
59
|
+
- \c a+ One or more
|
60
|
+
- \c a{3} Exactly 3 times
|
61
|
+
- \c a{3,} At least 3 times
|
62
|
+
- \c a{3,5} 3 to 5 times
|
63
|
+
- \c (ab) Grouping
|
64
|
+
- \c ^a At the beginning
|
65
|
+
- \c a$ At the end
|
66
|
+
- \c . Any character
|
67
|
+
- \c [abc] Character classes
|
68
|
+
- \c [a-c] Character class range
|
69
|
+
- \c [a-z0-9_] Character class combination
|
70
|
+
- \c [^abc] Negated character classes
|
71
|
+
- \c [^a-c] Negated character class range
|
72
|
+
- \c [\b] Backspace (U+0008)
|
73
|
+
- \c \\| \\\\ ... Escape characters
|
74
|
+
- \c \\f Form feed (U+000C)
|
75
|
+
- \c \\n Line feed (U+000A)
|
76
|
+
- \c \\r Carriage return (U+000D)
|
77
|
+
- \c \\t Tab (U+0009)
|
78
|
+
- \c \\v Vertical tab (U+000B)
|
79
|
+
|
80
|
+
\note This is a Thompson NFA engine, implemented with reference to
|
81
|
+
Cox, Russ. "Regular Expression Matching Can Be Simple And Fast (but is slow in Java, Perl, PHP, Python, Ruby,...).",
|
82
|
+
https://swtch.com/~rsc/regexp/regexp1.html
|
83
|
+
*/
|
84
|
+
template <typename Encoding, typename Allocator = CrtAllocator>
|
85
|
+
class GenericRegex {
|
86
|
+
public:
|
87
|
+
typedef typename Encoding::Ch Ch;
|
88
|
+
|
89
|
+
GenericRegex(const Ch* source, Allocator* allocator = 0) :
|
90
|
+
states_(allocator, 256), ranges_(allocator, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(),
|
91
|
+
stateSet_(), state0_(allocator, 0), state1_(allocator, 0), anchorBegin_(), anchorEnd_()
|
92
|
+
{
|
93
|
+
GenericStringStream<Encoding> ss(source);
|
94
|
+
DecodedStream<GenericStringStream<Encoding> > ds(ss);
|
95
|
+
Parse(ds);
|
96
|
+
}
|
97
|
+
|
98
|
+
//! Releases the state-set buffer that Parse() allocates (it may still be null).
// NOTE(review): no copy constructor / assignment operator is visible in this
// part of the file; if the class is copyable, two copies would free the same
// stateSet_ -- confirm against the rest of the class.
~GenericRegex() {
    Allocator::Free(stateSet_);
}
|
101
|
+
|
102
|
+
bool IsValid() const {
|
103
|
+
return root_ != kRegexInvalidState;
|
104
|
+
}
|
105
|
+
|
106
|
+
template <typename InputStream>
|
107
|
+
bool Match(InputStream& is) const {
|
108
|
+
return SearchWithAnchoring(is, true, true);
|
109
|
+
}
|
110
|
+
|
111
|
+
bool Match(const Ch* s) const {
|
112
|
+
GenericStringStream<Encoding> is(s);
|
113
|
+
return Match(is);
|
114
|
+
}
|
115
|
+
|
116
|
+
template <typename InputStream>
|
117
|
+
bool Search(InputStream& is) const {
|
118
|
+
return SearchWithAnchoring(is, anchorBegin_, anchorEnd_);
|
119
|
+
}
|
120
|
+
|
121
|
+
bool Search(const Ch* s) const {
|
122
|
+
GenericStringStream<Encoding> is(s);
|
123
|
+
return Search(is);
|
124
|
+
}
|
125
|
+
|
126
|
+
private:
|
127
|
+
//! Operators kept on the parser's operator stack.
/*! The numeric order matters: Parse() compares values against kAlternation to
    decide which pending operators to evaluate first.
*/
enum Operator {
    kZeroOrOne = 0,        // a?
    kZeroOrMore = 1,       // a*
    kOneOrMore = 2,        // a+
    kConcatenation = 3,    // ab
    kAlternation = 4,      // a|b
    kLeftParenthesis = 5   // marker pushed for '('
};
|
135
|
+
|
136
|
+
//! Pseudo code point stored in a State for '.'.
static const unsigned kAnyCharacterClass = 0xFFFFFFFFu;
//! Pseudo code point stored in a State that refers to a character class ('[...]').
static const unsigned kRangeCharacterClass = 0xFFFFFFFEu;
//! Flag bit for negated ranges. NOTE(review): its use lies outside this part of the file.
static const unsigned kRangeNegationFlag = 0x80000000u;
|
139
|
+
|
140
|
+
struct Range {
|
141
|
+
unsigned start; //
|
142
|
+
unsigned end;
|
143
|
+
SizeType next;
|
144
|
+
};
|
145
|
+
|
146
|
+
struct State {
|
147
|
+
SizeType out; //!< Equals to kInvalid for matching state
|
148
|
+
SizeType out1; //!< Equals to non-kInvalid for split
|
149
|
+
SizeType rangeStart;
|
150
|
+
unsigned codepoint;
|
151
|
+
};
|
152
|
+
|
153
|
+
struct Frag {
|
154
|
+
Frag(SizeType s, SizeType o, SizeType m) : start(s), out(o), minIndex(m) {}
|
155
|
+
SizeType start;
|
156
|
+
SizeType out; //!< link-list of all output states
|
157
|
+
SizeType minIndex;
|
158
|
+
};
|
159
|
+
|
160
|
+
template <typename SourceStream>
|
161
|
+
class DecodedStream {
|
162
|
+
public:
|
163
|
+
DecodedStream(SourceStream& ss) : ss_(ss), codepoint_() { Decode(); }
|
164
|
+
unsigned Peek() { return codepoint_; }
|
165
|
+
unsigned Take() {
|
166
|
+
unsigned c = codepoint_;
|
167
|
+
if (c) // No further decoding when '\0'
|
168
|
+
Decode();
|
169
|
+
return c;
|
170
|
+
}
|
171
|
+
|
172
|
+
private:
|
173
|
+
void Decode() {
|
174
|
+
if (!Encoding::Decode(ss_, &codepoint_))
|
175
|
+
codepoint_ = 0;
|
176
|
+
}
|
177
|
+
|
178
|
+
SourceStream& ss_;
|
179
|
+
unsigned codepoint_;
|
180
|
+
};
|
181
|
+
|
182
|
+
//! Mutable access to the state stored at \c index (asserted to be < stateCount_).
State& GetState(SizeType index) {
    RAPIDJSON_ASSERT(index < stateCount_);
    State* const table = states_.template Bottom<State>();
    return table[index];
}
|
186
|
+
|
187
|
+
//! Read-only access to the state stored at \c index (asserted to be < stateCount_).
const State& GetState(SizeType index) const {
    RAPIDJSON_ASSERT(index < stateCount_);
    const State* const table = states_.template Bottom<State>();
    return table[index];
}
|
191
|
+
|
192
|
+
//! Mutable access to the range stored at \c index (asserted to be < rangeCount_).
Range& GetRange(SizeType index) {
    RAPIDJSON_ASSERT(index < rangeCount_);
    Range* const table = ranges_.template Bottom<Range>();
    return table[index];
}
|
196
|
+
|
197
|
+
//! Read-only access to the range stored at \c index (asserted to be < rangeCount_).
const Range& GetRange(SizeType index) const {
    RAPIDJSON_ASSERT(index < rangeCount_);
    const Range* const table = ranges_.template Bottom<Range>();
    return table[index];
}
|
201
|
+
|
202
|
+
template <typename InputStream>
|
203
|
+
void Parse(DecodedStream<InputStream>& ds) {
|
204
|
+
Allocator allocator;
|
205
|
+
Stack<Allocator> operandStack(&allocator, 256); // Frag
|
206
|
+
Stack<Allocator> operatorStack(&allocator, 256); // Operator
|
207
|
+
Stack<Allocator> atomCountStack(&allocator, 256); // unsigned (Atom per parenthesis)
|
208
|
+
|
209
|
+
*atomCountStack.template Push<unsigned>() = 0;
|
210
|
+
|
211
|
+
unsigned codepoint;
|
212
|
+
while (ds.Peek() != 0) {
|
213
|
+
switch (codepoint = ds.Take()) {
|
214
|
+
case '^':
|
215
|
+
anchorBegin_ = true;
|
216
|
+
break;
|
217
|
+
|
218
|
+
case '$':
|
219
|
+
anchorEnd_ = true;
|
220
|
+
break;
|
221
|
+
|
222
|
+
case '|':
|
223
|
+
while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() < kAlternation)
|
224
|
+
if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
|
225
|
+
return;
|
226
|
+
*operatorStack.template Push<Operator>() = kAlternation;
|
227
|
+
*atomCountStack.template Top<unsigned>() = 0;
|
228
|
+
break;
|
229
|
+
|
230
|
+
case '(':
|
231
|
+
*operatorStack.template Push<Operator>() = kLeftParenthesis;
|
232
|
+
*atomCountStack.template Push<unsigned>() = 0;
|
233
|
+
break;
|
234
|
+
|
235
|
+
case ')':
|
236
|
+
while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() != kLeftParenthesis)
|
237
|
+
if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
|
238
|
+
return;
|
239
|
+
if (operatorStack.Empty())
|
240
|
+
return;
|
241
|
+
operatorStack.template Pop<Operator>(1);
|
242
|
+
atomCountStack.template Pop<unsigned>(1);
|
243
|
+
ImplicitConcatenation(atomCountStack, operatorStack);
|
244
|
+
break;
|
245
|
+
|
246
|
+
case '?':
|
247
|
+
if (!Eval(operandStack, kZeroOrOne))
|
248
|
+
return;
|
249
|
+
break;
|
250
|
+
|
251
|
+
case '*':
|
252
|
+
if (!Eval(operandStack, kZeroOrMore))
|
253
|
+
return;
|
254
|
+
break;
|
255
|
+
|
256
|
+
case '+':
|
257
|
+
if (!Eval(operandStack, kOneOrMore))
|
258
|
+
return;
|
259
|
+
break;
|
260
|
+
|
261
|
+
case '{':
|
262
|
+
{
|
263
|
+
unsigned n, m;
|
264
|
+
if (!ParseUnsigned(ds, &n))
|
265
|
+
return;
|
266
|
+
|
267
|
+
if (ds.Peek() == ',') {
|
268
|
+
ds.Take();
|
269
|
+
if (ds.Peek() == '}')
|
270
|
+
m = kInfinityQuantifier;
|
271
|
+
else if (!ParseUnsigned(ds, &m) || m < n)
|
272
|
+
return;
|
273
|
+
}
|
274
|
+
else
|
275
|
+
m = n;
|
276
|
+
|
277
|
+
if (!EvalQuantifier(operandStack, n, m) || ds.Peek() != '}')
|
278
|
+
return;
|
279
|
+
ds.Take();
|
280
|
+
}
|
281
|
+
break;
|
282
|
+
|
283
|
+
case '.':
|
284
|
+
PushOperand(operandStack, kAnyCharacterClass);
|
285
|
+
ImplicitConcatenation(atomCountStack, operatorStack);
|
286
|
+
break;
|
287
|
+
|
288
|
+
case '[':
|
289
|
+
{
|
290
|
+
SizeType range;
|
291
|
+
if (!ParseRange(ds, &range))
|
292
|
+
return;
|
293
|
+
SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, kRangeCharacterClass);
|
294
|
+
GetState(s).rangeStart = range;
|
295
|
+
*operandStack.template Push<Frag>() = Frag(s, s, s);
|
296
|
+
}
|
297
|
+
ImplicitConcatenation(atomCountStack, operatorStack);
|
298
|
+
break;
|
299
|
+
|
300
|
+
case '\\': // Escape character
|
301
|
+
if (!CharacterEscape(ds, &codepoint))
|
302
|
+
return; // Unsupported escape character
|
303
|
+
// fall through to default
|
304
|
+
|
305
|
+
default: // Pattern character
|
306
|
+
PushOperand(operandStack, codepoint);
|
307
|
+
ImplicitConcatenation(atomCountStack, operatorStack);
|
308
|
+
}
|
309
|
+
}
|
310
|
+
|
311
|
+
while (!operatorStack.Empty())
|
312
|
+
if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
|
313
|
+
return;
|
314
|
+
|
315
|
+
// Link the operand to matching state.
|
316
|
+
if (operandStack.GetSize() == sizeof(Frag)) {
|
317
|
+
Frag* e = operandStack.template Pop<Frag>(1);
|
318
|
+
Patch(e->out, NewState(kRegexInvalidState, kRegexInvalidState, 0));
|
319
|
+
root_ = e->start;
|
320
|
+
|
321
|
+
#if RAPIDJSON_REGEX_VERBOSE
|
322
|
+
printf("root: %d\n", root_);
|
323
|
+
for (SizeType i = 0; i < stateCount_ ; i++) {
|
324
|
+
State& s = GetState(i);
|
325
|
+
printf("[%2d] out: %2d out1: %2d c: '%c'\n", i, s.out, s.out1, (char)s.codepoint);
|
326
|
+
}
|
327
|
+
printf("\n");
|
328
|
+
#endif
|
329
|
+
}
|
330
|
+
|
331
|
+
// Preallocate buffer for SearchWithAnchoring()
|
332
|
+
RAPIDJSON_ASSERT(stateSet_ == 0);
|
333
|
+
if (stateCount_ > 0) {
|
334
|
+
stateSet_ = static_cast<unsigned*>(states_.GetAllocator().Malloc(GetStateSetSize()));
|
335
|
+
state0_.template Reserve<SizeType>(stateCount_);
|
336
|
+
state1_.template Reserve<SizeType>(stateCount_);
|
337
|
+
}
|
338
|
+
}
|
339
|
+
|
340
|
+
//! Appends a state to the state table and returns its index.
/*! The new state's rangeStart is always set to kRegexInvalidRange; callers that
    need a character class overwrite it afterwards.
*/
SizeType NewState(SizeType out, SizeType out1, unsigned codepoint) {
    State& fresh = *states_.template Push<State>();
    fresh.out = out;
    fresh.out1 = out1;
    fresh.codepoint = codepoint;
    fresh.rangeStart = kRegexInvalidRange;

    const SizeType index = stateCount_;
    ++stateCount_;
    return index;
}
|
348
|
+
|
349
|
+
//! Creates a single-state fragment for \c codepoint and pushes it as an operand.
void PushOperand(Stack<Allocator>& operandStack, unsigned codepoint) {
    const SizeType index = NewState(kRegexInvalidState, kRegexInvalidState, codepoint);
    Frag* slot = operandStack.template Push<Frag>();
    // start, output list and lowest index all refer to the one new state.
    *slot = Frag(index, index, index);
}
|
353
|
+
|
354
|
+
//! Records one more atom at the current parenthesis level.
/*! If the level already holds an atom, a kConcatenation operator is pushed
    first so the new atom gets joined to the previous one.
*/
void ImplicitConcatenation(Stack<Allocator>& atomCountStack, Stack<Allocator>& operatorStack) {
    unsigned& atomsAtThisLevel = *atomCountStack.template Top<unsigned>();
    if (atomsAtThisLevel != 0)
        *operatorStack.template Push<Operator>() = kConcatenation;
    ++atomsAtThisLevel;
}
|
359
|
+
|
360
|
+
//! Appends list \c l2 to the end of the output list starting at \c l1.
/*! The lists are chained through State::out. Returns the head of the combined
    list, which is \c l1.
*/
SizeType Append(SizeType l1, SizeType l2) {
    // Walk to the last element of the first list.
    SizeType tail = l1;
    for (SizeType next = GetState(tail).out; next != kRegexInvalidState; next = GetState(tail).out)
        tail = next;

    GetState(tail).out = l2;
    return l1;
}
|
367
|
+
|
368
|
+
//! Points every state on the output list starting at \c l to state \c s.
/*! The list is chained through State::out, so each link is read before it is
    overwritten.
*/
void Patch(SizeType l, SizeType s) {
    while (l != kRegexInvalidState) {
        State& pending = GetState(l);
        const SizeType next = pending.out;
        pending.out = s;
        l = next;
    }
}
|
374
|
+
|
375
|
+
//! Applies operator \c op to the fragment(s) on top of the operand stack.
/*! \param operandStack Stack of Frag; the consumed operands are replaced by the result.
    \param op           Operator to apply.
    \return true on success; false when there are not enough operands or when
            \c op cannot be evaluated (kLeftParenthesis, i.e. an unbalanced '(').

    A malformed pattern must never trip an assertion or underflow the stack:
    every case checks the operand count itself and reports failure to the
    caller, which then leaves the regex invalid.
*/
bool Eval(Stack<Allocator>& operandStack, Operator op) {
    switch (op) {
        case kConcatenation:
            // e.g. "()a" pushes a concatenation with a single operand available.
            if (operandStack.GetSize() >= sizeof(Frag) * 2) {
                Frag e2 = *operandStack.template Pop<Frag>(1);
                Frag e1 = *operandStack.template Pop<Frag>(1);
                Patch(e1.out, e2.start);
                *operandStack.template Push<Frag>() = Frag(e1.start, e2.out, Min(e1.minIndex, e2.minIndex));
                return true;
            }
            return false;

        case kAlternation:
            if (operandStack.GetSize() >= sizeof(Frag) * 2) {
                Frag e2 = *operandStack.template Pop<Frag>(1);
                Frag e1 = *operandStack.template Pop<Frag>(1);
                // Split state branching to both alternatives.
                SizeType s = NewState(e1.start, e2.start, 0);
                *operandStack.template Push<Frag>() = Frag(s, Append(e1.out, e2.out), Min(e1.minIndex, e2.minIndex));
                return true;
            }
            return false;

        case kZeroOrOne:
            if (operandStack.GetSize() >= sizeof(Frag)) {
                Frag e = *operandStack.template Pop<Frag>(1);
                SizeType s = NewState(kRegexInvalidState, e.start, 0);
                *operandStack.template Push<Frag>() = Frag(s, Append(e.out, s), e.minIndex);
                return true;
            }
            return false;

        case kZeroOrMore:
            if (operandStack.GetSize() >= sizeof(Frag)) {
                Frag e = *operandStack.template Pop<Frag>(1);
                SizeType s = NewState(kRegexInvalidState, e.start, 0);
                Patch(e.out, s);    // loop back to the split state
                *operandStack.template Push<Frag>() = Frag(s, s, e.minIndex);
                return true;
            }
            return false;

        case kOneOrMore:
            if (operandStack.GetSize() >= sizeof(Frag)) {
                Frag e = *operandStack.template Pop<Frag>(1);
                SizeType s = NewState(kRegexInvalidState, e.start, 0);
                Patch(e.out, s);    // loop back to the split state
                *operandStack.template Push<Frag>() = Frag(e.start, s, e.minIndex);
                return true;
            }
            return false;

        default:
            // kLeftParenthesis is only a marker; it reaches here when a '(' was
            // never closed, which makes the pattern invalid.
            return false;
    }
}
|
428
|
+
|
429
|
+
//! Expands a counted quantifier {n}, {n,} or {n,m} applied to the top operand.
/*! \param operandStack Stack of Frag; its top operand is the one being repeated.
    \param n            Minimum repetition count.
    \param m            Maximum repetition count, or kInfinityQuantifier for "{n,}".
    \return false when there is nothing to repeat (e.g. the pattern starts with
            '{') or for the unsupported form a{0}; true otherwise.

    The quantifier is rewritten in terms of cloned operands plus the basic
    operators, as illustrated by the comments on each step.
*/
bool EvalQuantifier(Stack<Allocator>& operandStack, unsigned n, unsigned m) {
    RAPIDJSON_ASSERT(n <= m);

    // A quantifier without a preceding atom is a pattern error, not a
    // programming error: report it instead of asserting / underflowing.
    if (operandStack.GetSize() < sizeof(Frag))
        return false;

    if (n == 0) {
        if (m == 0)                             // a{0} is not supported
            return false;
        else if (m == kInfinityQuantifier)
            Eval(operandStack, kZeroOrMore);    // a{0,} -> a*
        else {
            Eval(operandStack, kZeroOrOne);         // a{0,5} -> a?
            for (unsigned i = 0; i < m - 1; i++)
                CloneTopOperand(operandStack);      // a{0,5} -> a? a? a? a? a?
            for (unsigned i = 0; i < m - 1; i++)
                Eval(operandStack, kConcatenation); // a{0,5} -> a?a?a?a?a?
        }
        return true;
    }

    for (unsigned i = 0; i < n - 1; i++)        // a{3} -> a a a
        CloneTopOperand(operandStack);

    if (m == kInfinityQuantifier)
        Eval(operandStack, kOneOrMore);         // a{3,} -> a a a+
    else if (m > n) {
        CloneTopOperand(operandStack);          // a{3,5} -> a a a a
        Eval(operandStack, kZeroOrOne);         // a{3,5} -> a a a a?
        for (unsigned i = n; i < m - 1; i++)
            CloneTopOperand(operandStack);      // a{3,5} -> a a a a? a?
        for (unsigned i = n; i < m; i++)
            Eval(operandStack, kConcatenation); // a{3,5} -> a a aa?a?
    }

    for (unsigned i = 0; i < n - 1; i++)
        Eval(operandStack, kConcatenation);     // a{3} -> aaa, a{3,} -> aaa+, a{3,5} -> aaaa?a?

    return true;
}
|
467
|
+
|
468
|
+
// Returns the smaller of the two values.
static SizeType Min(SizeType a, SizeType b) {
    if (b < a)
        return b;
    return a;
}
|
469
|
+
|
470
|
+
// Duplicates the fragment on top of operandStack: appends a copy of its
// states to states_, shifts every valid transition in the copy by the number
// of states copied, and pushes a fragment describing the copy.
// Assumes the top operand owns exactly the states in [minIndex, stateCount_).
void CloneTopOperand(Stack<Allocator>& operandStack) {
    // Copy the fragment by value: the pushes below may move the stack storage.
    const Frag top = *operandStack.template Top<Frag>();
    const SizeType span = stateCount_ - top.minIndex;

    State* const copy = states_.template Push<State>(span);
    memcpy(copy, &GetState(top.minIndex), span * sizeof(State));

    // Re-target the copied transitions so they point inside the copy.
    for (State* p = copy; p != copy + span; ++p) {
        if (p->out != kRegexInvalidState)
            p->out += span;
        if (p->out1 != kRegexInvalidState)
            p->out1 += span;
    }

    const Frag clone(top.start + span, top.out + span, top.minIndex + span);
    *operandStack.template Push<Frag>() = clone;
    stateCount_ += span;
}
|
484
|
+
|
485
|
+
template <typename InputStream>
|
486
|
+
bool ParseUnsigned(DecodedStream<InputStream>& ds, unsigned* u) {
|
487
|
+
unsigned r = 0;
|
488
|
+
if (ds.Peek() < '0' || ds.Peek() > '9')
|
489
|
+
return false;
|
490
|
+
while (ds.Peek() >= '0' && ds.Peek() <= '9') {
|
491
|
+
if (r >= 429496729 && ds.Peek() > '5') // 2^32 - 1 = 4294967295
|
492
|
+
return false; // overflow
|
493
|
+
r = r * 10 + (ds.Take() - '0');
|
494
|
+
}
|
495
|
+
*u = r;
|
496
|
+
return true;
|
497
|
+
}
|
498
|
+
|
499
|
+
template <typename InputStream>
|
500
|
+
bool ParseRange(DecodedStream<InputStream>& ds, SizeType* range) {
|
501
|
+
bool isBegin = true;
|
502
|
+
bool negate = false;
|
503
|
+
int step = 0;
|
504
|
+
SizeType start = kRegexInvalidRange;
|
505
|
+
SizeType current = kRegexInvalidRange;
|
506
|
+
unsigned codepoint;
|
507
|
+
while ((codepoint = ds.Take()) != 0) {
|
508
|
+
if (isBegin) {
|
509
|
+
isBegin = false;
|
510
|
+
if (codepoint == '^') {
|
511
|
+
negate = true;
|
512
|
+
continue;
|
513
|
+
}
|
514
|
+
}
|
515
|
+
|
516
|
+
switch (codepoint) {
|
517
|
+
case ']':
|
518
|
+
if (start == kRegexInvalidRange)
|
519
|
+
return false; // Error: nothing inside []
|
520
|
+
if (step == 2) { // Add trailing '-'
|
521
|
+
SizeType r = NewRange('-');
|
522
|
+
RAPIDJSON_ASSERT(current != kRegexInvalidRange);
|
523
|
+
GetRange(current).next = r;
|
524
|
+
}
|
525
|
+
if (negate)
|
526
|
+
GetRange(start).start |= kRangeNegationFlag;
|
527
|
+
*range = start;
|
528
|
+
return true;
|
529
|
+
|
530
|
+
case '\\':
|
531
|
+
if (ds.Peek() == 'b') {
|
532
|
+
ds.Take();
|
533
|
+
codepoint = 0x0008; // Escape backspace character
|
534
|
+
}
|
535
|
+
else if (!CharacterEscape(ds, &codepoint))
|
536
|
+
return false;
|
537
|
+
// fall through to default
|
538
|
+
|
539
|
+
default:
|
540
|
+
switch (step) {
|
541
|
+
case 1:
|
542
|
+
if (codepoint == '-') {
|
543
|
+
step++;
|
544
|
+
break;
|
545
|
+
}
|
546
|
+
// fall through to step 0 for other characters
|
547
|
+
|
548
|
+
case 0:
|
549
|
+
{
|
550
|
+
SizeType r = NewRange(codepoint);
|
551
|
+
if (current != kRegexInvalidRange)
|
552
|
+
GetRange(current).next = r;
|
553
|
+
if (start == kRegexInvalidRange)
|
554
|
+
start = r;
|
555
|
+
current = r;
|
556
|
+
}
|
557
|
+
step = 1;
|
558
|
+
break;
|
559
|
+
|
560
|
+
default:
|
561
|
+
RAPIDJSON_ASSERT(step == 2);
|
562
|
+
GetRange(current).end = codepoint;
|
563
|
+
step = 0;
|
564
|
+
}
|
565
|
+
}
|
566
|
+
}
|
567
|
+
return false;
|
568
|
+
}
|
569
|
+
|
570
|
+
// Appends a single-codepoint range [codepoint, codepoint] with no successor
// to ranges_ and returns its index.
SizeType NewRange(unsigned codepoint) {
    Range& fresh = *ranges_.template Push<Range>();
    fresh.end = codepoint;
    fresh.start = fresh.end;
    fresh.next = kRegexInvalidRange;

    const SizeType index = rangeCount_;
    ++rangeCount_;
    return index;
}
|
576
|
+
|
577
|
+
template <typename InputStream>
|
578
|
+
bool CharacterEscape(DecodedStream<InputStream>& ds, unsigned* escapedCodepoint) {
|
579
|
+
unsigned codepoint;
|
580
|
+
switch (codepoint = ds.Take()) {
|
581
|
+
case '^':
|
582
|
+
case '$':
|
583
|
+
case '|':
|
584
|
+
case '(':
|
585
|
+
case ')':
|
586
|
+
case '?':
|
587
|
+
case '*':
|
588
|
+
case '+':
|
589
|
+
case '.':
|
590
|
+
case '[':
|
591
|
+
case ']':
|
592
|
+
case '{':
|
593
|
+
case '}':
|
594
|
+
case '\\':
|
595
|
+
*escapedCodepoint = codepoint; return true;
|
596
|
+
case 'f': *escapedCodepoint = 0x000C; return true;
|
597
|
+
case 'n': *escapedCodepoint = 0x000A; return true;
|
598
|
+
case 'r': *escapedCodepoint = 0x000D; return true;
|
599
|
+
case 't': *escapedCodepoint = 0x0009; return true;
|
600
|
+
case 'v': *escapedCodepoint = 0x000B; return true;
|
601
|
+
default:
|
602
|
+
return false; // Unsupported escape character
|
603
|
+
}
|
604
|
+
}
|
605
|
+
|
606
|
+
template <typename InputStream>
|
607
|
+
bool SearchWithAnchoring(InputStream& is, bool anchorBegin, bool anchorEnd) const {
|
608
|
+
RAPIDJSON_ASSERT(IsValid());
|
609
|
+
DecodedStream<InputStream> ds(is);
|
610
|
+
|
611
|
+
state0_.Clear();
|
612
|
+
Stack<Allocator> *current = &state0_, *next = &state1_;
|
613
|
+
const size_t stateSetSize = GetStateSetSize();
|
614
|
+
std::memset(stateSet_, 0, stateSetSize);
|
615
|
+
|
616
|
+
bool matched = AddState(*current, root_);
|
617
|
+
unsigned codepoint;
|
618
|
+
while (!current->Empty() && (codepoint = ds.Take()) != 0) {
|
619
|
+
std::memset(stateSet_, 0, stateSetSize);
|
620
|
+
next->Clear();
|
621
|
+
matched = false;
|
622
|
+
for (const SizeType* s = current->template Bottom<SizeType>(); s != current->template End<SizeType>(); ++s) {
|
623
|
+
const State& sr = GetState(*s);
|
624
|
+
if (sr.codepoint == codepoint ||
|
625
|
+
sr.codepoint == kAnyCharacterClass ||
|
626
|
+
(sr.codepoint == kRangeCharacterClass && MatchRange(sr.rangeStart, codepoint)))
|
627
|
+
{
|
628
|
+
matched = AddState(*next, sr.out) || matched;
|
629
|
+
if (!anchorEnd && matched)
|
630
|
+
return true;
|
631
|
+
}
|
632
|
+
if (!anchorBegin)
|
633
|
+
AddState(*next, root_);
|
634
|
+
}
|
635
|
+
internal::Swap(current, next);
|
636
|
+
}
|
637
|
+
|
638
|
+
return matched;
|
639
|
+
}
|
640
|
+
|
641
|
+
size_t GetStateSetSize() const {
|
642
|
+
return (stateCount_ + 31) / 32 * 4;
|
643
|
+
}
|
644
|
+
|
645
|
+
// Return whether the added states is a match state
|
646
|
+
bool AddState(Stack<Allocator>& l, SizeType index) const {
|
647
|
+
RAPIDJSON_ASSERT(index != kRegexInvalidState);
|
648
|
+
|
649
|
+
const State& s = GetState(index);
|
650
|
+
if (s.out1 != kRegexInvalidState) { // Split
|
651
|
+
bool matched = AddState(l, s.out);
|
652
|
+
return AddState(l, s.out1) || matched;
|
653
|
+
}
|
654
|
+
else if (!(stateSet_[index >> 5] & (1 << (index & 31)))) {
|
655
|
+
stateSet_[index >> 5] |= (1 << (index & 31));
|
656
|
+
*l.template PushUnsafe<SizeType>() = index;
|
657
|
+
}
|
658
|
+
return s.out == kRegexInvalidState; // by using PushUnsafe() above, we can ensure s is not validated due to reallocation.
|
659
|
+
}
|
660
|
+
|
661
|
+
// Tests codepoint against the linked list of ranges starting at rangeIndex.
// The negation flag stored in the first entry's `start` inverts the result.
bool MatchRange(SizeType rangeIndex, unsigned codepoint) const {
    const bool negated = (GetRange(rangeIndex).start & kRangeNegationFlag) != 0;

    for (SizeType i = rangeIndex; i != kRegexInvalidRange; i = GetRange(i).next) {
        const Range& item = GetRange(i);
        // Mask the flag out so it never takes part in the bounds comparison.
        if ((item.start & ~kRangeNegationFlag) <= codepoint && codepoint <= item.end)
            return !negated;
    }
    return negated;
}
|
671
|
+
|
672
|
+
// Storage for the State objects of the compiled pattern.
Stack<Allocator> states_;
|
673
|
+
// Storage for the Range entries used by character classes.
Stack<Allocator> ranges_;
|
674
|
+
// Index of the state where matching starts.
SizeType root_;
|
675
|
+
// Number of states currently stored in states_.
SizeType stateCount_;
|
676
|
+
// Number of entries currently stored in ranges_.
SizeType rangeCount_;
|
677
|
+
|
678
|
+
// Upper bound value meaning "unbounded" in EvalQuantifier(), e.g. a{3,}.
static const unsigned kInfinityQuantifier = ~0u;
|
679
|
+
|
680
|
+
// For SearchWithAnchoring()
|
681
|
+
// Bitmap with one bit per state, marking states already added in the current step.
uint32_t* stateSet_; // allocated by states_.GetAllocator()
|
682
|
+
// The two state lists that SearchWithAnchoring() swaps between steps;
// mutable because that function is const.
mutable Stack<Allocator> state0_;
|
683
|
+
mutable Stack<Allocator> state1_;
|
684
|
+
// NOTE(review): presumably record whether the pattern was anchored with '^' / '$' -- confirm against the parser.
bool anchorBegin_;
|
685
|
+
bool anchorEnd_;
|
686
|
+
};
|
687
|
+
|
688
|
+
// Convenience alias: GenericRegex instantiated with the UTF8<> encoding.
typedef GenericRegex<UTF8<> > Regex;
|
689
|
+
|
690
|
+
} // namespace internal
|
691
|
+
RAPIDJSON_NAMESPACE_END
|
692
|
+
|
693
|
+
#ifdef __clang__
|
694
|
+
RAPIDJSON_DIAG_POP
|
695
|
+
#endif
|
696
|
+
|
697
|
+
#ifdef _MSC_VER
|
698
|
+
RAPIDJSON_DIAG_POP
|
699
|
+
#endif
|
700
|
+
|
701
|
+
#endif // RAPIDJSON_INTERNAL_REGEX_H_
|