isotree 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (118) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/LICENSE.txt +2 -2
  4. data/README.md +22 -1
  5. data/ext/isotree/ext.cpp +26 -0
  6. data/ext/isotree/extconf.rb +3 -3
  7. data/lib/isotree.rb +1 -0
  8. data/lib/isotree/isolation_forest.rb +86 -1
  9. data/lib/isotree/version.rb +1 -1
  10. data/vendor/cereal/LICENSE +24 -0
  11. data/vendor/cereal/README.md +85 -0
  12. data/vendor/cereal/include/cereal/access.hpp +351 -0
  13. data/vendor/cereal/include/cereal/archives/adapters.hpp +163 -0
  14. data/vendor/cereal/include/cereal/archives/binary.hpp +169 -0
  15. data/vendor/cereal/include/cereal/archives/json.hpp +1019 -0
  16. data/vendor/cereal/include/cereal/archives/portable_binary.hpp +334 -0
  17. data/vendor/cereal/include/cereal/archives/xml.hpp +956 -0
  18. data/vendor/cereal/include/cereal/cereal.hpp +1089 -0
  19. data/vendor/cereal/include/cereal/details/helpers.hpp +422 -0
  20. data/vendor/cereal/include/cereal/details/polymorphic_impl.hpp +796 -0
  21. data/vendor/cereal/include/cereal/details/polymorphic_impl_fwd.hpp +65 -0
  22. data/vendor/cereal/include/cereal/details/static_object.hpp +127 -0
  23. data/vendor/cereal/include/cereal/details/traits.hpp +1411 -0
  24. data/vendor/cereal/include/cereal/details/util.hpp +84 -0
  25. data/vendor/cereal/include/cereal/external/base64.hpp +134 -0
  26. data/vendor/cereal/include/cereal/external/rapidjson/allocators.h +284 -0
  27. data/vendor/cereal/include/cereal/external/rapidjson/cursorstreamwrapper.h +78 -0
  28. data/vendor/cereal/include/cereal/external/rapidjson/document.h +2652 -0
  29. data/vendor/cereal/include/cereal/external/rapidjson/encodedstream.h +299 -0
  30. data/vendor/cereal/include/cereal/external/rapidjson/encodings.h +716 -0
  31. data/vendor/cereal/include/cereal/external/rapidjson/error/en.h +74 -0
  32. data/vendor/cereal/include/cereal/external/rapidjson/error/error.h +161 -0
  33. data/vendor/cereal/include/cereal/external/rapidjson/filereadstream.h +99 -0
  34. data/vendor/cereal/include/cereal/external/rapidjson/filewritestream.h +104 -0
  35. data/vendor/cereal/include/cereal/external/rapidjson/fwd.h +151 -0
  36. data/vendor/cereal/include/cereal/external/rapidjson/internal/biginteger.h +290 -0
  37. data/vendor/cereal/include/cereal/external/rapidjson/internal/diyfp.h +271 -0
  38. data/vendor/cereal/include/cereal/external/rapidjson/internal/dtoa.h +245 -0
  39. data/vendor/cereal/include/cereal/external/rapidjson/internal/ieee754.h +78 -0
  40. data/vendor/cereal/include/cereal/external/rapidjson/internal/itoa.h +308 -0
  41. data/vendor/cereal/include/cereal/external/rapidjson/internal/meta.h +186 -0
  42. data/vendor/cereal/include/cereal/external/rapidjson/internal/pow10.h +55 -0
  43. data/vendor/cereal/include/cereal/external/rapidjson/internal/regex.h +740 -0
  44. data/vendor/cereal/include/cereal/external/rapidjson/internal/stack.h +232 -0
  45. data/vendor/cereal/include/cereal/external/rapidjson/internal/strfunc.h +69 -0
  46. data/vendor/cereal/include/cereal/external/rapidjson/internal/strtod.h +290 -0
  47. data/vendor/cereal/include/cereal/external/rapidjson/internal/swap.h +46 -0
  48. data/vendor/cereal/include/cereal/external/rapidjson/istreamwrapper.h +128 -0
  49. data/vendor/cereal/include/cereal/external/rapidjson/memorybuffer.h +70 -0
  50. data/vendor/cereal/include/cereal/external/rapidjson/memorystream.h +71 -0
  51. data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/inttypes.h +316 -0
  52. data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/stdint.h +300 -0
  53. data/vendor/cereal/include/cereal/external/rapidjson/ostreamwrapper.h +81 -0
  54. data/vendor/cereal/include/cereal/external/rapidjson/pointer.h +1414 -0
  55. data/vendor/cereal/include/cereal/external/rapidjson/prettywriter.h +277 -0
  56. data/vendor/cereal/include/cereal/external/rapidjson/rapidjson.h +656 -0
  57. data/vendor/cereal/include/cereal/external/rapidjson/reader.h +2230 -0
  58. data/vendor/cereal/include/cereal/external/rapidjson/schema.h +2497 -0
  59. data/vendor/cereal/include/cereal/external/rapidjson/stream.h +223 -0
  60. data/vendor/cereal/include/cereal/external/rapidjson/stringbuffer.h +121 -0
  61. data/vendor/cereal/include/cereal/external/rapidjson/writer.h +709 -0
  62. data/vendor/cereal/include/cereal/external/rapidxml/license.txt +52 -0
  63. data/vendor/cereal/include/cereal/external/rapidxml/manual.html +406 -0
  64. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml.hpp +2624 -0
  65. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_iterators.hpp +175 -0
  66. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_print.hpp +428 -0
  67. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_utils.hpp +123 -0
  68. data/vendor/cereal/include/cereal/macros.hpp +154 -0
  69. data/vendor/cereal/include/cereal/specialize.hpp +139 -0
  70. data/vendor/cereal/include/cereal/types/array.hpp +79 -0
  71. data/vendor/cereal/include/cereal/types/atomic.hpp +55 -0
  72. data/vendor/cereal/include/cereal/types/base_class.hpp +203 -0
  73. data/vendor/cereal/include/cereal/types/bitset.hpp +176 -0
  74. data/vendor/cereal/include/cereal/types/boost_variant.hpp +164 -0
  75. data/vendor/cereal/include/cereal/types/chrono.hpp +72 -0
  76. data/vendor/cereal/include/cereal/types/common.hpp +129 -0
  77. data/vendor/cereal/include/cereal/types/complex.hpp +56 -0
  78. data/vendor/cereal/include/cereal/types/concepts/pair_associative_container.hpp +73 -0
  79. data/vendor/cereal/include/cereal/types/deque.hpp +62 -0
  80. data/vendor/cereal/include/cereal/types/forward_list.hpp +68 -0
  81. data/vendor/cereal/include/cereal/types/functional.hpp +43 -0
  82. data/vendor/cereal/include/cereal/types/list.hpp +62 -0
  83. data/vendor/cereal/include/cereal/types/map.hpp +36 -0
  84. data/vendor/cereal/include/cereal/types/memory.hpp +425 -0
  85. data/vendor/cereal/include/cereal/types/optional.hpp +66 -0
  86. data/vendor/cereal/include/cereal/types/polymorphic.hpp +483 -0
  87. data/vendor/cereal/include/cereal/types/queue.hpp +132 -0
  88. data/vendor/cereal/include/cereal/types/set.hpp +103 -0
  89. data/vendor/cereal/include/cereal/types/stack.hpp +76 -0
  90. data/vendor/cereal/include/cereal/types/string.hpp +61 -0
  91. data/vendor/cereal/include/cereal/types/tuple.hpp +123 -0
  92. data/vendor/cereal/include/cereal/types/unordered_map.hpp +36 -0
  93. data/vendor/cereal/include/cereal/types/unordered_set.hpp +99 -0
  94. data/vendor/cereal/include/cereal/types/utility.hpp +47 -0
  95. data/vendor/cereal/include/cereal/types/valarray.hpp +89 -0
  96. data/vendor/cereal/include/cereal/types/variant.hpp +109 -0
  97. data/vendor/cereal/include/cereal/types/vector.hpp +112 -0
  98. data/vendor/cereal/include/cereal/version.hpp +52 -0
  99. data/vendor/isotree/LICENSE +1 -1
  100. data/vendor/isotree/README.md +2 -1
  101. data/vendor/isotree/src/RcppExports.cpp +44 -4
  102. data/vendor/isotree/src/Rwrapper.cpp +141 -51
  103. data/vendor/isotree/src/crit.cpp +1 -1
  104. data/vendor/isotree/src/dealloc.cpp +1 -1
  105. data/vendor/isotree/src/dist.cpp +6 -6
  106. data/vendor/isotree/src/extended.cpp +5 -5
  107. data/vendor/isotree/src/fit_model.cpp +30 -19
  108. data/vendor/isotree/src/helpers_iforest.cpp +26 -11
  109. data/vendor/isotree/src/impute.cpp +7 -7
  110. data/vendor/isotree/src/isoforest.cpp +7 -7
  111. data/vendor/isotree/src/isotree.hpp +27 -5
  112. data/vendor/isotree/src/merge_models.cpp +1 -1
  113. data/vendor/isotree/src/mult.cpp +1 -1
  114. data/vendor/isotree/src/predict.cpp +20 -16
  115. data/vendor/isotree/src/serialize.cpp +1 -1
  116. data/vendor/isotree/src/sql.cpp +545 -0
  117. data/vendor/isotree/src/utils.cpp +36 -44
  118. metadata +98 -92
@@ -0,0 +1,186 @@
1
+ // Tencent is pleased to support the open source community by making RapidJSON available.
2
+ //
3
+ // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
4
+ //
5
+ // Licensed under the MIT License (the "License"); you may not use this file except
6
+ // in compliance with the License. You may obtain a copy of the License at
7
+ //
8
+ // http://opensource.org/licenses/MIT
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software distributed
11
+ // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12
+ // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13
+ // specific language governing permissions and limitations under the License.
14
+
15
+ #ifndef CEREAL_RAPIDJSON_INTERNAL_META_H_
16
+ #define CEREAL_RAPIDJSON_INTERNAL_META_H_
17
+
18
+ #include "../rapidjson.h"
19
+
20
+ #ifdef __GNUC__
21
+ CEREAL_RAPIDJSON_DIAG_PUSH
22
+ CEREAL_RAPIDJSON_DIAG_OFF(effc++)
23
+ #endif
24
+
25
+ #if defined(_MSC_VER) && !defined(__clang__)
26
+ CEREAL_RAPIDJSON_DIAG_PUSH
27
+ CEREAL_RAPIDJSON_DIAG_OFF(6334)
28
+ #endif
29
+
30
+ #if CEREAL_RAPIDJSON_HAS_CXX11_TYPETRAITS
31
+ #include <type_traits>
32
+ #endif
33
+
34
+ //@cond CEREAL_RAPIDJSON_INTERNAL
35
+ CEREAL_RAPIDJSON_NAMESPACE_BEGIN
36
+ namespace internal {
37
+
38
+ // Helper to wrap/convert arbitrary types to void, useful for arbitrary type matching
39
+ template <typename T> struct Void { typedef void Type; };
40
+
41
+ ///////////////////////////////////////////////////////////////////////////////
42
+ // BoolType, TrueType, FalseType
43
+ //
44
+ template <bool Cond> struct BoolType {
45
+ static const bool Value = Cond;
46
+ typedef BoolType Type;
47
+ };
48
+ typedef BoolType<true> TrueType;
49
+ typedef BoolType<false> FalseType;
50
+
51
+
52
+ ///////////////////////////////////////////////////////////////////////////////
53
+ // SelectIf, BoolExpr, NotExpr, AndExpr, OrExpr
54
+ //
55
+
56
+ template <bool C> struct SelectIfImpl { template <typename T1, typename T2> struct Apply { typedef T1 Type; }; };
57
+ template <> struct SelectIfImpl<false> { template <typename T1, typename T2> struct Apply { typedef T2 Type; }; };
58
+ template <bool C, typename T1, typename T2> struct SelectIfCond : SelectIfImpl<C>::template Apply<T1,T2> {};
59
+ template <typename C, typename T1, typename T2> struct SelectIf : SelectIfCond<C::Value, T1, T2> {};
60
+
61
+ template <bool Cond1, bool Cond2> struct AndExprCond : FalseType {};
62
+ template <> struct AndExprCond<true, true> : TrueType {};
63
+ template <bool Cond1, bool Cond2> struct OrExprCond : TrueType {};
64
+ template <> struct OrExprCond<false, false> : FalseType {};
65
+
66
+ template <typename C> struct BoolExpr : SelectIf<C,TrueType,FalseType>::Type {};
67
+ template <typename C> struct NotExpr : SelectIf<C,FalseType,TrueType>::Type {};
68
+ template <typename C1, typename C2> struct AndExpr : AndExprCond<C1::Value, C2::Value>::Type {};
69
+ template <typename C1, typename C2> struct OrExpr : OrExprCond<C1::Value, C2::Value>::Type {};
70
+
71
+
72
+ ///////////////////////////////////////////////////////////////////////////////
73
+ // AddConst, MaybeAddConst, RemoveConst
74
+ template <typename T> struct AddConst { typedef const T Type; };
75
+ template <bool Constify, typename T> struct MaybeAddConst : SelectIfCond<Constify, const T, T> {};
76
+ template <typename T> struct RemoveConst { typedef T Type; };
77
+ template <typename T> struct RemoveConst<const T> { typedef T Type; };
78
+
79
+
80
+ ///////////////////////////////////////////////////////////////////////////////
81
+ // IsSame, IsConst, IsMoreConst, IsPointer
82
+ //
83
+ template <typename T, typename U> struct IsSame : FalseType {};
84
+ template <typename T> struct IsSame<T, T> : TrueType {};
85
+
86
+ template <typename T> struct IsConst : FalseType {};
87
+ template <typename T> struct IsConst<const T> : TrueType {};
88
+
89
+ template <typename CT, typename T>
90
+ struct IsMoreConst // True when CT and T are the same type once const is removed AND CT is at least as const as T.
91
+ : AndExpr<IsSame<typename RemoveConst<CT>::Type, typename RemoveConst<T>::Type>, // same type ignoring const
92
+ BoolType<IsConst<CT>::Value >= IsConst<T>::Value> >::Type {}; // compares the two bools: const (true) >= non-const (false)
93
+
94
+ template <typename T> struct IsPointer : FalseType {};
95
+ template <typename T> struct IsPointer<T*> : TrueType {};
96
+
97
+ ///////////////////////////////////////////////////////////////////////////////
98
+ // IsBaseOf
99
+ //
100
+ #if CEREAL_RAPIDJSON_HAS_CXX11_TYPETRAITS
101
+
102
+ template <typename B, typename D> struct IsBaseOf
103
+ : BoolType< ::std::is_base_of<B,D>::value> {};
104
+
105
+ #else // simplified version adopted from Boost
106
+
107
+ template<typename B, typename D> struct IsBaseOfImpl { // Pre-C++11 helper for IsBaseOf: Value is decided purely by overload resolution inside sizeof.
108
+ CEREAL_RAPIDJSON_STATIC_ASSERT(sizeof(B) != 0); // sizeof forces B to be a complete type
109
+ CEREAL_RAPIDJSON_STATIC_ASSERT(sizeof(D) != 0); // sizeof forces D to be a complete type
110
+
111
+ typedef char (&Yes)[1]; // result type of size 1
112
+ typedef char (&No) [2]; // result type of size 2, so the two outcomes differ under sizeof
113
+
114
+ template <typename T>
115
+ static Yes Check(const D*, T); // declared only; selecting this overload yields sizeof(Yes)
116
+ static No Check(const B*, int); // declared only; selecting this overload yields sizeof(No)
117
+
118
+ struct Host { // converts to either pointer type; the two operators differ in const-qualification
119
+ operator const B*() const;
120
+ operator const D*();
121
+ };
122
+
123
+ enum { Value = (sizeof(Check(Host(), 0)) == sizeof(Yes)) }; // true when Check(const D*, T) is chosen. NOTE(review): per the Boost technique this is adopted from, that should be the case when D derives from B - confirm against Boost is_base_of.
124
+ };
125
+
126
+ template <typename B, typename D> struct IsBaseOf
127
+ : OrExpr<IsSame<B, D>, BoolExpr<IsBaseOfImpl<B, D> > >::Type {};
128
+
129
+ #endif // CEREAL_RAPIDJSON_HAS_CXX11_TYPETRAITS
130
+
131
+
132
+ //////////////////////////////////////////////////////////////////////////
133
+ // EnableIf / DisableIf
134
+ //
135
+ template <bool Condition, typename T = void> struct EnableIfCond { typedef T Type; };
136
+ template <typename T> struct EnableIfCond<false, T> { /* empty */ };
137
+
138
+ template <bool Condition, typename T = void> struct DisableIfCond { typedef T Type; };
139
+ template <typename T> struct DisableIfCond<true, T> { /* empty */ };
140
+
141
+ template <typename Condition, typename T = void>
142
+ struct EnableIf : EnableIfCond<Condition::Value, T> {};
143
+
144
+ template <typename Condition, typename T = void>
145
+ struct DisableIf : DisableIfCond<Condition::Value, T> {};
146
+
147
+ // SFINAE helpers
148
+ struct SfinaeTag {};
149
+ template <typename T> struct RemoveSfinaeTag;
150
+ template <typename T> struct RemoveSfinaeTag<SfinaeTag&(*)(T)> { typedef T Type; };
151
+
152
+ #define CEREAL_RAPIDJSON_REMOVEFPTR_(type) \
153
+ typename ::CEREAL_RAPIDJSON_NAMESPACE::internal::RemoveSfinaeTag \
154
+ < ::CEREAL_RAPIDJSON_NAMESPACE::internal::SfinaeTag&(*) type>::Type
155
+
156
+ #define CEREAL_RAPIDJSON_ENABLEIF(cond) \
157
+ typename ::CEREAL_RAPIDJSON_NAMESPACE::internal::EnableIf \
158
+ <CEREAL_RAPIDJSON_REMOVEFPTR_(cond)>::Type * = NULL
159
+
160
+ #define CEREAL_RAPIDJSON_DISABLEIF(cond) \
161
+ typename ::CEREAL_RAPIDJSON_NAMESPACE::internal::DisableIf \
162
+ <CEREAL_RAPIDJSON_REMOVEFPTR_(cond)>::Type * = NULL
163
+
164
+ #define CEREAL_RAPIDJSON_ENABLEIF_RETURN(cond,returntype) \
165
+ typename ::CEREAL_RAPIDJSON_NAMESPACE::internal::EnableIf \
166
+ <CEREAL_RAPIDJSON_REMOVEFPTR_(cond), \
167
+ CEREAL_RAPIDJSON_REMOVEFPTR_(returntype)>::Type
168
+
169
+ #define CEREAL_RAPIDJSON_DISABLEIF_RETURN(cond,returntype) \
170
+ typename ::CEREAL_RAPIDJSON_NAMESPACE::internal::DisableIf \
171
+ <CEREAL_RAPIDJSON_REMOVEFPTR_(cond), \
172
+ CEREAL_RAPIDJSON_REMOVEFPTR_(returntype)>::Type
173
+
174
+ } // namespace internal
175
+ CEREAL_RAPIDJSON_NAMESPACE_END
176
+ //@endcond
177
+
178
+ #if defined(_MSC_VER) && !defined(__clang__)
179
+ CEREAL_RAPIDJSON_DIAG_POP
180
+ #endif
181
+
182
+ #ifdef __GNUC__
183
+ CEREAL_RAPIDJSON_DIAG_POP
184
+ #endif
185
+
186
+ #endif // CEREAL_RAPIDJSON_INTERNAL_META_H_
@@ -0,0 +1,55 @@
1
+ // Tencent is pleased to support the open source community by making RapidJSON available.
2
+ //
3
+ // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
4
+ //
5
+ // Licensed under the MIT License (the "License"); you may not use this file except
6
+ // in compliance with the License. You may obtain a copy of the License at
7
+ //
8
+ // http://opensource.org/licenses/MIT
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software distributed
11
+ // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12
+ // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13
+ // specific language governing permissions and limitations under the License.
14
+
15
+ #ifndef CEREAL_RAPIDJSON_POW10_
16
+ #define CEREAL_RAPIDJSON_POW10_
17
+
18
+ #include "../rapidjson.h"
19
+
20
+ CEREAL_RAPIDJSON_NAMESPACE_BEGIN
21
+ namespace internal {
22
+
23
+ //! Computes integer powers of 10 in double (10.0^n).
24
+ /*! This function uses a lookup table for fast and accurate results.
25
+ \param n non-negative exponent. Must be <= 308.
26
+ \return 10.0^n
27
+ */
28
+ inline double Pow10(int n) { // pure table lookup: returns the n-th entry of e[]
29
+ static const double e[] = { // 1e+0...1e+308: 309 entries * 8 bytes = 2472 bytes
30
+ 1e+0,
31
+ 1e+1, 1e+2, 1e+3, 1e+4, 1e+5, 1e+6, 1e+7, 1e+8, 1e+9, 1e+10, 1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17, 1e+18, 1e+19, 1e+20,
32
+ 1e+21, 1e+22, 1e+23, 1e+24, 1e+25, 1e+26, 1e+27, 1e+28, 1e+29, 1e+30, 1e+31, 1e+32, 1e+33, 1e+34, 1e+35, 1e+36, 1e+37, 1e+38, 1e+39, 1e+40,
33
+ 1e+41, 1e+42, 1e+43, 1e+44, 1e+45, 1e+46, 1e+47, 1e+48, 1e+49, 1e+50, 1e+51, 1e+52, 1e+53, 1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60,
34
+ 1e+61, 1e+62, 1e+63, 1e+64, 1e+65, 1e+66, 1e+67, 1e+68, 1e+69, 1e+70, 1e+71, 1e+72, 1e+73, 1e+74, 1e+75, 1e+76, 1e+77, 1e+78, 1e+79, 1e+80,
35
+ 1e+81, 1e+82, 1e+83, 1e+84, 1e+85, 1e+86, 1e+87, 1e+88, 1e+89, 1e+90, 1e+91, 1e+92, 1e+93, 1e+94, 1e+95, 1e+96, 1e+97, 1e+98, 1e+99, 1e+100,
36
+ 1e+101,1e+102,1e+103,1e+104,1e+105,1e+106,1e+107,1e+108,1e+109,1e+110,1e+111,1e+112,1e+113,1e+114,1e+115,1e+116,1e+117,1e+118,1e+119,1e+120,
37
+ 1e+121,1e+122,1e+123,1e+124,1e+125,1e+126,1e+127,1e+128,1e+129,1e+130,1e+131,1e+132,1e+133,1e+134,1e+135,1e+136,1e+137,1e+138,1e+139,1e+140,
38
+ 1e+141,1e+142,1e+143,1e+144,1e+145,1e+146,1e+147,1e+148,1e+149,1e+150,1e+151,1e+152,1e+153,1e+154,1e+155,1e+156,1e+157,1e+158,1e+159,1e+160,
39
+ 1e+161,1e+162,1e+163,1e+164,1e+165,1e+166,1e+167,1e+168,1e+169,1e+170,1e+171,1e+172,1e+173,1e+174,1e+175,1e+176,1e+177,1e+178,1e+179,1e+180,
40
+ 1e+181,1e+182,1e+183,1e+184,1e+185,1e+186,1e+187,1e+188,1e+189,1e+190,1e+191,1e+192,1e+193,1e+194,1e+195,1e+196,1e+197,1e+198,1e+199,1e+200,
41
+ 1e+201,1e+202,1e+203,1e+204,1e+205,1e+206,1e+207,1e+208,1e+209,1e+210,1e+211,1e+212,1e+213,1e+214,1e+215,1e+216,1e+217,1e+218,1e+219,1e+220,
42
+ 1e+221,1e+222,1e+223,1e+224,1e+225,1e+226,1e+227,1e+228,1e+229,1e+230,1e+231,1e+232,1e+233,1e+234,1e+235,1e+236,1e+237,1e+238,1e+239,1e+240,
43
+ 1e+241,1e+242,1e+243,1e+244,1e+245,1e+246,1e+247,1e+248,1e+249,1e+250,1e+251,1e+252,1e+253,1e+254,1e+255,1e+256,1e+257,1e+258,1e+259,1e+260,
44
+ 1e+261,1e+262,1e+263,1e+264,1e+265,1e+266,1e+267,1e+268,1e+269,1e+270,1e+271,1e+272,1e+273,1e+274,1e+275,1e+276,1e+277,1e+278,1e+279,1e+280,
45
+ 1e+281,1e+282,1e+283,1e+284,1e+285,1e+286,1e+287,1e+288,1e+289,1e+290,1e+291,1e+292,1e+293,1e+294,1e+295,1e+296,1e+297,1e+298,1e+299,1e+300,
46
+ 1e+301,1e+302,1e+303,1e+304,1e+305,1e+306,1e+307,1e+308
47
+ };
48
+ CEREAL_RAPIDJSON_ASSERT(n >= 0 && n <= 308); // n is used directly as the index into e[], so it must stay inside the table
49
+ return e[n];
50
+ }
51
+
52
+ } // namespace internal
53
+ CEREAL_RAPIDJSON_NAMESPACE_END
54
+
55
+ #endif // CEREAL_RAPIDJSON_POW10_
@@ -0,0 +1,740 @@
1
+ // Tencent is pleased to support the open source community by making RapidJSON available.
2
+ //
3
+ // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
4
+ //
5
+ // Licensed under the MIT License (the "License"); you may not use this file except
6
+ // in compliance with the License. You may obtain a copy of the License at
7
+ //
8
+ // http://opensource.org/licenses/MIT
9
+ //
10
+ // Unless required by applicable law or agreed to in writing, software distributed
11
+ // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12
+ // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13
+ // specific language governing permissions and limitations under the License.
14
+
15
+ #ifndef CEREAL_RAPIDJSON_INTERNAL_REGEX_H_
16
+ #define CEREAL_RAPIDJSON_INTERNAL_REGEX_H_
17
+
18
+ #include "../allocators.h"
19
+ #include "../stream.h"
20
+ #include "stack.h"
21
+
22
+ #ifdef __clang__
23
+ CEREAL_RAPIDJSON_DIAG_PUSH
24
+ CEREAL_RAPIDJSON_DIAG_OFF(padded)
25
+ CEREAL_RAPIDJSON_DIAG_OFF(switch-enum)
26
+ CEREAL_RAPIDJSON_DIAG_OFF(implicit-fallthrough)
27
+ #elif defined(_MSC_VER)
28
+ CEREAL_RAPIDJSON_DIAG_PUSH
29
+ CEREAL_RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
30
+ #endif
31
+
32
+ #ifdef __GNUC__
33
+ CEREAL_RAPIDJSON_DIAG_PUSH
34
+ CEREAL_RAPIDJSON_DIAG_OFF(effc++)
35
+ #if __GNUC__ >= 7
36
+ CEREAL_RAPIDJSON_DIAG_OFF(implicit-fallthrough)
37
+ #endif
38
+ #endif
39
+
40
+ #ifndef CEREAL_RAPIDJSON_REGEX_VERBOSE
41
+ #define CEREAL_RAPIDJSON_REGEX_VERBOSE 0
42
+ #endif
43
+
44
+ CEREAL_RAPIDJSON_NAMESPACE_BEGIN
45
+ namespace internal {
46
+
47
+ ///////////////////////////////////////////////////////////////////////////////
48
+ // DecodedStream
49
+
50
+ template <typename SourceStream, typename Encoding>
51
+ class DecodedStream { // Wraps a source stream and exposes it one decoded code point at a time, using Encoding::Decode.
52
+ public:
53
+ DecodedStream(SourceStream& ss) : ss_(ss), codepoint_() { Decode(); } // decodes the first code point up front so Peek() is valid immediately
54
+ unsigned Peek() { return codepoint_; } // current code point, without consuming it
55
+ unsigned Take() { // returns the current code point and advances to the next one
56
+ unsigned c = codepoint_;
57
+ if (c) // No further decoding once the current code point is '\0'
58
+ Decode();
59
+ return c;
60
+ }
61
+
62
+ private:
63
+ void Decode() { // loads the next code point from ss_ into codepoint_
64
+ if (!Encoding::Decode(ss_, &codepoint_))
65
+ codepoint_ = 0; // a failed decode is reported as 0
66
+ }
67
+
68
+ SourceStream& ss_; // underlying stream, held by reference
69
+ unsigned codepoint_; // most recently decoded code point; 0 after a decode failure
70
+ };
71
+
72
+ ///////////////////////////////////////////////////////////////////////////////
73
+ // GenericRegex
74
+
75
+ static const SizeType kRegexInvalidState = ~SizeType(0); //!< Represents an invalid index in GenericRegex::State::out, out1
76
+ static const SizeType kRegexInvalidRange = ~SizeType(0);
77
+
78
+ template <typename Encoding, typename Allocator>
79
+ class GenericRegexSearch;
80
+
81
+ //! Regular expression engine supporting a subset of the ECMAScript grammar.
82
+ /*!
83
+ Supported regular expression syntax:
84
+ - \c ab Concatenation
85
+ - \c a|b Alternation
86
+ - \c a? Zero or one
87
+ - \c a* Zero or more
88
+ - \c a+ One or more
89
+ - \c a{3} Exactly 3 times
90
+ - \c a{3,} At least 3 times
91
+ - \c a{3,5} 3 to 5 times
92
+ - \c (ab) Grouping
93
+ - \c ^a At the beginning
94
+ - \c a$ At the end
95
+ - \c . Any character
96
+ - \c [abc] Character classes
97
+ - \c [a-c] Character class range
98
+ - \c [a-z0-9_] Character class combination
99
+ - \c [^abc] Negated character classes
100
+ - \c [^a-c] Negated character class range
101
+ - \c [\b] Backspace (U+0008)
102
+ - \c \\| \\\\ ... Escape characters
103
+ - \c \\f Form feed (U+000C)
104
+ - \c \\n Line feed (U+000A)
105
+ - \c \\r Carriage return (U+000D)
106
+ - \c \\t Tab (U+0009)
107
+ - \c \\v Vertical tab (U+000B)
108
+
109
+ \note This is a Thompson NFA engine, implemented with reference to
110
+ Cox, Russ. "Regular Expression Matching Can Be Simple And Fast (but is slow in Java, Perl, PHP, Python, Ruby,...).",
111
+ https://swtch.com/~rsc/regexp/regexp1.html
112
+ */
113
+ template <typename Encoding, typename Allocator = CrtAllocator>
114
+ class GenericRegex {
115
+ public:
116
+ typedef Encoding EncodingType;
117
+ typedef typename Encoding::Ch Ch;
118
+ template <typename, typename> friend class GenericRegexSearch;
119
+
120
+ GenericRegex(const Ch* source, Allocator* allocator = 0) :
121
+ ownAllocator_(allocator ? 0 : CEREAL_RAPIDJSON_NEW(Allocator)()), allocator_(allocator ? allocator : ownAllocator_),
122
+ states_(allocator_, 256), ranges_(allocator_, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(),
123
+ anchorBegin_(), anchorEnd_()
124
+ {
125
+ GenericStringStream<Encoding> ss(source);
126
+ DecodedStream<GenericStringStream<Encoding>, Encoding> ds(ss);
127
+ Parse(ds);
128
+ }
129
+
130
+ ~GenericRegex()
131
+ {
132
+ CEREAL_RAPIDJSON_DELETE(ownAllocator_);
133
+ }
134
+
135
+ bool IsValid() const {
136
+ return root_ != kRegexInvalidState;
137
+ }
138
+
139
+ private:
140
+ enum Operator {
141
+ kZeroOrOne,
142
+ kZeroOrMore,
143
+ kOneOrMore,
144
+ kConcatenation,
145
+ kAlternation,
146
+ kLeftParenthesis
147
+ };
148
+
149
+ static const unsigned kAnyCharacterClass = 0xFFFFFFFF; //!< For '.'
150
+ static const unsigned kRangeCharacterClass = 0xFFFFFFFE;
151
+ static const unsigned kRangeNegationFlag = 0x80000000;
152
+
153
+ struct Range { // Element type stored in ranges_ and fetched through GetRange(); a State refers to one via rangeStart.
154
+ unsigned start; // NOTE(review): presumably the first code point of the range; ParseRange is not visible here - confirm (may also carry kRangeNegationFlag)
155
+ unsigned end; // NOTE(review): presumably the last code point of the range - confirm against ParseRange
156
+ SizeType next; // NOTE(review): presumably the index of the next Range of the same character class, kRegexInvalidRange at the end - confirm
157
+ };
158
+
159
+ struct State { // One NFA state; stored in states_ and addressed by index through GetState().
160
+ SizeType out; //!< Index of the next state; kRegexInvalidState for the matching state and for outputs that are not patched yet
161
+ SizeType out1; //!< Index of the second branch for a split state; kRegexInvalidState otherwise
162
+ SizeType rangeStart; //!< Index of the first Range when codepoint is kRangeCharacterClass; kRegexInvalidRange otherwise
163
+ unsigned codepoint; //!< Code point to match, or kAnyCharacterClass / kRangeCharacterClass; 0 for split and matching states
164
+ };
165
+
166
+ struct Frag { // Partially built NFA fragment; instances live on the operand stack while Parse() runs.
167
+ Frag(SizeType s, SizeType o, SizeType m) : start(s), out(o), minIndex(m) {}
168
+ SizeType start; //!< Index of the fragment's entry state
169
+ SizeType out; //!< Head of a linked list (chained through State::out) of all dangling output states
170
+ SizeType minIndex; //!< Minimum of the operands' minIndex when fragments are merged in Eval(); equals the state index for a single-state fragment
171
+ };
172
+
173
+ State& GetState(SizeType index) {
174
+ CEREAL_RAPIDJSON_ASSERT(index < stateCount_);
175
+ return states_.template Bottom<State>()[index];
176
+ }
177
+
178
+ const State& GetState(SizeType index) const {
179
+ CEREAL_RAPIDJSON_ASSERT(index < stateCount_);
180
+ return states_.template Bottom<State>()[index];
181
+ }
182
+
183
+ Range& GetRange(SizeType index) {
184
+ CEREAL_RAPIDJSON_ASSERT(index < rangeCount_);
185
+ return ranges_.template Bottom<Range>()[index];
186
+ }
187
+
188
+ const Range& GetRange(SizeType index) const {
189
+ CEREAL_RAPIDJSON_ASSERT(index < rangeCount_);
190
+ return ranges_.template Bottom<Range>()[index];
191
+ }
192
+
193
+ template <typename InputStream>
194
+ void Parse(DecodedStream<InputStream, Encoding>& ds) {
195
+ Stack<Allocator> operandStack(allocator_, 256); // Frag
196
+ Stack<Allocator> operatorStack(allocator_, 256); // Operator
197
+ Stack<Allocator> atomCountStack(allocator_, 256); // unsigned (Atom per parenthesis)
198
+
199
+ *atomCountStack.template Push<unsigned>() = 0;
200
+
201
+ unsigned codepoint;
202
+ while (ds.Peek() != 0) {
203
+ switch (codepoint = ds.Take()) {
204
+ case '^':
205
+ anchorBegin_ = true;
206
+ break;
207
+
208
+ case '$':
209
+ anchorEnd_ = true;
210
+ break;
211
+
212
+ case '|':
213
+ while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() < kAlternation)
214
+ if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
215
+ return;
216
+ *operatorStack.template Push<Operator>() = kAlternation;
217
+ *atomCountStack.template Top<unsigned>() = 0;
218
+ break;
219
+
220
+ case '(':
221
+ *operatorStack.template Push<Operator>() = kLeftParenthesis;
222
+ *atomCountStack.template Push<unsigned>() = 0;
223
+ break;
224
+
225
+ case ')':
226
+ while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() != kLeftParenthesis)
227
+ if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
228
+ return;
229
+ if (operatorStack.Empty())
230
+ return;
231
+ operatorStack.template Pop<Operator>(1);
232
+ atomCountStack.template Pop<unsigned>(1);
233
+ ImplicitConcatenation(atomCountStack, operatorStack);
234
+ break;
235
+
236
+ case '?':
237
+ if (!Eval(operandStack, kZeroOrOne))
238
+ return;
239
+ break;
240
+
241
+ case '*':
242
+ if (!Eval(operandStack, kZeroOrMore))
243
+ return;
244
+ break;
245
+
246
+ case '+':
247
+ if (!Eval(operandStack, kOneOrMore))
248
+ return;
249
+ break;
250
+
251
+ case '{':
252
+ {
253
+ unsigned n, m;
254
+ if (!ParseUnsigned(ds, &n))
255
+ return;
256
+
257
+ if (ds.Peek() == ',') {
258
+ ds.Take();
259
+ if (ds.Peek() == '}')
260
+ m = kInfinityQuantifier;
261
+ else if (!ParseUnsigned(ds, &m) || m < n)
262
+ return;
263
+ }
264
+ else
265
+ m = n;
266
+
267
+ if (!EvalQuantifier(operandStack, n, m) || ds.Peek() != '}')
268
+ return;
269
+ ds.Take();
270
+ }
271
+ break;
272
+
273
+ case '.':
274
+ PushOperand(operandStack, kAnyCharacterClass);
275
+ ImplicitConcatenation(atomCountStack, operatorStack);
276
+ break;
277
+
278
+ case '[':
279
+ {
280
+ SizeType range;
281
+ if (!ParseRange(ds, &range))
282
+ return;
283
+ SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, kRangeCharacterClass);
284
+ GetState(s).rangeStart = range;
285
+ *operandStack.template Push<Frag>() = Frag(s, s, s);
286
+ }
287
+ ImplicitConcatenation(atomCountStack, operatorStack);
288
+ break;
289
+
290
+ case '\\': // Escape character
291
+ if (!CharacterEscape(ds, &codepoint))
292
+ return; // Unsupported escape character
293
+ // fall through to default
294
+
295
+ default: // Pattern character
296
+ PushOperand(operandStack, codepoint);
297
+ ImplicitConcatenation(atomCountStack, operatorStack);
298
+ }
299
+ }
300
+
301
+ while (!operatorStack.Empty())
302
+ if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
303
+ return;
304
+
305
+ // Link the operand to matching state.
306
+ if (operandStack.GetSize() == sizeof(Frag)) {
307
+ Frag* e = operandStack.template Pop<Frag>(1);
308
+ Patch(e->out, NewState(kRegexInvalidState, kRegexInvalidState, 0));
309
+ root_ = e->start;
310
+
311
+ #if CEREAL_RAPIDJSON_REGEX_VERBOSE
312
+ printf("root: %d\n", root_);
313
+ for (SizeType i = 0; i < stateCount_ ; i++) {
314
+ State& s = GetState(i);
315
+ printf("[%2d] out: %2d out1: %2d c: '%c'\n", i, s.out, s.out1, (char)s.codepoint);
316
+ }
317
+ printf("\n");
318
+ #endif
319
+ }
320
+ }
321
+
322
+ SizeType NewState(SizeType out, SizeType out1, unsigned codepoint) { // Appends a State to states_ and returns its index.
323
+ State* s = states_.template Push<State>();
324
+ s->out = out;
325
+ s->out1 = out1;
326
+ s->codepoint = codepoint;
327
+ s->rangeStart = kRegexInvalidRange; // no character class by default; the '[' case in Parse() overwrites this afterwards
328
+ return stateCount_++; // post-increment: the value returned is the index of the state just pushed
329
+ }
330
+
331
+ void PushOperand(Stack<Allocator>& operandStack, unsigned codepoint) { // Creates one state for codepoint and pushes it as a single-state fragment.
332
+ SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, codepoint); // both outputs start out dangling
333
+ *operandStack.template Push<Frag>() = Frag(s, s, s); // start, output list and minIndex all refer to the new state
334
+ }
335
+
336
+ void ImplicitConcatenation(Stack<Allocator>& atomCountStack, Stack<Allocator>& operatorStack) { // Called after an atom has been pushed: queues the concatenation that joins it to the previous atom.
337
+ if (*atomCountStack.template Top<unsigned>()) // an earlier atom already exists in the current group
338
+ *operatorStack.template Push<Operator>() = kConcatenation;
339
+ (*atomCountStack.template Top<unsigned>())++; // count the atom that was just added
340
+ }
341
+
342
+ SizeType Append(SizeType l1, SizeType l2) { // Joins two output lists (chained through State::out): links l2 after the tail of l1 and returns the head of l1.
343
+ SizeType old = l1; // remember the head, l1 is advanced below
344
+ while (GetState(l1).out != kRegexInvalidState) // walk to the last element of the first list
345
+ l1 = GetState(l1).out;
346
+ GetState(l1).out = l2;
347
+ return old;
348
+ }
349
+
350
+ void Patch(SizeType l, SizeType s) { // Makes every state on the output list l point to state s.
351
+ for (SizeType next; l != kRegexInvalidState; l = next) {
352
+ next = GetState(l).out; // read the list link first: out is both the link and the field being overwritten
353
+ GetState(l).out = s;
354
+ }
355
+ }
356
+
357
+ bool Eval(Stack<Allocator>& operandStack, Operator op) { // Applies op to the fragment(s) on top of operandStack and pushes the combined fragment; returns false when op cannot be applied.
357
+ switch (op) {
358
+ case kConcatenation:
359
+ CEREAL_RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag) * 2); // unlike the other cases, a missing operand is asserted here rather than reported as false
360
+ {
361
+ Frag e2 = *operandStack.template Pop<Frag>(1); // right operand is on top
362
+ Frag e1 = *operandStack.template Pop<Frag>(1);
363
+ Patch(e1.out, e2.start); // every dangling output of e1 now leads into e2
364
+ *operandStack.template Push<Frag>() = Frag(e1.start, e2.out, Min(e1.minIndex, e2.minIndex));
365
+ }
366
+ return true;
367
+
368
+ case kAlternation:
369
+ if (operandStack.GetSize() >= sizeof(Frag) * 2) {
370
+ Frag e2 = *operandStack.template Pop<Frag>(1);
371
+ Frag e1 = *operandStack.template Pop<Frag>(1);
372
+ SizeType s = NewState(e1.start, e2.start, 0); // split state branching to both alternatives
373
+ *operandStack.template Push<Frag>() = Frag(s, Append(e1.out, e2.out), Min(e1.minIndex, e2.minIndex)); // dangling outputs of both branches are merged
374
+ return true;
375
+ }
376
+ return false;
377
+
378
+ case kZeroOrOne:
379
+ if (operandStack.GetSize() >= sizeof(Frag)) {
380
+ Frag e = *operandStack.template Pop<Frag>(1);
381
+ SizeType s = NewState(kRegexInvalidState, e.start, 0); // split: out1 enters e, out is left dangling as the bypass
382
+ *operandStack.template Push<Frag>() = Frag(s, Append(e.out, s), e.minIndex); // outputs are e's outputs plus the bypass through s
383
+ return true;
384
+ }
385
+ return false;
386
+
387
+ case kZeroOrMore:
388
+ if (operandStack.GetSize() >= sizeof(Frag)) {
389
+ Frag e = *operandStack.template Pop<Frag>(1);
390
+ SizeType s = NewState(kRegexInvalidState, e.start, 0); // split: out1 enters e, out is left dangling as the exit
391
+ Patch(e.out, s); // e loops back to the split state
392
+ *operandStack.template Push<Frag>() = Frag(s, s, e.minIndex); // entry is the split state, so e may be skipped entirely
393
+ return true;
394
+ }
395
+ return false;
396
+
397
+ case kOneOrMore:
398
+ if (operandStack.GetSize() >= sizeof(Frag)) {
399
+ Frag e = *operandStack.template Pop<Frag>(1);
400
+ SizeType s = NewState(kRegexInvalidState, e.start, 0); // split placed after e: out1 re-enters e, out is left dangling as the exit
401
+ Patch(e.out, s);
402
+ *operandStack.template Push<Frag>() = Frag(e.start, s, e.minIndex); // entry is e itself, so e is traversed before the split is reached
403
+ return true;
404
+ }
405
+ return false;
406
+
407
+ default:
408
+ // syntax error (e.g. an unclosed kLeftParenthesis left on the operator stack)
409
+ return false;
410
+ }
411
+ }
413
+
414
+ bool EvalQuantifier(Stack<Allocator>& operandStack, unsigned n, unsigned m) {
415
+ CEREAL_RAPIDJSON_ASSERT(n <= m);
416
+ CEREAL_RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag));
417
+
418
+ if (n == 0) {
419
+ if (m == 0) // a{0} not support
420
+ return false;
421
+ else if (m == kInfinityQuantifier)
422
+ Eval(operandStack, kZeroOrMore); // a{0,} -> a*
423
+ else {
424
+ Eval(operandStack, kZeroOrOne); // a{0,5} -> a?
425
+ for (unsigned i = 0; i < m - 1; i++)
426
+ CloneTopOperand(operandStack); // a{0,5} -> a? a? a? a? a?
427
+ for (unsigned i = 0; i < m - 1; i++)
428
+ Eval(operandStack, kConcatenation); // a{0,5} -> a?a?a?a?a?
429
+ }
430
+ return true;
431
+ }
432
+
433
+ for (unsigned i = 0; i < n - 1; i++) // a{3} -> a a a
434
+ CloneTopOperand(operandStack);
435
+
436
+ if (m == kInfinityQuantifier)
437
+ Eval(operandStack, kOneOrMore); // a{3,} -> a a a+
438
+ else if (m > n) {
439
+ CloneTopOperand(operandStack); // a{3,5} -> a a a a
440
+ Eval(operandStack, kZeroOrOne); // a{3,5} -> a a a a?
441
+ for (unsigned i = n; i < m - 1; i++)
442
+ CloneTopOperand(operandStack); // a{3,5} -> a a a a? a?
443
+ for (unsigned i = n; i < m; i++)
444
+ Eval(operandStack, kConcatenation); // a{3,5} -> a a aa?a?
445
+ }
446
+
447
+ for (unsigned i = 0; i < n - 1; i++)
448
+ Eval(operandStack, kConcatenation); // a{3} -> aaa, a{3,} -> aaa+, a{3.5} -> aaaa?a?
449
+
450
+ return true;
451
+ }
452
+
453
+ static SizeType Min(SizeType a, SizeType b) { return a < b ? a : b; }
454
+
455
+ void CloneTopOperand(Stack<Allocator>& operandStack) {
456
+ const Frag src = *operandStack.template Top<Frag>(); // Copy constructor to prevent invalidation
457
+ SizeType count = stateCount_ - src.minIndex; // Assumes top operand contains states in [src->minIndex, stateCount_)
458
+ State* s = states_.template Push<State>(count);
459
+ memcpy(s, &GetState(src.minIndex), count * sizeof(State));
460
+ for (SizeType j = 0; j < count; j++) {
461
+ if (s[j].out != kRegexInvalidState)
462
+ s[j].out += count;
463
+ if (s[j].out1 != kRegexInvalidState)
464
+ s[j].out1 += count;
465
+ }
466
+ *operandStack.template Push<Frag>() = Frag(src.start + count, src.out + count, src.minIndex + count);
467
+ stateCount_ += count;
468
+ }
469
+
470
+ template <typename InputStream>
471
+ bool ParseUnsigned(DecodedStream<InputStream, Encoding>& ds, unsigned* u) {
472
+ unsigned r = 0;
473
+ if (ds.Peek() < '0' || ds.Peek() > '9')
474
+ return false;
475
+ while (ds.Peek() >= '0' && ds.Peek() <= '9') {
476
+ if (r >= 429496729 && ds.Peek() > '5') // 2^32 - 1 = 4294967295
477
+ return false; // overflow
478
+ r = r * 10 + (ds.Take() - '0');
479
+ }
480
+ *u = r;
481
+ return true;
482
+ }
483
+
484
+ template <typename InputStream>
485
+ bool ParseRange(DecodedStream<InputStream, Encoding>& ds, SizeType* range) {
486
+ bool isBegin = true;
487
+ bool negate = false;
488
+ int step = 0;
489
+ SizeType start = kRegexInvalidRange;
490
+ SizeType current = kRegexInvalidRange;
491
+ unsigned codepoint;
492
+ while ((codepoint = ds.Take()) != 0) {
493
+ if (isBegin) {
494
+ isBegin = false;
495
+ if (codepoint == '^') {
496
+ negate = true;
497
+ continue;
498
+ }
499
+ }
500
+
501
+ switch (codepoint) {
502
+ case ']':
503
+ if (start == kRegexInvalidRange)
504
+ return false; // Error: nothing inside []
505
+ if (step == 2) { // Add trailing '-'
506
+ SizeType r = NewRange('-');
507
+ CEREAL_RAPIDJSON_ASSERT(current != kRegexInvalidRange);
508
+ GetRange(current).next = r;
509
+ }
510
+ if (negate)
511
+ GetRange(start).start |= kRangeNegationFlag;
512
+ *range = start;
513
+ return true;
514
+
515
+ case '\\':
516
+ if (ds.Peek() == 'b') {
517
+ ds.Take();
518
+ codepoint = 0x0008; // Escape backspace character
519
+ }
520
+ else if (!CharacterEscape(ds, &codepoint))
521
+ return false;
522
+ // fall through to default
523
+
524
+ default:
525
+ switch (step) {
526
+ case 1:
527
+ if (codepoint == '-') {
528
+ step++;
529
+ break;
530
+ }
531
+ // fall through to step 0 for other characters
532
+
533
+ case 0:
534
+ {
535
+ SizeType r = NewRange(codepoint);
536
+ if (current != kRegexInvalidRange)
537
+ GetRange(current).next = r;
538
+ if (start == kRegexInvalidRange)
539
+ start = r;
540
+ current = r;
541
+ }
542
+ step = 1;
543
+ break;
544
+
545
+ default:
546
+ CEREAL_RAPIDJSON_ASSERT(step == 2);
547
+ GetRange(current).end = codepoint;
548
+ step = 0;
549
+ }
550
+ }
551
+ }
552
+ return false;
553
+ }
554
+
555
+ SizeType NewRange(unsigned codepoint) {
556
+ Range* r = ranges_.template Push<Range>();
557
+ r->start = r->end = codepoint;
558
+ r->next = kRegexInvalidRange;
559
+ return rangeCount_++;
560
+ }
561
+
562
+ template <typename InputStream>
563
+ bool CharacterEscape(DecodedStream<InputStream, Encoding>& ds, unsigned* escapedCodepoint) {
564
+ unsigned codepoint;
565
+ switch (codepoint = ds.Take()) {
566
+ case '^':
567
+ case '$':
568
+ case '|':
569
+ case '(':
570
+ case ')':
571
+ case '?':
572
+ case '*':
573
+ case '+':
574
+ case '.':
575
+ case '[':
576
+ case ']':
577
+ case '{':
578
+ case '}':
579
+ case '\\':
580
+ *escapedCodepoint = codepoint; return true;
581
+ case 'f': *escapedCodepoint = 0x000C; return true;
582
+ case 'n': *escapedCodepoint = 0x000A; return true;
583
+ case 'r': *escapedCodepoint = 0x000D; return true;
584
+ case 't': *escapedCodepoint = 0x0009; return true;
585
+ case 'v': *escapedCodepoint = 0x000B; return true;
586
+ default:
587
+ return false; // Unsupported escape character
588
+ }
589
+ }
590
+
591
+ Allocator* ownAllocator_;
592
+ Allocator* allocator_;
593
+ Stack<Allocator> states_;
594
+ Stack<Allocator> ranges_;
595
+ SizeType root_;
596
+ SizeType stateCount_;
597
+ SizeType rangeCount_;
598
+
599
+ static const unsigned kInfinityQuantifier = ~0u;
600
+
601
+ // For SearchWithAnchoring()
602
+ bool anchorBegin_;
603
+ bool anchorEnd_;
604
+ };
605
+
606
+ template <typename RegexType, typename Allocator = CrtAllocator>
607
+ class GenericRegexSearch {
608
+ public:
609
+ typedef typename RegexType::EncodingType Encoding;
610
+ typedef typename Encoding::Ch Ch;
611
+
612
+ GenericRegexSearch(const RegexType& regex, Allocator* allocator = 0) :
613
+ regex_(regex), allocator_(allocator), ownAllocator_(0),
614
+ state0_(allocator, 0), state1_(allocator, 0), stateSet_()
615
+ {
616
+ CEREAL_RAPIDJSON_ASSERT(regex_.IsValid());
617
+ if (!allocator_)
618
+ ownAllocator_ = allocator_ = CEREAL_RAPIDJSON_NEW(Allocator)();
619
+ stateSet_ = static_cast<unsigned*>(allocator_->Malloc(GetStateSetSize()));
620
+ state0_.template Reserve<SizeType>(regex_.stateCount_);
621
+ state1_.template Reserve<SizeType>(regex_.stateCount_);
622
+ }
623
+
624
+ ~GenericRegexSearch() {
625
+ Allocator::Free(stateSet_);
626
+ CEREAL_RAPIDJSON_DELETE(ownAllocator_);
627
+ }
628
+
629
+ template <typename InputStream>
630
+ bool Match(InputStream& is) {
631
+ return SearchWithAnchoring(is, true, true);
632
+ }
633
+
634
+ bool Match(const Ch* s) {
635
+ GenericStringStream<Encoding> is(s);
636
+ return Match(is);
637
+ }
638
+
639
+ template <typename InputStream>
640
+ bool Search(InputStream& is) {
641
+ return SearchWithAnchoring(is, regex_.anchorBegin_, regex_.anchorEnd_);
642
+ }
643
+
644
+ bool Search(const Ch* s) {
645
+ GenericStringStream<Encoding> is(s);
646
+ return Search(is);
647
+ }
648
+
649
+ private:
650
+ typedef typename RegexType::State State;
651
+ typedef typename RegexType::Range Range;
652
+
653
+ template <typename InputStream>
654
+ bool SearchWithAnchoring(InputStream& is, bool anchorBegin, bool anchorEnd) {
655
+ DecodedStream<InputStream, Encoding> ds(is);
656
+
657
+ state0_.Clear();
658
+ Stack<Allocator> *current = &state0_, *next = &state1_;
659
+ const size_t stateSetSize = GetStateSetSize();
660
+ std::memset(stateSet_, 0, stateSetSize);
661
+
662
+ bool matched = AddState(*current, regex_.root_);
663
+ unsigned codepoint;
664
+ while (!current->Empty() && (codepoint = ds.Take()) != 0) {
665
+ std::memset(stateSet_, 0, stateSetSize);
666
+ next->Clear();
667
+ matched = false;
668
+ for (const SizeType* s = current->template Bottom<SizeType>(); s != current->template End<SizeType>(); ++s) {
669
+ const State& sr = regex_.GetState(*s);
670
+ if (sr.codepoint == codepoint ||
671
+ sr.codepoint == RegexType::kAnyCharacterClass ||
672
+ (sr.codepoint == RegexType::kRangeCharacterClass && MatchRange(sr.rangeStart, codepoint)))
673
+ {
674
+ matched = AddState(*next, sr.out) || matched;
675
+ if (!anchorEnd && matched)
676
+ return true;
677
+ }
678
+ if (!anchorBegin)
679
+ AddState(*next, regex_.root_);
680
+ }
681
+ internal::Swap(current, next);
682
+ }
683
+
684
+ return matched;
685
+ }
686
+
687
+ size_t GetStateSetSize() const {
688
+ return (regex_.stateCount_ + 31) / 32 * 4;
689
+ }
690
+
691
+ // Return whether the added states is a match state
692
+ bool AddState(Stack<Allocator>& l, SizeType index) {
693
+ CEREAL_RAPIDJSON_ASSERT(index != kRegexInvalidState);
694
+
695
+ const State& s = regex_.GetState(index);
696
+ if (s.out1 != kRegexInvalidState) { // Split
697
+ bool matched = AddState(l, s.out);
698
+ return AddState(l, s.out1) || matched;
699
+ }
700
+ else if (!(stateSet_[index >> 5] & (1u << (index & 31)))) {
701
+ stateSet_[index >> 5] |= (1u << (index & 31));
702
+ *l.template PushUnsafe<SizeType>() = index;
703
+ }
704
+ return s.out == kRegexInvalidState; // by using PushUnsafe() above, we can ensure s is not validated due to reallocation.
705
+ }
706
+
707
+ bool MatchRange(SizeType rangeIndex, unsigned codepoint) const {
708
+ bool yes = (regex_.GetRange(rangeIndex).start & RegexType::kRangeNegationFlag) == 0;
709
+ while (rangeIndex != kRegexInvalidRange) {
710
+ const Range& r = regex_.GetRange(rangeIndex);
711
+ if (codepoint >= (r.start & ~RegexType::kRangeNegationFlag) && codepoint <= r.end)
712
+ return yes;
713
+ rangeIndex = r.next;
714
+ }
715
+ return !yes;
716
+ }
717
+
718
+ const RegexType& regex_;
719
+ Allocator* allocator_;
720
+ Allocator* ownAllocator_;
721
+ Stack<Allocator> state0_;
722
+ Stack<Allocator> state1_;
723
+ uint32_t* stateSet_;
724
+ };
725
+
726
+ typedef GenericRegex<UTF8<> > Regex;
727
+ typedef GenericRegexSearch<Regex> RegexSearch;
728
+
729
+ } // namespace internal
730
+ CEREAL_RAPIDJSON_NAMESPACE_END
731
+
732
+ #ifdef __GNUC__
733
+ CEREAL_RAPIDJSON_DIAG_POP
734
+ #endif
735
+
736
+ #if defined(__clang__) || defined(_MSC_VER)
737
+ CEREAL_RAPIDJSON_DIAG_POP
738
+ #endif
739
+
740
+ #endif // CEREAL_RAPIDJSON_INTERNAL_REGEX_H_