re2 1.20.12 → 1.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/binding.gyp +2 -0
- package/lib/addon.cc +54 -1
- package/lib/exec.cc +34 -52
- package/lib/match.cc +20 -26
- package/lib/replace.cc +67 -78
- package/lib/search.cc +8 -7
- package/lib/split.cc +14 -18
- package/lib/str-val.cc +112 -0
- package/lib/str-val.h +32 -0
- package/lib/test.cc +18 -49
- package/lib/util.cc +0 -29
- package/lib/util.h +0 -15
- package/lib/wrapped_re2.h +33 -3
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -353,6 +353,7 @@ console.log('re2_res : ' + re2_res); // prints: re2_res : abc,a,b,c
|
|
|
353
353
|
|
|
354
354
|
## Release history
|
|
355
355
|
|
|
356
|
+
- 1.21.0 *Fixed the performance problem reported by [matthewvalentine](https://github.com/matthewvalentine) (thx!). The change improves performance for multiple use cases.*
|
|
356
357
|
- 1.20.12 *Updated deps. Maintenance chores. Fixes for buffer-related bugs: `exec()` index (reported by [matthewvalentine](https://github.com/matthewvalentine), thx) and `match()` index.*
|
|
357
358
|
- 1.20.11 *Updated deps. Added support for Node 22 (thx, [Elton Leong](https://github.com/eltonkl)).*
|
|
358
359
|
- 1.20.10 *Updated deps. Removed files the pack used for development (thx, [Haruaki OTAKE](https://github.com/aaharu)). Added arm64 Linux prebuilds (thx, [Christopher M](https://github.com/cmanou)). Fixed non-`npm` `corepack` problem (thx, [Steven](https://github.com/styfle)).*
|
package/binding.gyp
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
{
|
|
4
4
|
"target_name": "re2",
|
|
5
5
|
"sources": [
|
|
6
|
+
"lib/str-val.cc",
|
|
6
7
|
"lib/addon.cc",
|
|
7
8
|
"lib/new.cc",
|
|
8
9
|
"lib/exec.cc",
|
|
@@ -14,6 +15,7 @@
|
|
|
14
15
|
"lib/to_string.cc",
|
|
15
16
|
"lib/accessors.cc",
|
|
16
17
|
"lib/util.cc",
|
|
18
|
+
"lib/str-val.cc",
|
|
17
19
|
"vendor/re2/re2/bitmap256.cc",
|
|
18
20
|
"vendor/re2/re2/bitstate.cc",
|
|
19
21
|
"vendor/re2/re2/compile.cc",
|
package/lib/addon.cc
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
#include "./wrapped_re2.h"
|
|
2
2
|
|
|
3
|
-
#include
|
|
3
|
+
#include "./str-val.h"
|
|
4
4
|
|
|
5
5
|
static NAN_METHOD(GetUtf8Length)
|
|
6
6
|
{
|
|
@@ -91,3 +91,56 @@ NODE_MODULE_INIT()
|
|
|
91
91
|
Nan::HandleScope scope;
|
|
92
92
|
Nan::Set(module->ToObject(context).ToLocalChecked(), Nan::New("exports").ToLocalChecked(), WrappedRE2::Init());
|
|
93
93
|
}
|
|
94
|
+
|
|
95
|
+
// private methods
|
|
96
|
+
|
|
97
|
+
void WrappedRE2::dropLastString()
|
|
98
|
+
{
|
|
99
|
+
lastString.Reset();
|
|
100
|
+
if (lastStringValue)
|
|
101
|
+
{
|
|
102
|
+
delete lastStringValue;
|
|
103
|
+
lastStringValue = nullptr;
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
inline size_t countBytes(const char *data, size_t from, size_t n)
|
|
108
|
+
{
|
|
109
|
+
for (; n > 0; --n)
|
|
110
|
+
{
|
|
111
|
+
size_t s = getUtf8CharSize(data[from]);
|
|
112
|
+
from += s;
|
|
113
|
+
if (s == 4 && n >= 2)
|
|
114
|
+
--n; // this utf8 character will take two utf16 characters
|
|
115
|
+
// the decrement above is protected to avoid an overflow of an unsigned integer
|
|
116
|
+
}
|
|
117
|
+
return from;
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
void WrappedRE2::prepareLastString(const v8::Local<v8::Value> &arg, bool ignoreLastIndex)
|
|
121
|
+
{
|
|
122
|
+
size_t startFrom = ignoreLastIndex ? 0 : lastIndex;
|
|
123
|
+
|
|
124
|
+
if (node::Buffer::HasInstance(arg))
|
|
125
|
+
{
|
|
126
|
+
dropLastString();
|
|
127
|
+
lastStringValue = new StrValBuffer(arg, startFrom);
|
|
128
|
+
return;
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
// String
|
|
132
|
+
|
|
133
|
+
// check if the same string is already in the cache
|
|
134
|
+
if (lastString == arg && lastStringValue)
|
|
135
|
+
{
|
|
136
|
+
if (!global && !sticky)
|
|
137
|
+
return; // we are good
|
|
138
|
+
lastStringValue->setIndex(startFrom);
|
|
139
|
+
return;
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
dropLastString();
|
|
143
|
+
lastString.Reset(arg);
|
|
144
|
+
static_cast<v8::PersistentBase<v8::Value>&>(lastString).SetWeak();
|
|
145
|
+
lastStringValue = new StrValString(arg, startFrom);
|
|
146
|
+
};
|
package/lib/exec.cc
CHANGED
|
@@ -1,10 +1,8 @@
|
|
|
1
1
|
#include "./wrapped_re2.h"
|
|
2
|
-
#include "./
|
|
2
|
+
#include "./str-val.h"
|
|
3
3
|
|
|
4
4
|
#include <vector>
|
|
5
5
|
|
|
6
|
-
#include <node_buffer.h>
|
|
7
|
-
|
|
8
6
|
NAN_METHOD(WrappedRE2::Exec)
|
|
9
7
|
{
|
|
10
8
|
|
|
@@ -17,45 +15,17 @@ NAN_METHOD(WrappedRE2::Exec)
|
|
|
17
15
|
return;
|
|
18
16
|
}
|
|
19
17
|
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
return;
|
|
24
|
-
}
|
|
18
|
+
re2->prepareLastString(info[0]);
|
|
19
|
+
StrValBase &str = *re2->lastStringValue;
|
|
20
|
+
if (str.isBad) return; // throws an exception
|
|
25
21
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
if (str.isBuffer)
|
|
29
|
-
{
|
|
30
|
-
if ((re2->global || re2->sticky) && re2->lastIndex)
|
|
31
|
-
{
|
|
32
|
-
if (re2->lastIndex > str.size)
|
|
33
|
-
{
|
|
34
|
-
re2->lastIndex = 0;
|
|
35
|
-
info.GetReturnValue().SetNull();
|
|
36
|
-
return;
|
|
37
|
-
}
|
|
38
|
-
lastIndex = re2->lastIndex;
|
|
39
|
-
}
|
|
40
|
-
}
|
|
41
|
-
else
|
|
22
|
+
if (re2->global || re2->sticky)
|
|
42
23
|
{
|
|
43
|
-
if (
|
|
24
|
+
if (!str.isIndexValid)
|
|
44
25
|
{
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
info.GetReturnValue().SetNull();
|
|
49
|
-
return;
|
|
50
|
-
}
|
|
51
|
-
for (size_t n = re2->lastIndex; n; --n)
|
|
52
|
-
{
|
|
53
|
-
size_t s = getUtf8CharSize(str.data[lastIndex]);
|
|
54
|
-
lastIndex += s;
|
|
55
|
-
if (s == 4 && n >= 2)
|
|
56
|
-
--n; // this utf8 character will take two utf16 characters
|
|
57
|
-
// the decrement above is protected to avoid an overflow of an unsigned integer
|
|
58
|
-
}
|
|
26
|
+
re2->lastIndex = 0;
|
|
27
|
+
info.GetReturnValue().SetNull();
|
|
28
|
+
return;
|
|
59
29
|
}
|
|
60
30
|
}
|
|
61
31
|
|
|
@@ -63,7 +33,7 @@ NAN_METHOD(WrappedRE2::Exec)
|
|
|
63
33
|
|
|
64
34
|
std::vector<re2::StringPiece> groups(re2->regexp.NumberOfCapturingGroups() + 1);
|
|
65
35
|
|
|
66
|
-
if (!re2->regexp.Match(str,
|
|
36
|
+
if (!re2->regexp.Match(str, str.byteIndex, str.size, re2->sticky ? re2::RE2::ANCHOR_START : re2::RE2::UNANCHORED, &groups[0], groups.size()))
|
|
67
37
|
{
|
|
68
38
|
if (re2->global || re2->sticky)
|
|
69
39
|
{
|
|
@@ -87,9 +57,10 @@ NAN_METHOD(WrappedRE2::Exec)
|
|
|
87
57
|
if (data)
|
|
88
58
|
{
|
|
89
59
|
Nan::Set(result, i, Nan::CopyBuffer(data, item.size()).ToLocalChecked());
|
|
90
|
-
if (re2->hasIndices)
|
|
60
|
+
if (re2->hasIndices)
|
|
61
|
+
{
|
|
91
62
|
auto pair = Nan::New<v8::Array>();
|
|
92
|
-
auto offset = data - str.data -
|
|
63
|
+
auto offset = data - str.data - str.byteIndex;
|
|
93
64
|
auto length = item.size();
|
|
94
65
|
Nan::Set(pair, 0, Nan::New<v8::Integer>(indexOffset + static_cast<int>(offset)));
|
|
95
66
|
Nan::Set(pair, 1, Nan::New<v8::Integer>(indexOffset + static_cast<int>(offset + length)));
|
|
@@ -99,12 +70,13 @@ NAN_METHOD(WrappedRE2::Exec)
|
|
|
99
70
|
else
|
|
100
71
|
{
|
|
101
72
|
Nan::Set(result, i, Nan::Undefined());
|
|
102
|
-
if (re2->hasIndices)
|
|
73
|
+
if (re2->hasIndices)
|
|
74
|
+
{
|
|
103
75
|
Nan::Set(indices, i, Nan::Undefined());
|
|
104
76
|
}
|
|
105
77
|
}
|
|
106
78
|
}
|
|
107
|
-
Nan::Set(result, Nan::New("index").ToLocalChecked(), Nan::New<v8::Integer>(indexOffset + static_cast<int>(groups[0].data() - str.data -
|
|
79
|
+
Nan::Set(result, Nan::New("index").ToLocalChecked(), Nan::New<v8::Integer>(indexOffset + static_cast<int>(groups[0].data() - str.data - str.byteIndex)));
|
|
108
80
|
}
|
|
109
81
|
else
|
|
110
82
|
{
|
|
@@ -115,9 +87,10 @@ NAN_METHOD(WrappedRE2::Exec)
|
|
|
115
87
|
if (data)
|
|
116
88
|
{
|
|
117
89
|
Nan::Set(result, i, Nan::New(data, item.size()).ToLocalChecked());
|
|
118
|
-
if (re2->hasIndices)
|
|
90
|
+
if (re2->hasIndices)
|
|
91
|
+
{
|
|
119
92
|
auto pair = Nan::New<v8::Array>();
|
|
120
|
-
auto offset = getUtf16Length(str.data +
|
|
93
|
+
auto offset = getUtf16Length(str.data + str.byteIndex, data);
|
|
121
94
|
auto length = getUtf16Length(data, data + item.size());
|
|
122
95
|
Nan::Set(pair, 0, Nan::New<v8::Integer>(indexOffset + static_cast<int>(offset)));
|
|
123
96
|
Nan::Set(pair, 1, Nan::New<v8::Integer>(indexOffset + static_cast<int>(offset + length)));
|
|
@@ -127,17 +100,23 @@ NAN_METHOD(WrappedRE2::Exec)
|
|
|
127
100
|
else
|
|
128
101
|
{
|
|
129
102
|
Nan::Set(result, i, Nan::Undefined());
|
|
130
|
-
if (re2->hasIndices)
|
|
103
|
+
if (re2->hasIndices)
|
|
104
|
+
{
|
|
131
105
|
Nan::Set(indices, i, Nan::Undefined());
|
|
132
106
|
}
|
|
133
107
|
}
|
|
134
108
|
}
|
|
135
|
-
Nan::Set(
|
|
109
|
+
Nan::Set(
|
|
110
|
+
result,
|
|
111
|
+
Nan::New("index").ToLocalChecked(),
|
|
112
|
+
Nan::New<v8::Integer>(indexOffset +
|
|
113
|
+
static_cast<int>(getUtf16Length(str.data + str.byteIndex, groups[0].data()))));
|
|
136
114
|
}
|
|
137
115
|
|
|
138
116
|
if (re2->global || re2->sticky)
|
|
139
117
|
{
|
|
140
|
-
re2->lastIndex +=
|
|
118
|
+
re2->lastIndex +=
|
|
119
|
+
str.isBuffer ? groups[0].data() - str.data + groups[0].size() - str.byteIndex : getUtf16Length(str.data + str.byteIndex, groups[0].data() + groups[0].size());
|
|
141
120
|
}
|
|
142
121
|
|
|
143
122
|
Nan::Set(result, Nan::New("input").ToLocalChecked(), info[0]);
|
|
@@ -159,7 +138,8 @@ NAN_METHOD(WrappedRE2::Exec)
|
|
|
159
138
|
|
|
160
139
|
Nan::Set(result, Nan::New("groups").ToLocalChecked(), groups);
|
|
161
140
|
|
|
162
|
-
if (re2->hasIndices)
|
|
141
|
+
if (re2->hasIndices)
|
|
142
|
+
{
|
|
163
143
|
auto indexGroups = Nan::New<v8::Object>();
|
|
164
144
|
Nan::SetPrototype(indexGroups, Nan::Null());
|
|
165
145
|
|
|
@@ -178,12 +158,14 @@ NAN_METHOD(WrappedRE2::Exec)
|
|
|
178
158
|
else
|
|
179
159
|
{
|
|
180
160
|
Nan::Set(result, Nan::New("groups").ToLocalChecked(), Nan::Undefined());
|
|
181
|
-
if (re2->hasIndices)
|
|
161
|
+
if (re2->hasIndices)
|
|
162
|
+
{
|
|
182
163
|
Nan::Set(indices, Nan::New("groups").ToLocalChecked(), Nan::Undefined());
|
|
183
164
|
}
|
|
184
165
|
}
|
|
185
166
|
|
|
186
|
-
if (re2->hasIndices)
|
|
167
|
+
if (re2->hasIndices)
|
|
168
|
+
{
|
|
187
169
|
Nan::Set(result, Nan::New("indices").ToLocalChecked(), indices);
|
|
188
170
|
}
|
|
189
171
|
|
package/lib/match.cc
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#include "./wrapped_re2.h"
|
|
2
|
-
#include "./
|
|
2
|
+
#include "./str-val.h"
|
|
3
3
|
|
|
4
4
|
#include <vector>
|
|
5
5
|
|
|
@@ -15,15 +15,19 @@ NAN_METHOD(WrappedRE2::Match)
|
|
|
15
15
|
return;
|
|
16
16
|
}
|
|
17
17
|
|
|
18
|
-
|
|
19
|
-
|
|
18
|
+
re2->prepareLastString(info[0], re2->global);
|
|
19
|
+
StrValBase &str = *re2->lastStringValue;
|
|
20
|
+
if (str.isBad) return; // throws an exception
|
|
21
|
+
|
|
22
|
+
if (!str.isIndexValid)
|
|
20
23
|
{
|
|
24
|
+
re2->lastIndex = 0;
|
|
25
|
+
info.GetReturnValue().SetNull();
|
|
21
26
|
return;
|
|
22
27
|
}
|
|
23
28
|
|
|
24
29
|
std::vector<re2::StringPiece> groups;
|
|
25
|
-
|
|
26
|
-
size_t lastIndex = 0;
|
|
30
|
+
size_t byteIndex = 0;
|
|
27
31
|
auto anchor = re2::RE2::UNANCHORED;
|
|
28
32
|
|
|
29
33
|
// actual work
|
|
@@ -39,10 +43,10 @@ NAN_METHOD(WrappedRE2::Match)
|
|
|
39
43
|
anchor = re2::RE2::ANCHOR_START;
|
|
40
44
|
}
|
|
41
45
|
|
|
42
|
-
while (re2->regexp.Match(str,
|
|
46
|
+
while (re2->regexp.Match(str, byteIndex, str.size, anchor, &match, 1))
|
|
43
47
|
{
|
|
44
48
|
groups.push_back(match);
|
|
45
|
-
|
|
49
|
+
byteIndex = match.data() - str.data + match.size();
|
|
46
50
|
}
|
|
47
51
|
|
|
48
52
|
if (groups.empty())
|
|
@@ -57,24 +61,15 @@ NAN_METHOD(WrappedRE2::Match)
|
|
|
57
61
|
|
|
58
62
|
if (re2->sticky)
|
|
59
63
|
{
|
|
60
|
-
|
|
61
|
-
{
|
|
62
|
-
size_t s = getUtf8CharSize(a.data[lastIndex]);
|
|
63
|
-
lastIndex += s;
|
|
64
|
-
if (s == 4 && n >= 2)
|
|
65
|
-
--n; // this utf8 character will take two utf16 characters
|
|
66
|
-
// the decrement above is protected to avoid an overflow of an unsigned integer
|
|
67
|
-
}
|
|
64
|
+
byteIndex = str.byteIndex;
|
|
68
65
|
anchor = RE2::ANCHOR_START;
|
|
69
66
|
}
|
|
70
67
|
|
|
71
68
|
groups.resize(re2->regexp.NumberOfCapturingGroups() + 1);
|
|
72
|
-
if (!re2->regexp.Match(str,
|
|
69
|
+
if (!re2->regexp.Match(str, byteIndex, str.size, anchor, &groups[0], groups.size()))
|
|
73
70
|
{
|
|
74
71
|
if (re2->sticky)
|
|
75
|
-
{
|
|
76
72
|
re2->lastIndex = 0;
|
|
77
|
-
}
|
|
78
73
|
info.GetReturnValue().SetNull();
|
|
79
74
|
return;
|
|
80
75
|
}
|
|
@@ -84,7 +79,7 @@ NAN_METHOD(WrappedRE2::Match)
|
|
|
84
79
|
|
|
85
80
|
auto result = Nan::New<v8::Array>(), indices = Nan::New<v8::Array>();
|
|
86
81
|
|
|
87
|
-
if (
|
|
82
|
+
if (str.isBuffer)
|
|
88
83
|
{
|
|
89
84
|
for (size_t i = 0, n = groups.size(); i < n; ++i)
|
|
90
85
|
{
|
|
@@ -96,7 +91,7 @@ NAN_METHOD(WrappedRE2::Match)
|
|
|
96
91
|
if (!re2->global && re2->hasIndices)
|
|
97
92
|
{
|
|
98
93
|
auto pair = Nan::New<v8::Array>();
|
|
99
|
-
auto offset = data -
|
|
94
|
+
auto offset = data - str.data - byteIndex;
|
|
100
95
|
auto length = item.size();
|
|
101
96
|
Nan::Set(pair, 0, Nan::New<v8::Integer>(static_cast<int>(offset)));
|
|
102
97
|
Nan::Set(pair, 1, Nan::New<v8::Integer>(static_cast<int>(offset + length)));
|
|
@@ -107,14 +102,12 @@ NAN_METHOD(WrappedRE2::Match)
|
|
|
107
102
|
{
|
|
108
103
|
Nan::Set(result, i, Nan::Undefined());
|
|
109
104
|
if (!re2->global && re2->hasIndices)
|
|
110
|
-
{
|
|
111
105
|
Nan::Set(indices, i, Nan::Undefined());
|
|
112
|
-
}
|
|
113
106
|
}
|
|
114
107
|
}
|
|
115
108
|
if (!re2->global)
|
|
116
109
|
{
|
|
117
|
-
Nan::Set(result, Nan::New("index").ToLocalChecked(), Nan::New<v8::Integer>(static_cast<int>(groups[0].data() -
|
|
110
|
+
Nan::Set(result, Nan::New("index").ToLocalChecked(), Nan::New<v8::Integer>(static_cast<int>(groups[0].data() - str.data)));
|
|
118
111
|
Nan::Set(result, Nan::New("input").ToLocalChecked(), info[0]);
|
|
119
112
|
}
|
|
120
113
|
}
|
|
@@ -130,7 +123,7 @@ NAN_METHOD(WrappedRE2::Match)
|
|
|
130
123
|
if (!re2->global && re2->hasIndices)
|
|
131
124
|
{
|
|
132
125
|
auto pair = Nan::New<v8::Array>();
|
|
133
|
-
auto offset = getUtf16Length(
|
|
126
|
+
auto offset = getUtf16Length(str.data + byteIndex, data);
|
|
134
127
|
auto length = getUtf16Length(data, data + item.size());
|
|
135
128
|
Nan::Set(pair, 0, Nan::New<v8::Integer>(static_cast<int>(offset)));
|
|
136
129
|
Nan::Set(pair, 1, Nan::New<v8::Integer>(static_cast<int>(offset + length)));
|
|
@@ -148,7 +141,7 @@ NAN_METHOD(WrappedRE2::Match)
|
|
|
148
141
|
}
|
|
149
142
|
if (!re2->global)
|
|
150
143
|
{
|
|
151
|
-
Nan::Set(result, Nan::New("index").ToLocalChecked(), Nan::New<v8::Integer>(static_cast<int>(getUtf16Length(
|
|
144
|
+
Nan::Set(result, Nan::New("index").ToLocalChecked(), Nan::New<v8::Integer>(static_cast<int>(getUtf16Length(str.data, groups[0].data()))));
|
|
152
145
|
Nan::Set(result, Nan::New("input").ToLocalChecked(), info[0]);
|
|
153
146
|
}
|
|
154
147
|
}
|
|
@@ -159,7 +152,8 @@ NAN_METHOD(WrappedRE2::Match)
|
|
|
159
152
|
}
|
|
160
153
|
else if (re2->sticky)
|
|
161
154
|
{
|
|
162
|
-
re2->lastIndex +=
|
|
155
|
+
re2->lastIndex +=
|
|
156
|
+
str.isBuffer ? groups[0].data() - str.data + groups[0].size() - byteIndex : getUtf16Length(str.data + byteIndex, groups[0].data() + groups[0].size());
|
|
163
157
|
}
|
|
164
158
|
|
|
165
159
|
if (!re2->global)
|
package/lib/replace.cc
CHANGED
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
#include "./wrapped_re2.h"
|
|
2
|
-
#include "./
|
|
2
|
+
#include "./str-val.h"
|
|
3
3
|
|
|
4
4
|
#include <algorithm>
|
|
5
5
|
#include <memory>
|
|
6
6
|
#include <string>
|
|
7
7
|
#include <vector>
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
9
|
+
inline int getMaxSubmatch(
|
|
10
|
+
const char *data,
|
|
11
|
+
size_t size,
|
|
12
|
+
const std::map<std::string, int> &namedGroups)
|
|
12
13
|
{
|
|
13
14
|
int maxSubmatch = 0, index, index2;
|
|
14
15
|
const char *nameBegin;
|
|
@@ -86,7 +87,12 @@ inline int getMaxSubmatch(const char *data, size_t size, const std::map<std::str
|
|
|
86
87
|
return maxSubmatch;
|
|
87
88
|
}
|
|
88
89
|
|
|
89
|
-
inline std::string replace(
|
|
90
|
+
inline std::string replace(
|
|
91
|
+
const char *data,
|
|
92
|
+
size_t size,
|
|
93
|
+
const std::vector<re2::StringPiece> &groups,
|
|
94
|
+
const re2::StringPiece &str,
|
|
95
|
+
const std::map<std::string, int> &namedGroups)
|
|
90
96
|
{
|
|
91
97
|
std::string result;
|
|
92
98
|
size_t index, index2;
|
|
@@ -205,7 +211,11 @@ inline std::string replace(const char *data, size_t size, const std::vector<re2:
|
|
|
205
211
|
return result;
|
|
206
212
|
}
|
|
207
213
|
|
|
208
|
-
static Nan::Maybe<std::string> replace(
|
|
214
|
+
static Nan::Maybe<std::string> replace(
|
|
215
|
+
WrappedRE2 *re2,
|
|
216
|
+
const StrValBase &replacee,
|
|
217
|
+
const char *replacer,
|
|
218
|
+
size_t replacer_size)
|
|
209
219
|
{
|
|
210
220
|
const re2::StringPiece str = replacee;
|
|
211
221
|
const char *data = str.data();
|
|
@@ -216,67 +226,50 @@ static Nan::Maybe<std::string> replace(WrappedRE2 *re2, const StrVal &replacee,
|
|
|
216
226
|
std::vector<re2::StringPiece> groups(std::min(re2->regexp.NumberOfCapturingGroups(), getMaxSubmatch(replacer, replacer_size, namedGroups)) + 1);
|
|
217
227
|
const auto &match = groups[0];
|
|
218
228
|
|
|
219
|
-
size_t
|
|
229
|
+
size_t byteIndex = 0;
|
|
220
230
|
std::string result;
|
|
221
231
|
auto anchor = re2::RE2::UNANCHORED;
|
|
222
232
|
|
|
223
233
|
if (re2->sticky)
|
|
224
234
|
{
|
|
225
235
|
if (!re2->global)
|
|
226
|
-
|
|
227
|
-
if (replacee.isBuffer)
|
|
228
|
-
{
|
|
229
|
-
lastIndex = re2->lastIndex;
|
|
230
|
-
}
|
|
231
|
-
else
|
|
232
|
-
{
|
|
233
|
-
for (size_t n = re2->lastIndex; n; --n)
|
|
234
|
-
{
|
|
235
|
-
size_t s = getUtf8CharSize(data[lastIndex]);
|
|
236
|
-
lastIndex += s;
|
|
237
|
-
if (s == 4 && n >= 2)
|
|
238
|
-
{
|
|
239
|
-
--n; // this utf8 character will take two utf16 characters
|
|
240
|
-
}
|
|
241
|
-
// the decrement above is protected to avoid an overflow of an unsigned integer
|
|
242
|
-
}
|
|
243
|
-
}
|
|
244
|
-
}
|
|
236
|
+
byteIndex = replacee.byteIndex;
|
|
245
237
|
anchor = re2::RE2::ANCHOR_START;
|
|
246
238
|
}
|
|
247
239
|
|
|
248
|
-
if (
|
|
240
|
+
if (byteIndex)
|
|
249
241
|
{
|
|
250
|
-
result = std::string(data,
|
|
242
|
+
result = std::string(data, byteIndex);
|
|
251
243
|
}
|
|
252
244
|
|
|
253
245
|
bool noMatch = true;
|
|
254
|
-
while (
|
|
246
|
+
while (byteIndex <= size && re2->regexp.Match(str, byteIndex, size, anchor, &groups[0], groups.size()))
|
|
255
247
|
{
|
|
256
248
|
noMatch = false;
|
|
257
249
|
auto offset = match.data() - data;
|
|
258
250
|
if (!re2->global && re2->sticky)
|
|
259
251
|
{
|
|
260
|
-
re2->lastIndex +=
|
|
252
|
+
re2->lastIndex +=
|
|
253
|
+
replacee.isBuffer ? offset + match.size() - byteIndex : getUtf16Length(data + byteIndex, match.data() + match.size());
|
|
261
254
|
}
|
|
262
|
-
if (match.data() == data || offset > static_cast<long>(
|
|
255
|
+
if (match.data() == data || offset > static_cast<long>(byteIndex))
|
|
263
256
|
{
|
|
264
|
-
result += std::string(data +
|
|
257
|
+
result += std::string(data + byteIndex, offset - byteIndex);
|
|
265
258
|
}
|
|
266
259
|
result += replace(replacer, replacer_size, groups, str, namedGroups);
|
|
267
260
|
if (match.size())
|
|
268
261
|
{
|
|
269
|
-
|
|
262
|
+
byteIndex = offset + match.size();
|
|
270
263
|
}
|
|
271
264
|
else if ((size_t)offset < size)
|
|
272
265
|
{
|
|
273
266
|
auto sym_size = getUtf8CharSize(data[offset]);
|
|
274
267
|
result.append(data + offset, sym_size);
|
|
275
|
-
|
|
268
|
+
byteIndex = offset + sym_size;
|
|
276
269
|
}
|
|
277
270
|
else
|
|
278
271
|
{
|
|
279
|
-
|
|
272
|
+
byteIndex = size;
|
|
280
273
|
break;
|
|
281
274
|
}
|
|
282
275
|
if (!re2->global)
|
|
@@ -284,9 +277,9 @@ static Nan::Maybe<std::string> replace(WrappedRE2 *re2, const StrVal &replacee,
|
|
|
284
277
|
break;
|
|
285
278
|
}
|
|
286
279
|
}
|
|
287
|
-
if (
|
|
280
|
+
if (byteIndex < size)
|
|
288
281
|
{
|
|
289
|
-
result += std::string(data +
|
|
282
|
+
result += std::string(data + byteIndex, size - byteIndex);
|
|
290
283
|
}
|
|
291
284
|
|
|
292
285
|
if (re2->global)
|
|
@@ -296,15 +289,19 @@ static Nan::Maybe<std::string> replace(WrappedRE2 *re2, const StrVal &replacee,
|
|
|
296
289
|
else if (re2->sticky)
|
|
297
290
|
{
|
|
298
291
|
if (noMatch)
|
|
299
|
-
{
|
|
300
292
|
re2->lastIndex = 0;
|
|
301
|
-
}
|
|
302
293
|
}
|
|
303
294
|
|
|
304
295
|
return Nan::Just(result);
|
|
305
296
|
}
|
|
306
297
|
|
|
307
|
-
inline Nan::Maybe<std::string> replace(
|
|
298
|
+
inline Nan::Maybe<std::string> replace(
|
|
299
|
+
const Nan::Callback *replacer,
|
|
300
|
+
const std::vector<re2::StringPiece> &groups,
|
|
301
|
+
const re2::StringPiece &str,
|
|
302
|
+
const v8::Local<v8::Value> &input,
|
|
303
|
+
bool useBuffers,
|
|
304
|
+
const std::map<std::string, int> &namedGroups)
|
|
308
305
|
{
|
|
309
306
|
std::vector<v8::Local<v8::Value>> argv;
|
|
310
307
|
|
|
@@ -373,11 +370,16 @@ inline Nan::Maybe<std::string> replace(const Nan::Callback *replacer, const std:
|
|
|
373
370
|
return Nan::Just(std::string(node::Buffer::Data(result), node::Buffer::Length(result)));
|
|
374
371
|
}
|
|
375
372
|
|
|
376
|
-
|
|
373
|
+
StrValString val(result);
|
|
377
374
|
return Nan::Just(std::string(val.data, val.size));
|
|
378
375
|
}
|
|
379
376
|
|
|
380
|
-
static Nan::Maybe<std::string> replace(
|
|
377
|
+
static Nan::Maybe<std::string> replace(
|
|
378
|
+
WrappedRE2 *re2,
|
|
379
|
+
const StrValBase &replacee,
|
|
380
|
+
const Nan::Callback *replacer,
|
|
381
|
+
const v8::Local<v8::Value> &input,
|
|
382
|
+
bool useBuffers)
|
|
381
383
|
{
|
|
382
384
|
const re2::StringPiece str = replacee;
|
|
383
385
|
const char *data = str.data();
|
|
@@ -386,54 +388,36 @@ static Nan::Maybe<std::string> replace(WrappedRE2 *re2, const StrVal &replacee,
|
|
|
386
388
|
std::vector<re2::StringPiece> groups(re2->regexp.NumberOfCapturingGroups() + 1);
|
|
387
389
|
const auto &match = groups[0];
|
|
388
390
|
|
|
389
|
-
size_t
|
|
391
|
+
size_t byteIndex = 0;
|
|
390
392
|
std::string result;
|
|
391
393
|
auto anchor = re2::RE2::UNANCHORED;
|
|
392
394
|
|
|
393
395
|
if (re2->sticky)
|
|
394
396
|
{
|
|
395
397
|
if (!re2->global)
|
|
396
|
-
|
|
397
|
-
if (replacee.isBuffer)
|
|
398
|
-
{
|
|
399
|
-
lastIndex = re2->lastIndex;
|
|
400
|
-
}
|
|
401
|
-
else
|
|
402
|
-
{
|
|
403
|
-
for (size_t n = re2->lastIndex; n; --n)
|
|
404
|
-
{
|
|
405
|
-
size_t s = getUtf8CharSize(data[lastIndex]);
|
|
406
|
-
lastIndex += s;
|
|
407
|
-
if (s == 4 && n >= 2)
|
|
408
|
-
{
|
|
409
|
-
--n; // this utf8 character will take two utf16 characters
|
|
410
|
-
}
|
|
411
|
-
// the decrement above is protected to avoid an overflow of an unsigned integer
|
|
412
|
-
}
|
|
413
|
-
}
|
|
414
|
-
}
|
|
398
|
+
byteIndex = replacee.byteIndex;
|
|
415
399
|
anchor = RE2::ANCHOR_START;
|
|
416
400
|
}
|
|
417
401
|
|
|
418
|
-
if (
|
|
402
|
+
if (byteIndex)
|
|
419
403
|
{
|
|
420
|
-
result = std::string(data,
|
|
404
|
+
result = std::string(data, byteIndex);
|
|
421
405
|
}
|
|
422
406
|
|
|
423
407
|
const auto &namedGroups = re2->regexp.NamedCapturingGroups();
|
|
424
408
|
|
|
425
409
|
bool noMatch = true;
|
|
426
|
-
while (
|
|
410
|
+
while (byteIndex <= size && re2->regexp.Match(str, byteIndex, size, anchor, &groups[0], groups.size()))
|
|
427
411
|
{
|
|
428
412
|
noMatch = false;
|
|
429
413
|
auto offset = match.data() - data;
|
|
430
414
|
if (!re2->global && re2->sticky)
|
|
431
415
|
{
|
|
432
|
-
re2->lastIndex += replacee.isBuffer ? offset + match.size() -
|
|
416
|
+
re2->lastIndex += replacee.isBuffer ? offset + match.size() - byteIndex : getUtf16Length(data + byteIndex, match.data() + match.size());
|
|
433
417
|
}
|
|
434
|
-
if (match.data() == data || offset > static_cast<long>(
|
|
418
|
+
if (match.data() == data || offset > static_cast<long>(byteIndex))
|
|
435
419
|
{
|
|
436
|
-
result += std::string(data +
|
|
420
|
+
result += std::string(data + byteIndex, offset - byteIndex);
|
|
437
421
|
}
|
|
438
422
|
const auto part = replace(replacer, groups, str, input, useBuffers, namedGroups);
|
|
439
423
|
if (part.IsNothing())
|
|
@@ -443,17 +427,17 @@ static Nan::Maybe<std::string> replace(WrappedRE2 *re2, const StrVal &replacee,
|
|
|
443
427
|
result += part.FromJust();
|
|
444
428
|
if (match.size())
|
|
445
429
|
{
|
|
446
|
-
|
|
430
|
+
byteIndex = offset + match.size();
|
|
447
431
|
}
|
|
448
432
|
else if ((size_t)offset < size)
|
|
449
433
|
{
|
|
450
434
|
auto sym_size = getUtf8CharSize(data[offset]);
|
|
451
435
|
result.append(data + offset, sym_size);
|
|
452
|
-
|
|
436
|
+
byteIndex = offset + sym_size;
|
|
453
437
|
}
|
|
454
438
|
else
|
|
455
439
|
{
|
|
456
|
-
|
|
440
|
+
byteIndex = size;
|
|
457
441
|
break;
|
|
458
442
|
}
|
|
459
443
|
if (!re2->global)
|
|
@@ -461,9 +445,9 @@ static Nan::Maybe<std::string> replace(WrappedRE2 *re2, const StrVal &replacee,
|
|
|
461
445
|
break;
|
|
462
446
|
}
|
|
463
447
|
}
|
|
464
|
-
if (
|
|
448
|
+
if (byteIndex < size)
|
|
465
449
|
{
|
|
466
|
-
result += std::string(data +
|
|
450
|
+
result += std::string(data + byteIndex, size - byteIndex);
|
|
467
451
|
}
|
|
468
452
|
|
|
469
453
|
if (re2->global)
|
|
@@ -508,8 +492,11 @@ NAN_METHOD(WrappedRE2::Replace)
|
|
|
508
492
|
return;
|
|
509
493
|
}
|
|
510
494
|
|
|
511
|
-
|
|
512
|
-
|
|
495
|
+
re2->prepareLastString(info[0]);
|
|
496
|
+
StrValBase &replacee = *re2->lastStringValue;
|
|
497
|
+
if (replacee.isBad) return; // throws an exception
|
|
498
|
+
|
|
499
|
+
if (!replacee.isIndexValid)
|
|
513
500
|
{
|
|
514
501
|
info.GetReturnValue().Set(info[0]);
|
|
515
502
|
return;
|
|
@@ -531,13 +518,15 @@ NAN_METHOD(WrappedRE2::Replace)
|
|
|
531
518
|
}
|
|
532
519
|
else
|
|
533
520
|
{
|
|
534
|
-
|
|
535
|
-
if (
|
|
521
|
+
StrValBase *replacer = StrValBase::New(info[1]);
|
|
522
|
+
if (replacer->isBad) return; // throws an exception
|
|
523
|
+
|
|
524
|
+
if (!replacer->data)
|
|
536
525
|
{
|
|
537
526
|
info.GetReturnValue().Set(info[0]);
|
|
538
527
|
return;
|
|
539
528
|
}
|
|
540
|
-
const auto replaced = replace(re2, replacee, replacer
|
|
529
|
+
const auto replaced = replace(re2, replacee, replacer->data, replacer->size);
|
|
541
530
|
if (replaced.IsNothing())
|
|
542
531
|
{
|
|
543
532
|
info.GetReturnValue().Set(info[0]);
|
package/lib/search.cc
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#include "./wrapped_re2.h"
|
|
2
|
-
#include "./
|
|
2
|
+
#include "./str-val.h"
|
|
3
3
|
|
|
4
4
|
NAN_METHOD(WrappedRE2::Search)
|
|
5
5
|
{
|
|
@@ -13,19 +13,20 @@ NAN_METHOD(WrappedRE2::Search)
|
|
|
13
13
|
return;
|
|
14
14
|
}
|
|
15
15
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
16
|
+
re2->prepareLastString(info[0], true);
|
|
17
|
+
StrValBase &str = *re2->lastStringValue;
|
|
18
|
+
if (str.isBad) return; // throws an exception
|
|
19
|
+
|
|
20
|
+
if (!str.data)
|
|
19
21
|
return;
|
|
20
|
-
}
|
|
21
22
|
|
|
22
23
|
// actual work
|
|
23
24
|
|
|
24
25
|
re2::StringPiece match;
|
|
25
26
|
|
|
26
|
-
if (re2->regexp.Match(
|
|
27
|
+
if (re2->regexp.Match(str, 0, str.size, re2->sticky ? re2::RE2::ANCHOR_START : re2::RE2::UNANCHORED, &match, 1))
|
|
27
28
|
{
|
|
28
|
-
info.GetReturnValue().Set(static_cast<int>(
|
|
29
|
+
info.GetReturnValue().Set(static_cast<int>(str.isBuffer ? match.data() - str.data : getUtf16Length(str.data, match.data())));
|
|
29
30
|
return;
|
|
30
31
|
}
|
|
31
32
|
|
package/lib/split.cc
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#include "./wrapped_re2.h"
|
|
2
|
-
#include "./
|
|
2
|
+
#include "./str-val.h"
|
|
3
3
|
|
|
4
4
|
#include <algorithm>
|
|
5
5
|
#include <limits>
|
|
@@ -20,13 +20,9 @@ NAN_METHOD(WrappedRE2::Split)
|
|
|
20
20
|
return;
|
|
21
21
|
}
|
|
22
22
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
return;
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
re2::StringPiece str = a;
|
|
23
|
+
re2->prepareLastString(info[0], true);
|
|
24
|
+
StrValBase &str = *re2->lastStringValue;
|
|
25
|
+
if (str.isBad) return; // throws an exception
|
|
30
26
|
|
|
31
27
|
size_t limit = std::numeric_limits<size_t>::max();
|
|
32
28
|
if (info.Length() > 1 && info[1]->IsNumber())
|
|
@@ -42,30 +38,30 @@ NAN_METHOD(WrappedRE2::Split)
|
|
|
42
38
|
|
|
43
39
|
std::vector<re2::StringPiece> groups(re2->regexp.NumberOfCapturingGroups() + 1), pieces;
|
|
44
40
|
const auto &match = groups[0];
|
|
45
|
-
size_t
|
|
41
|
+
size_t byteIndex = 0;
|
|
46
42
|
|
|
47
|
-
while (
|
|
43
|
+
while (byteIndex < str.size && re2->regexp.Match(str, byteIndex, str.size, RE2::UNANCHORED, &groups[0], groups.size()))
|
|
48
44
|
{
|
|
49
45
|
if (match.size())
|
|
50
46
|
{
|
|
51
|
-
pieces.push_back(re2::StringPiece(
|
|
52
|
-
|
|
47
|
+
pieces.push_back(re2::StringPiece(str.data + byteIndex, match.data() - str.data - byteIndex));
|
|
48
|
+
byteIndex = match.data() - str.data + match.size();
|
|
53
49
|
pieces.insert(pieces.end(), groups.begin() + 1, groups.end());
|
|
54
50
|
}
|
|
55
51
|
else
|
|
56
52
|
{
|
|
57
|
-
size_t sym_size = getUtf8CharSize(
|
|
58
|
-
pieces.push_back(re2::StringPiece(
|
|
59
|
-
|
|
53
|
+
size_t sym_size = getUtf8CharSize(str.data[byteIndex]);
|
|
54
|
+
pieces.push_back(re2::StringPiece(str.data + byteIndex, sym_size));
|
|
55
|
+
byteIndex += sym_size;
|
|
60
56
|
}
|
|
61
57
|
if (pieces.size() >= limit)
|
|
62
58
|
{
|
|
63
59
|
break;
|
|
64
60
|
}
|
|
65
61
|
}
|
|
66
|
-
if (pieces.size() < limit && (
|
|
62
|
+
if (pieces.size() < limit && (byteIndex < str.size || (byteIndex == str.size && match.size())))
|
|
67
63
|
{
|
|
68
|
-
pieces.push_back(re2::StringPiece(
|
|
64
|
+
pieces.push_back(re2::StringPiece(str.data + byteIndex, str.size - byteIndex));
|
|
69
65
|
}
|
|
70
66
|
|
|
71
67
|
if (pieces.empty())
|
|
@@ -77,7 +73,7 @@ NAN_METHOD(WrappedRE2::Split)
|
|
|
77
73
|
|
|
78
74
|
// form a result
|
|
79
75
|
|
|
80
|
-
if (
|
|
76
|
+
if (str.isBuffer)
|
|
81
77
|
{
|
|
82
78
|
for (size_t i = 0, n = std::min(pieces.size(), limit); i < n; ++i)
|
|
83
79
|
{
|
package/lib/str-val.cc
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
#include "./str-val.h"
|
|
2
|
+
|
|
3
|
+
StrValBuffer::StrValBuffer(const v8::Local<v8::Value> &arg, size_t newIndex) : StrValBase()
|
|
4
|
+
{
|
|
5
|
+
if (!node::Buffer::HasInstance(arg))
|
|
6
|
+
return;
|
|
7
|
+
|
|
8
|
+
isBuffer = true;
|
|
9
|
+
size = length = node::Buffer::Length(arg);
|
|
10
|
+
data = node::Buffer::Data(arg);
|
|
11
|
+
|
|
12
|
+
byteIndex = index = newIndex;
|
|
13
|
+
isIndexValid = byteIndex < size;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
inline size_t getUtf8CharSize(char ch)
|
|
17
|
+
{
|
|
18
|
+
return ((0xE5000000 >> ((ch >> 3) & 0x1E)) & 3) + 1;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
inline size_t countBytes(const char *data, size_t from, size_t n)
|
|
22
|
+
{
|
|
23
|
+
for (; n > 0; --n)
|
|
24
|
+
{
|
|
25
|
+
size_t s = getUtf8CharSize(data[from]);
|
|
26
|
+
from += s;
|
|
27
|
+
if (s == 4 && n >= 2)
|
|
28
|
+
--n; // this utf8 character will take two utf16 characters
|
|
29
|
+
// the decrement above is protected to avoid an overflow of an unsigned integer
|
|
30
|
+
}
|
|
31
|
+
return from;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
// Converts a non-Buffer JS value to a string and keeps a NUL-terminated UTF-8
// copy of it in `buffer`, with `data` pointing at that copy.
// Sets `isBad` when the ToString() conversion yields no value.
// `length` comes from Nan::DecodeBytes() with its default encoding and is the
// unit `index` is compared against (presumably UTF-16 code units - confirm
// against the NAN documentation); `size` is the UTF-8 byte count.
//
// arg      - the JS value to convert
// newIndex - starting position, in `length` units, for the next match
StrValString::StrValString(const v8::Local<v8::Value> &arg, size_t newIndex) : StrValBase()
{
  // Buffers are not handled here; keep the default state.
  if (node::Buffer::HasInstance(arg))
    return;

  auto maybeString = arg->ToString(Nan::GetCurrentContext());
  if (maybeString.IsEmpty())
  {
    isBad = true;
    return;
  }
  auto jsString = maybeString.ToLocalChecked();

  length = Nan::DecodeBytes(jsString);
  size = Nan::DecodeBytes(jsString, Nan::UTF8);

  // one extra byte for the terminating NUL
  buffer.resize(size + 1);
  data = buffer.data();
  Nan::DecodeWrite(data, size, jsString, Nan::UTF8);
  buffer[size] = '\0';

  index = newIndex;
  isIndexValid = index <= length;

  // byteIndex stays 0 for an invalid index or for index 0;
  // an index at the very end maps straight to the byte size.
  if (isIndexValid && index)
    byteIndex = index == length ? size : countBytes(data, 0, index);
}
|
|
68
|
+
|
|
69
|
+
void StrValBase::setIndex(size_t newIndex)
|
|
70
|
+
{
|
|
71
|
+
isIndexValid = newIndex <= length;
|
|
72
|
+
if (!isIndexValid)
|
|
73
|
+
{
|
|
74
|
+
index = newIndex;
|
|
75
|
+
byteIndex = 0;
|
|
76
|
+
return;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
if (newIndex == index)
|
|
80
|
+
return;
|
|
81
|
+
|
|
82
|
+
if (isBuffer)
|
|
83
|
+
{
|
|
84
|
+
byteIndex = index = newIndex;
|
|
85
|
+
return;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
// String
|
|
89
|
+
|
|
90
|
+
if (!newIndex)
|
|
91
|
+
{
|
|
92
|
+
byteIndex = index = 0;
|
|
93
|
+
return;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
if (newIndex == length)
|
|
97
|
+
{
|
|
98
|
+
byteIndex = size;
|
|
99
|
+
index = length;
|
|
100
|
+
return;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
byteIndex = index < newIndex ? countBytes(data, byteIndex, newIndex - index) : countBytes(data, 0, newIndex);
|
|
104
|
+
index = newIndex;
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
// Factory: creates a StrValBuffer for a Node.js Buffer and a StrValString for
// any other value. The object is allocated with `new` and nothing here releases
// it, so the caller is responsible for its lifetime.
StrValBase *StrValBase::New(const v8::Local<v8::Value> &arg, size_t newIndex)
{
  StrValBase *result = nullptr;
  if (node::Buffer::HasInstance(arg))
    result = new StrValBuffer(arg, newIndex);
  else
    result = new StrValString(arg, newIndex);
  return result;
}
|
package/lib/str-val.h
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
#include <vector>
|
|
3
|
+
#include <nan.h>
|
|
4
|
+
#include <re2/re2.h>
|
|
5
|
+
|
|
6
|
+
struct StrValBase
|
|
7
|
+
{
|
|
8
|
+
char *data;
|
|
9
|
+
size_t size, length;
|
|
10
|
+
size_t index, byteIndex;
|
|
11
|
+
bool isBuffer, isIndexValid, isBad;
|
|
12
|
+
|
|
13
|
+
StrValBase() : data(NULL), size(0), length(0), index(0), byteIndex(0), isBuffer(false), isIndexValid(false), isBad(false) {}
|
|
14
|
+
|
|
15
|
+
operator re2::StringPiece() const { return re2::StringPiece(data, size); }
|
|
16
|
+
|
|
17
|
+
void setIndex(size_t newIndex = 0);
|
|
18
|
+
|
|
19
|
+
static StrValBase *New(const v8::Local<v8::Value> &arg, size_t newIndex = 0);
|
|
20
|
+
};
|
|
21
|
+
|
|
22
|
+
struct StrValBuffer : public StrValBase
|
|
23
|
+
{
|
|
24
|
+
StrValBuffer(const v8::Local<v8::Value> &arg, size_t newIndex = 0);
|
|
25
|
+
};
|
|
26
|
+
|
|
27
|
+
struct StrValString : public StrValBase
|
|
28
|
+
{
|
|
29
|
+
StrValString(const v8::Local<v8::Value> &arg, size_t newIndex = 0);
|
|
30
|
+
|
|
31
|
+
std::vector<char> buffer;
|
|
32
|
+
};
|
package/lib/test.cc
CHANGED
|
@@ -1,10 +1,8 @@
|
|
|
1
1
|
#include "./wrapped_re2.h"
|
|
2
|
-
#include "./
|
|
2
|
+
#include "./str-val.h"
|
|
3
3
|
|
|
4
4
|
#include <vector>
|
|
5
5
|
|
|
6
|
-
#include <node_buffer.h>
|
|
7
|
-
|
|
8
6
|
NAN_METHOD(WrappedRE2::Test)
|
|
9
7
|
{
|
|
10
8
|
|
|
@@ -17,62 +15,33 @@ NAN_METHOD(WrappedRE2::Test)
|
|
|
17
15
|
return;
|
|
18
16
|
}
|
|
19
17
|
|
|
20
|
-
|
|
21
|
-
|
|
18
|
+
re2->prepareLastString(info[0]);
|
|
19
|
+
StrValBase &str = *re2->lastStringValue;
|
|
20
|
+
if (str.isBad) return; // throws an exception
|
|
21
|
+
|
|
22
|
+
if (!re2->global && !re2->sticky)
|
|
22
23
|
{
|
|
24
|
+
info.GetReturnValue().Set(re2->regexp.Match(str, 0, str.size, re2::RE2::UNANCHORED, NULL, 0));
|
|
23
25
|
return;
|
|
24
26
|
}
|
|
25
27
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
if (str.isBuffer)
|
|
29
|
-
{
|
|
30
|
-
if ((re2->global || re2->sticky) && re2->lastIndex)
|
|
31
|
-
{
|
|
32
|
-
if (re2->lastIndex > str.size)
|
|
33
|
-
{
|
|
34
|
-
re2->lastIndex = 0;
|
|
35
|
-
info.GetReturnValue().Set(false);
|
|
36
|
-
return;
|
|
37
|
-
}
|
|
38
|
-
lastIndex = re2->lastIndex;
|
|
39
|
-
}
|
|
40
|
-
}
|
|
41
|
-
else
|
|
28
|
+
if (!str.isIndexValid)
|
|
42
29
|
{
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
{
|
|
47
|
-
re2->lastIndex = 0;
|
|
48
|
-
info.GetReturnValue().Set(false);
|
|
49
|
-
return;
|
|
50
|
-
}
|
|
51
|
-
for (size_t n = re2->lastIndex; n; --n)
|
|
52
|
-
{
|
|
53
|
-
size_t s = getUtf8CharSize(str.data[lastIndex]);
|
|
54
|
-
lastIndex += s;
|
|
55
|
-
if (s == 4 && n >= 2) --n; // this utf8 character will take two utf16 characters
|
|
56
|
-
// the decrement above is protected to avoid an overflow of an unsigned integer
|
|
57
|
-
}
|
|
58
|
-
}
|
|
30
|
+
re2->lastIndex = 0;
|
|
31
|
+
info.GetReturnValue().SetNull();
|
|
32
|
+
return;
|
|
59
33
|
}
|
|
60
34
|
|
|
61
35
|
// actual work
|
|
62
36
|
|
|
63
|
-
|
|
37
|
+
re2::StringPiece match;
|
|
38
|
+
if (re2->regexp.Match(str, str.byteIndex, str.size, re2->sticky ? re2::RE2::ANCHOR_START : re2::RE2::UNANCHORED, &match, 1))
|
|
64
39
|
{
|
|
65
|
-
re2
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
re2->lastIndex += str.isBuffer ? match.data() - str.data + match.size() - lastIndex : getUtf16Length(str.data + lastIndex, match.data() + match.size());
|
|
69
|
-
info.GetReturnValue().Set(true);
|
|
70
|
-
return;
|
|
71
|
-
}
|
|
72
|
-
re2->lastIndex = 0;
|
|
73
|
-
info.GetReturnValue().Set(false);
|
|
40
|
+
re2->lastIndex +=
|
|
41
|
+
str.isBuffer ? match.data() - str.data + match.size() - str.byteIndex : getUtf16Length(str.data + str.byteIndex, match.data() + match.size());
|
|
42
|
+
info.GetReturnValue().Set(true);
|
|
74
43
|
return;
|
|
75
44
|
}
|
|
76
|
-
|
|
77
|
-
info.GetReturnValue().Set(
|
|
45
|
+
re2->lastIndex = 0;
|
|
46
|
+
info.GetReturnValue().Set(false);
|
|
78
47
|
}
|
package/lib/util.cc
CHANGED
|
@@ -1,34 +1,5 @@
|
|
|
1
1
|
#include "./util.h"
|
|
2
2
|
|
|
3
|
-
#include <sys/types.h>
|
|
4
|
-
#include <string>
|
|
5
|
-
|
|
6
|
-
#include <node_buffer.h>
|
|
7
|
-
|
|
8
|
-
StrVal::StrVal(const v8::Local<v8::Value> &arg) : data(NULL), size(0), isBuffer(false)
|
|
9
|
-
{
|
|
10
|
-
if (node::Buffer::HasInstance(arg))
|
|
11
|
-
{
|
|
12
|
-
isBuffer = true;
|
|
13
|
-
size = length = node::Buffer::Length(arg);
|
|
14
|
-
data = node::Buffer::Data(arg);
|
|
15
|
-
}
|
|
16
|
-
else
|
|
17
|
-
{
|
|
18
|
-
auto t = arg->ToString(Nan::GetCurrentContext());
|
|
19
|
-
if (!t.IsEmpty())
|
|
20
|
-
{
|
|
21
|
-
auto s = t.ToLocalChecked();
|
|
22
|
-
length = Nan::DecodeBytes(s);
|
|
23
|
-
size = Nan::DecodeBytes(s, Nan::UTF8);
|
|
24
|
-
buffer.resize(size + 1);
|
|
25
|
-
data = &buffer[0];
|
|
26
|
-
Nan::DecodeWrite(data, size, s, Nan::UTF8);
|
|
27
|
-
buffer[size] = '\0';
|
|
28
|
-
}
|
|
29
|
-
}
|
|
30
|
-
}
|
|
31
|
-
|
|
32
3
|
void consoleCall(const v8::Local<v8::String> &methodName, v8::Local<v8::Value> text)
|
|
33
4
|
{
|
|
34
5
|
auto context = Nan::GetCurrentContext();
|
package/lib/util.h
CHANGED
|
@@ -3,21 +3,6 @@
|
|
|
3
3
|
|
|
4
4
|
#include "./wrapped_re2.h"
|
|
5
5
|
|
|
6
|
-
#include <vector>
|
|
7
|
-
|
|
8
|
-
struct StrVal
|
|
9
|
-
{
|
|
10
|
-
std::vector<char> buffer;
|
|
11
|
-
char *data;
|
|
12
|
-
size_t size, length;
|
|
13
|
-
bool isBuffer;
|
|
14
|
-
|
|
15
|
-
StrVal() : data(NULL), size(0), length(0), isBuffer(false) {}
|
|
16
|
-
StrVal(const v8::Local<v8::Value> &arg);
|
|
17
|
-
|
|
18
|
-
operator re2::StringPiece() const { return re2::StringPiece(data, size); }
|
|
19
|
-
};
|
|
20
|
-
|
|
21
6
|
template <typename R, typename P, typename L>
|
|
22
7
|
inline v8::MaybeLocal<R> bind(v8::MaybeLocal<P> param, L lambda)
|
|
23
8
|
{
|
package/lib/wrapped_re2.h
CHANGED
|
@@ -7,12 +7,30 @@
|
|
|
7
7
|
|
|
8
8
|
#include <string>
|
|
9
9
|
|
|
10
|
+
struct StrValBase;
|
|
11
|
+
|
|
10
12
|
class WrappedRE2 : public Nan::ObjectWrap
|
|
11
13
|
{
|
|
12
14
|
private:
|
|
13
|
-
WrappedRE2(
|
|
14
|
-
|
|
15
|
-
|
|
15
|
+
WrappedRE2(
|
|
16
|
+
const re2::StringPiece &pattern,
|
|
17
|
+
const re2::RE2::Options &options,
|
|
18
|
+
const std::string &src,
|
|
19
|
+
const bool &g,
|
|
20
|
+
const bool &i,
|
|
21
|
+
const bool &m,
|
|
22
|
+
const bool &s,
|
|
23
|
+
const bool &y,
|
|
24
|
+
const bool &d) : regexp(pattern, options),
|
|
25
|
+
source(src),
|
|
26
|
+
global(g),
|
|
27
|
+
ignoreCase(i),
|
|
28
|
+
multiline(m),
|
|
29
|
+
dotAll(s),
|
|
30
|
+
sticky(y),
|
|
31
|
+
hasIndices(d),
|
|
32
|
+
lastIndex(0),
|
|
33
|
+
lastStringValue(nullptr) {}
|
|
16
34
|
|
|
17
35
|
static NAN_METHOD(New);
|
|
18
36
|
static NAN_METHOD(ToString);
|
|
@@ -45,6 +63,11 @@ private:
|
|
|
45
63
|
static NAN_SETTER(SetUnicodeWarningLevel);
|
|
46
64
|
|
|
47
65
|
public:
|
|
66
|
+
~WrappedRE2()
|
|
67
|
+
{
|
|
68
|
+
dropLastString();
|
|
69
|
+
}
|
|
70
|
+
|
|
48
71
|
static v8::Local<v8::Function> Init();
|
|
49
72
|
|
|
50
73
|
static inline bool HasInstance(v8::Local<v8::Object> object)
|
|
@@ -73,6 +96,13 @@ public:
|
|
|
73
96
|
bool sticky;
|
|
74
97
|
bool hasIndices;
|
|
75
98
|
size_t lastIndex;
|
|
99
|
+
|
|
100
|
+
private:
|
|
101
|
+
Nan::Persistent<v8::Value> lastString; // weak pointer
|
|
102
|
+
StrValBase *lastStringValue;
|
|
103
|
+
|
|
104
|
+
void dropLastString();
|
|
105
|
+
void prepareLastString(const v8::Local<v8::Value> &arg, bool ignoreLastIndex = false);
|
|
76
106
|
};
|
|
77
107
|
|
|
78
108
|
// utilities
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "re2",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.21.0",
|
|
4
4
|
"description": "Bindings for RE2: fast, safe alternative to backtracking regular expression engines.",
|
|
5
5
|
"homepage": "https://github.com/uhop/node-re2",
|
|
6
6
|
"bugs": "https://github.com/uhop/node-re2/issues",
|