re2 1.21.5 → 1.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +10 -1
- package/lib/new.cc +72 -0
- package/package.json +2 -2
package/LICENSE
CHANGED
|
@@ -7,7 +7,7 @@ The text of the BSD license is reproduced below.
|
|
|
7
7
|
The "New" BSD License:
|
|
8
8
|
**********************
|
|
9
9
|
|
|
10
|
-
Copyright (c) 2005-
|
|
10
|
+
Copyright (c) 2005-2025, Eugene Lazutkin
|
|
11
11
|
All rights reserved.
|
|
12
12
|
|
|
13
13
|
Redistribution and use in source and binary forms, with or without
|
package/README.md
CHANGED
|
@@ -351,9 +351,18 @@ console.log('re2_res : ' + re2_res); // prints: re2_res : abc,a,b,c
|
|
|
351
351
|
|
|
352
352
|
`RE2` always works in the Unicode mode. See `RE2.unicodeWarningLevel` above for more details on how to control warnings about this feature.
|
|
353
353
|
|
|
354
|
+
#### Unicode classes
|
|
355
|
+
|
|
356
|
+
`RE2` supports a subset of Unicode classes as defined in [RE2 Syntax](https://github.com/google/re2/wiki/Syntax). Native Google RE2 supports only short names, e.g., `L` for `Letter`, `N` for `Number`, etc. Like `RegExp`, `RE2` supports both short and long names, e.g., `Letter` for `L`, by translating them to short names.
|
|
357
|
+
|
|
358
|
+
The extended form `\p{name=value}` is not supported. Only form `\p{name}` is supported.
|
|
359
|
+
|
|
360
|
+
The same applies to `\P{name}`.
|
|
361
|
+
|
|
354
362
|
## Release history
|
|
355
363
|
|
|
356
|
-
- 1.
|
|
364
|
+
- 1.22.0 *Added support for translation of Unicode classes (thx, [John Livingston](https://github.com/JohnXLivingston)). Added [attestations](https://github.com/uhop/node-re2/attestations).*
|
|
365
|
+
- 1.21.5 *Updated all dependencies and the list of pre-compiled targets. Fixed minor bugs. C++ style fix (thx, [Benjamin Brienen](https://github.com/BenjaminBrienen)). Added Windows 11 ARM build runner (thx, [Kagami Sascha Rosylight](https://github.com/saschanaz)).*
|
|
357
366
|
- 1.21.4 *Fixed a regression reported by [caroline-matsec](https://github.com/caroline-matsec), thx! Added pre-compilation targets for Alpine Linux on ARM. Updated deps.*
|
|
358
367
|
- 1.21.3 *Fixed an empty string regression reported by [Rhys Arkins](https://github.com/rarkins), thx! Updated deps.*
|
|
359
368
|
- 1.21.2 *Fixed another memory regression reported by [matthewvalentine](https://github.com/matthewvalentine), thx! Updated deps. Added more tests and benchmarks.*
|
package/lib/new.cc
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#include "./wrapped_re2.h"
|
|
2
2
|
#include "./util.h"
|
|
3
3
|
|
|
4
|
+
#include <map>
|
|
4
5
|
#include <memory>
|
|
5
6
|
#include <string>
|
|
6
7
|
#include <unordered_set>
|
|
@@ -18,6 +19,47 @@ inline bool isHexadecimal(char ch)
|
|
|
18
19
|
return ('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z');
|
|
19
20
|
}
|
|
20
21
|
|
|
22
|
+
static std::map<std::string, std::string> unicodeClasses = {
|
|
23
|
+
{"Uppercase_Letter", "Lu"},
|
|
24
|
+
{"Lowercase_Letter", "Ll"},
|
|
25
|
+
{"Titlecase_Letter", "Lt"},
|
|
26
|
+
{"Cased_Letter", "LC"},
|
|
27
|
+
{"Modifier_Letter", "Lm"},
|
|
28
|
+
{"Other_Letter", "Lo"},
|
|
29
|
+
{"Letter", "L"},
|
|
30
|
+
{"Nonspacing_Mark", "Mn"},
|
|
31
|
+
{"Spacing_Mark", "Mc"},
|
|
32
|
+
{"Enclosing_Mark", "Me"},
|
|
33
|
+
{"Mark", "M"},
|
|
34
|
+
{"Decimal_Number", "Nd"},
|
|
35
|
+
{"Letter_Number", "Nl"},
|
|
36
|
+
{"Other_Number", "No"},
|
|
37
|
+
{"Number", "N"},
|
|
38
|
+
{"Connector_Punctuation", "Pc"},
|
|
39
|
+
{"Dash_Punctuation", "Pd"},
|
|
40
|
+
{"Open_Punctuation", "Ps"},
|
|
41
|
+
{"Close_Punctuation", "Pe"},
|
|
42
|
+
{"Initial_Punctuation", "Pi"},
|
|
43
|
+
{"Final_Punctuation", "Pf"},
|
|
44
|
+
{"Other_Punctuation", "Po"},
|
|
45
|
+
{"Punctuation", "P"},
|
|
46
|
+
{"Math_Symbol", "Sm"},
|
|
47
|
+
{"Currency_Symbol", "Sc"},
|
|
48
|
+
{"Modifier_Symbol", "Sk"},
|
|
49
|
+
{"Other_Symbol", "So"},
|
|
50
|
+
{"Symbol", "S"},
|
|
51
|
+
{"Space_Separator", "Zs"},
|
|
52
|
+
{"Line_Separator", "Zl"},
|
|
53
|
+
{"Paragraph_Separator", "Zp"},
|
|
54
|
+
{"Separator", "Z"},
|
|
55
|
+
{"Control", "Cc"},
|
|
56
|
+
{"Format", "Cf"},
|
|
57
|
+
{"Surrogate", "Cs"},
|
|
58
|
+
{"Private_Use", "Co"},
|
|
59
|
+
{"Unassigned", "Cn"},
|
|
60
|
+
{"Other", "C"},
|
|
61
|
+
};
|
|
62
|
+
|
|
21
63
|
static bool translateRegExp(const char *data, size_t size, bool multiline, std::vector<char> &buffer)
|
|
22
64
|
{
|
|
23
65
|
std::string result;
|
|
@@ -98,6 +140,36 @@ static bool translateRegExp(const char *data, size_t size, bool multiline, std::
|
|
|
98
140
|
result += "\\u";
|
|
99
141
|
i += 2;
|
|
100
142
|
continue;
|
|
143
|
+
case 'p':
|
|
144
|
+
case 'P':
|
|
145
|
+
if (i + 2 < size) {
|
|
146
|
+
if (data[i + 2] == '{') {
|
|
147
|
+
size_t j = i + 3;
|
|
148
|
+
while (j < size && data[j] != '}') ++j;
|
|
149
|
+
if (j < size) {
|
|
150
|
+
result += "\\";
|
|
151
|
+
result += data[i + 1];
|
|
152
|
+
std::string name(data + i + 3, j - i - 3);
|
|
153
|
+
if (unicodeClasses.find(name) != unicodeClasses.end()) {
|
|
154
|
+
name = unicodeClasses[name];
|
|
155
|
+
}
|
|
156
|
+
if (name.size() == 1) {
|
|
157
|
+
result += name;
|
|
158
|
+
} else {
|
|
159
|
+
result += "{";
|
|
160
|
+
result += name;
|
|
161
|
+
result += "}";
|
|
162
|
+
}
|
|
163
|
+
i = j + 1;
|
|
164
|
+
changed = true;
|
|
165
|
+
continue;
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
result += "\\";
|
|
170
|
+
result += data[i + 1];
|
|
171
|
+
i += 2;
|
|
172
|
+
continue;
|
|
101
173
|
default:
|
|
102
174
|
result += "\\";
|
|
103
175
|
size_t sym_size = getUtf8CharSize(ch);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "re2",
|
|
3
|
-
"version": "1.21.5",
|
|
3
|
+
"version": "1.22.0",
|
|
4
4
|
"description": "Bindings for RE2: fast, safe alternative to backtracking regular expression engines.",
|
|
5
5
|
"homepage": "https://github.com/uhop/node-re2",
|
|
6
6
|
"bugs": "https://github.com/uhop/node-re2/issues",
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
"vendor"
|
|
16
16
|
],
|
|
17
17
|
"dependencies": {
|
|
18
|
-
"install-artifact-from-github": "^1.
|
|
18
|
+
"install-artifact-from-github": "^1.4.0",
|
|
19
19
|
"nan": "^2.22.2",
|
|
20
20
|
"node-gyp": "^11.2.0"
|
|
21
21
|
},
|