re2 1.21.5 → 1.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE CHANGED
@@ -7,7 +7,7 @@ The text of the BSD license is reproduced below.
7
7
  The "New" BSD License:
8
8
  **********************
9
9
 
10
- Copyright (c) 2005-2024, Eugene Lazutkin
10
+ Copyright (c) 2005-2025, Eugene Lazutkin
11
11
  All rights reserved.
12
12
 
13
13
  Redistribution and use in source and binary forms, with or without
package/README.md CHANGED
@@ -351,9 +351,18 @@ console.log('re2_res : ' + re2_res); // prints: re2_res : abc,a,b,c
351
351
 
352
352
  `RE2` always works in the Unicode mode. See `RE2.unicodeWarningLevel` above for more details on how to control warnings about this feature.
353
353
 
354
+ #### Unicode classes
355
+
356
+ `RE2` supports a subset of Unicode classes as defined in [RE2 Syntax](https://github.com/google/re2/wiki/Syntax). Native Google RE2 supports only short names, e.g., `L` for `Letter` and `N` for `Number`. Like `RegExp`, `RE2` accepts both short and long names: long names, e.g., `Letter`, are translated to their short equivalents, e.g., `L`.
357
+
358
+ The extended form `\p{name=value}` is not supported. Only the form `\p{name}` is supported.
359
+
360
+ The same applies to `\P{name}`.
361
+
354
362
  ## Release history
355
363
 
356
- - 1.21.5 *Updated all dependencies and the list of pre-compiled targets. Fixed minor bugs. C++ style fix (thx, [Benjamin Brienen](https://github.com/BenjaminBrienen)). Added Windows 11 ARM build runner [Kagami Sascha Rosylight](https://github.com/saschanaz).*
364
+ - 1.22.0 *Added support for translation of Unicode classes (thx, [John Livingston](https://github.com/JohnXLivingston)). Added [attestations](https://github.com/uhop/node-re2/attestations).*
365
+ - 1.21.5 *Updated all dependencies and the list of pre-compiled targets. Fixed minor bugs. C++ style fix (thx, [Benjamin Brienen](https://github.com/BenjaminBrienen)). Added Windows 11 ARM build runner (thx, [Kagami Sascha Rosylight](https://github.com/saschanaz)).*
357
366
  - 1.21.4 *Fixed a regression reported by [caroline-matsec](https://github.com/caroline-matsec), thx! Added pre-compilation targets for Alpine Linux on ARM. Updated deps.*
358
367
  - 1.21.3 *Fixed an empty string regression reported by [Rhys Arkins](https://github.com/rarkins), thx! Updated deps.*
359
368
  - 1.21.2 *Fixed another memory regression reported by [matthewvalentine](https://github.com/matthewvalentine), thx! Updated deps. Added more tests and benchmarks.*
package/lib/new.cc CHANGED
@@ -1,6 +1,7 @@
1
1
  #include "./wrapped_re2.h"
2
2
  #include "./util.h"
3
3
 
4
+ #include <map>
4
5
  #include <memory>
5
6
  #include <string>
6
7
  #include <unordered_set>
@@ -18,6 +19,47 @@ inline bool isHexadecimal(char ch)
18
19
  return ('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z');
19
20
  }
20
21
 
22
+ static std::map<std::string, std::string> unicodeClasses = {
23
+ {"Uppercase_Letter", "Lu"},
24
+ {"Lowercase_Letter", "Ll"},
25
+ {"Titlecase_Letter", "Lt"},
26
+ {"Cased_Letter", "LC"},
27
+ {"Modifier_Letter", "Lm"},
28
+ {"Other_Letter", "Lo"},
29
+ {"Letter", "L"},
30
+ {"Nonspacing_Mark", "Mn"},
31
+ {"Spacing_Mark", "Mc"},
32
+ {"Enclosing_Mark", "Me"},
33
+ {"Mark", "M"},
34
+ {"Decimal_Number", "Nd"},
35
+ {"Letter_Number", "Nl"},
36
+ {"Other_Number", "No"},
37
+ {"Number", "N"},
38
+ {"Connector_Punctuation", "Pc"},
39
+ {"Dash_Punctuation", "Pd"},
40
+ {"Open_Punctuation", "Ps"},
41
+ {"Close_Punctuation", "Pe"},
42
+ {"Initial_Punctuation", "Pi"},
43
+ {"Final_Punctuation", "Pf"},
44
+ {"Other_Punctuation", "Po"},
45
+ {"Punctuation", "P"},
46
+ {"Math_Symbol", "Sm"},
47
+ {"Currency_Symbol", "Sc"},
48
+ {"Modifier_Symbol", "Sk"},
49
+ {"Other_Symbol", "So"},
50
+ {"Symbol", "S"},
51
+ {"Space_Separator", "Zs"},
52
+ {"Line_Separator", "Zl"},
53
+ {"Paragraph_Separator", "Zp"},
54
+ {"Separator", "Z"},
55
+ {"Control", "Cc"},
56
+ {"Format", "Cf"},
57
+ {"Surrogate", "Cs"},
58
+ {"Private_Use", "Co"},
59
+ {"Unassigned", "Cn"},
60
+ {"Other", "C"},
61
+ };
62
+
21
63
  static bool translateRegExp(const char *data, size_t size, bool multiline, std::vector<char> &buffer)
22
64
  {
23
65
  std::string result;
@@ -98,6 +140,36 @@ static bool translateRegExp(const char *data, size_t size, bool multiline, std::
98
140
  result += "\\u";
99
141
  i += 2;
100
142
  continue;
143
+ case 'p':
144
+ case 'P':
145
+ if (i + 2 < size) {
146
+ if (data[i + 2] == '{') {
147
+ size_t j = i + 3;
148
+ while (j < size && data[j] != '}') ++j;
149
+ if (j < size) {
150
+ result += "\\";
151
+ result += data[i + 1];
152
+ std::string name(data + i + 3, j - i - 3);
153
+ if (unicodeClasses.find(name) != unicodeClasses.end()) {
154
+ name = unicodeClasses[name];
155
+ }
156
+ if (name.size() == 1) {
157
+ result += name;
158
+ } else {
159
+ result += "{";
160
+ result += name;
161
+ result += "}";
162
+ }
163
+ i = j + 1;
164
+ changed = true;
165
+ continue;
166
+ }
167
+ }
168
+ }
169
+ result += "\\";
170
+ result += data[i + 1];
171
+ i += 2;
172
+ continue;
101
173
  default:
102
174
  result += "\\";
103
175
  size_t sym_size = getUtf8CharSize(ch);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "re2",
3
- "version": "1.21.5",
3
+ "version": "1.22.0",
4
4
  "description": "Bindings for RE2: fast, safe alternative to backtracking regular expression engines.",
5
5
  "homepage": "https://github.com/uhop/node-re2",
6
6
  "bugs": "https://github.com/uhop/node-re2/issues",
@@ -15,7 +15,7 @@
15
15
  "vendor"
16
16
  ],
17
17
  "dependencies": {
18
- "install-artifact-from-github": "^1.3.5",
18
+ "install-artifact-from-github": "^1.4.0",
19
19
  "nan": "^2.22.2",
20
20
  "node-gyp": "^11.2.0"
21
21
  },