@bufbuild/re2 0.0.1-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96):
  1. package/LICENSE +21 -0
  2. package/README.md +30 -0
  3. package/dist/cjs/CharClass.d.ts +30 -0
  4. package/dist/cjs/CharClass.js +284 -0
  5. package/dist/cjs/CharGroup.d.ts +8 -0
  6. package/dist/cjs/CharGroup.js +83 -0
  7. package/dist/cjs/Codepoint.d.ts +3 -0
  8. package/dist/cjs/Codepoint.js +62 -0
  9. package/dist/cjs/Compiler.d.ts +40 -0
  10. package/dist/cjs/Compiler.js +262 -0
  11. package/dist/cjs/DFA.d.ts +36 -0
  12. package/dist/cjs/DFA.js +350 -0
  13. package/dist/cjs/Inst.d.ts +26 -0
  14. package/dist/cjs/Inst.js +86 -0
  15. package/dist/cjs/MachineInput.d.ts +17 -0
  16. package/dist/cjs/MachineInput.js +72 -0
  17. package/dist/cjs/Parser.d.ts +111 -0
  18. package/dist/cjs/Parser.js +1538 -0
  19. package/dist/cjs/Prefilter.d.ts +19 -0
  20. package/dist/cjs/Prefilter.js +163 -0
  21. package/dist/cjs/Prog.d.ts +39 -0
  22. package/dist/cjs/Prog.js +154 -0
  23. package/dist/cjs/RE2.d.ts +27 -0
  24. package/dist/cjs/RE2.js +221 -0
  25. package/dist/cjs/RE2Flags.d.ts +16 -0
  26. package/dist/cjs/RE2Flags.js +58 -0
  27. package/dist/cjs/Regexp.d.ts +43 -0
  28. package/dist/cjs/Regexp.js +98 -0
  29. package/dist/cjs/Simplify.d.ts +3 -0
  30. package/dist/cjs/Simplify.js +230 -0
  31. package/dist/cjs/Unicode.d.ts +17 -0
  32. package/dist/cjs/Unicode.js +165 -0
  33. package/dist/cjs/UnicodeRangeTable.d.ts +12 -0
  34. package/dist/cjs/UnicodeRangeTable.js +31 -0
  35. package/dist/cjs/UnicodeTables.d.ts +29 -0
  36. package/dist/cjs/UnicodeTables.js +571 -0
  37. package/dist/cjs/Utils.d.ts +22 -0
  38. package/dist/cjs/Utils.js +119 -0
  39. package/dist/cjs/__fixtures__/find.d.ts +9 -0
  40. package/dist/cjs/__fixtures__/find.js +115 -0
  41. package/dist/cjs/chars.d.ts +2 -0
  42. package/dist/cjs/chars.js +19 -0
  43. package/dist/cjs/exceptions.d.ts +55 -0
  44. package/dist/cjs/exceptions.js +94 -0
  45. package/dist/cjs/index.d.ts +102 -0
  46. package/dist/cjs/index.js +173 -0
  47. package/dist/cjs/package.json +1 -0
  48. package/dist/cjs/testParser.d.ts +3 -0
  49. package/dist/cjs/testParser.js +143 -0
  50. package/dist/esm/CharClass.d.ts +30 -0
  51. package/dist/esm/CharClass.js +281 -0
  52. package/dist/esm/CharGroup.d.ts +8 -0
  53. package/dist/esm/CharGroup.js +78 -0
  54. package/dist/esm/Codepoint.d.ts +3 -0
  55. package/dist/esm/Codepoint.js +59 -0
  56. package/dist/esm/Compiler.d.ts +40 -0
  57. package/dist/esm/Compiler.js +259 -0
  58. package/dist/esm/DFA.d.ts +36 -0
  59. package/dist/esm/DFA.js +347 -0
  60. package/dist/esm/Inst.d.ts +26 -0
  61. package/dist/esm/Inst.js +83 -0
  62. package/dist/esm/MachineInput.d.ts +17 -0
  63. package/dist/esm/MachineInput.js +68 -0
  64. package/dist/esm/Parser.d.ts +111 -0
  65. package/dist/esm/Parser.js +1535 -0
  66. package/dist/esm/Prefilter.d.ts +19 -0
  67. package/dist/esm/Prefilter.js +159 -0
  68. package/dist/esm/Prog.d.ts +39 -0
  69. package/dist/esm/Prog.js +150 -0
  70. package/dist/esm/RE2.d.ts +27 -0
  71. package/dist/esm/RE2.js +218 -0
  72. package/dist/esm/RE2Flags.d.ts +16 -0
  73. package/dist/esm/RE2Flags.js +41 -0
  74. package/dist/esm/Regexp.d.ts +43 -0
  75. package/dist/esm/Regexp.js +94 -0
  76. package/dist/esm/Simplify.d.ts +3 -0
  77. package/dist/esm/Simplify.js +228 -0
  78. package/dist/esm/Unicode.d.ts +17 -0
  79. package/dist/esm/Unicode.js +150 -0
  80. package/dist/esm/UnicodeRangeTable.d.ts +12 -0
  81. package/dist/esm/UnicodeRangeTable.js +28 -0
  82. package/dist/esm/UnicodeTables.d.ts +29 -0
  83. package/dist/esm/UnicodeTables.js +568 -0
  84. package/dist/esm/Utils.d.ts +22 -0
  85. package/dist/esm/Utils.js +103 -0
  86. package/dist/esm/__fixtures__/find.d.ts +9 -0
  87. package/dist/esm/__fixtures__/find.js +112 -0
  88. package/dist/esm/chars.d.ts +2 -0
  89. package/dist/esm/chars.js +14 -0
  90. package/dist/esm/exceptions.d.ts +55 -0
  91. package/dist/esm/exceptions.js +86 -0
  92. package/dist/esm/index.d.ts +102 -0
  93. package/dist/esm/index.js +163 -0
  94. package/dist/esm/testParser.d.ts +3 -0
  95. package/dist/esm/testParser.js +138 -0
  96. package/package.json +49 -0
@@ -0,0 +1,43 @@
1
/**
 * Node of the regular expression abstract syntax tree. Nodes are produced by
 * the parser and consumed by the compiler.
 */
export declare class Regexp {
  /** Numeric operator codes stored in the `op` field of a node. */
  static Op: {
    readonly NO_MATCH: 0;
    readonly EMPTY_MATCH: 1;
    readonly LITERAL: 2;
    readonly CHAR_CLASS: 3;
    readonly ANY_CHAR_NOT_NL: 4;
    readonly ANY_CHAR: 5;
    readonly BEGIN_LINE: 6;
    readonly END_LINE: 7;
    readonly BEGIN_TEXT: 8;
    readonly END_TEXT: 9;
    readonly WORD_BOUNDARY: 10;
    readonly NO_WORD_BOUNDARY: 11;
    readonly CAPTURE: 12;
    readonly STAR: 13;
    readonly PLUS: 14;
    readonly QUEST: 15;
    readonly REPEAT: 16;
    readonly CONCAT: 17;
    readonly ALTERNATE: 18;
    readonly LEFT_PAREN: 19;
    readonly VERTICAL_BAR: 20;
  };

  /** Reports whether `op` is a pseudo-operator, i.e. `op >= Op.LEFT_PAREN`. */
  static isPseudoOp(op: number): boolean;

  /** Returns a fresh, empty list of sub-expressions. */
  static emptySubs(): Regexp[];

  /**
   * Creates a new node with the same operator as `re` and copies every other
   * field across by reference (the copy shares `subs`, `runes` and
   * `namedGroups` with `re`).
   */
  static fromRegexp(re: Regexp): Regexp;

  /** Operator of this node. */
  op: number;

  /** Bitmap of parse flags. */
  flags: number;

  /** Sub-expressions, if any. */
  subs: Regexp[];

  /** Matched runes, for LITERAL and CHAR_CLASS nodes. */
  runes: number[];

  /** Minimum repetition count, for REPEAT nodes. */
  min: number;

  /** Maximum repetition count, for REPEAT nodes. */
  max: number;

  /** Capturing index, for CAPTURE nodes. */
  cap: number;

  /** Capturing name, for CAPTURE nodes; `null` when there is none. */
  name: string | null;

  /** Map from group name to a number (presumably the capture index). */
  namedGroups: Map<string, number>;

  /** Creates a node with operator `op` and every other field at its default. */
  constructor(op: number);

  /** Resets every field except `op` to its default value. */
  reinit(): void;

  /** Walks the tree rooted at this node and returns the largest capture index. */
  maxCap(): number;
}
@@ -0,0 +1,94 @@
1
/**
 * One node of the regular expression syntax tree. The parser produces these
 * nodes and the compiler consumes them.
 */
export class Regexp {
  /** Operator codes. Codes from LEFT_PAREN upwards are pseudo-operators. */
  static Op = {
    NO_MATCH: 0,
    EMPTY_MATCH: 1,
    LITERAL: 2,
    CHAR_CLASS: 3,
    ANY_CHAR_NOT_NL: 4,
    ANY_CHAR: 5,
    BEGIN_LINE: 6,
    END_LINE: 7,
    BEGIN_TEXT: 8,
    END_TEXT: 9,
    WORD_BOUNDARY: 10,
    NO_WORD_BOUNDARY: 11,
    CAPTURE: 12,
    STAR: 13,
    PLUS: 14,
    QUEST: 15,
    REPEAT: 16,
    CONCAT: 17,
    ALTERNATE: 18,
    LEFT_PAREN: 19,
    VERTICAL_BAR: 20,
  };

  /** Reports whether `op` is a pseudo-operator (LEFT_PAREN or above). */
  static isPseudoOp(op) {
    const firstPseudoOp = Regexp.Op.LEFT_PAREN;
    return op >= firstPseudoOp;
  }

  /** Returns a new empty list of sub-expressions. */
  static emptySubs() {
    return [];
  }

  /**
   * Builds a new node with the operator of `re` and copies the remaining
   * fields over by reference, so the copy shares `subs`, `runes` and
   * `namedGroups` with the source node.
   */
  static fromRegexp(re) {
    const copy = new Regexp(re.op);
    const { flags, subs, runes, cap, min, max, name, namedGroups } = re;
    copy.flags = flags;
    copy.subs = subs;
    copy.runes = runes;
    copy.cap = cap;
    copy.min = min;
    copy.max = max;
    copy.name = name;
    copy.namedGroups = namedGroups;
    return copy;
  }

  op; // operator
  flags; // bitmap of parse flags
  subs; // subexpressions, if any
  runes; // matched runes, for LITERAL, CHAR_CLASS
  min; // min for REPEAT
  max; // max for REPEAT
  cap; // capturing index, for CAPTURE
  name; // capturing name, for CAPTURE
  namedGroups;

  /** Creates a node for operator `op` with all other fields at their defaults. */
  constructor(op) {
    this.op = op;
    this.flags = 0;
    this.subs = Regexp.emptySubs();
    this.runes = [];
    this.min = 0;
    this.max = 0;
    this.cap = 0;
    this.name = null;
    this.namedGroups = new Map();
  }

  /** Restores every field except `op` to its default value. */
  reinit() {
    this.flags = 0;
    this.subs = Regexp.emptySubs();
    this.runes = [];
    this.cap = 0;
    this.min = 0;
    this.max = 0;
    this.name = null;
    this.namedGroups = new Map();
  }

  /** Walks this node and its descendants and returns the largest capture index. */
  maxCap() {
    let highest = this.op === Regexp.Op.CAPTURE ? this.cap : 0;
    if (this.subs === null) {
      return highest;
    }
    for (const child of this.subs) {
      const childMax = child.maxCap();
      if (highest < childMax) {
        highest = childMax;
      }
    }
    return highest;
  }
}
@@ -0,0 +1,3 @@
1
+ import { Regexp } from "./Regexp.js";
2
/**
 * Returns a regexp equivalent to `re` with counted repetitions expanded and
 * other simplifications applied. The result may share structure with `re` or
 * be `re` itself.
 */
export declare function simplify(re: Regexp): Regexp;
@@ -0,0 +1,228 @@
1
+ import { NON_GREEDY } from "./RE2Flags.js";
2
+ import { Regexp } from "./Regexp.js";
3
+ import { MAX_RUNE } from "./Unicode.js";
4
+ // simplify returns a regexp equivalent to re but without counted
5
+ // repetitions and with various other simplifications, such as
6
+ // rewriting /(?:a+)+/ to /a+/. The resulting regexp will execute
7
+ // correctly but its string representation will not produce the same
8
+ // parse tree, because capturing parentheses may have been duplicated
9
+ // or removed. For example, the simplified form for /(x){1,2}/ is
10
+ // /(x)(x)?/ but both parentheses capture as $1. The returned regexp
11
+ // may share structure with or be the original.
12
+ function simplify(re) {
13
+ switch (re.op) {
14
+ case Regexp.Op.CAPTURE: {
15
+ const sub = simplify(re.subs[0]);
16
+ if (sub !== re.subs[0]) {
17
+ const nre = Regexp.fromRegexp(re);
18
+ nre.runes = [];
19
+ nre.subs = [sub];
20
+ return nre;
21
+ }
22
+ return re;
23
+ }
24
+ case Regexp.Op.CONCAT:
25
+ case Regexp.Op.ALTERNATE: {
26
+ const newSubs = [];
27
+ let changed = false;
28
+ for (let i = 0; i < re.subs.length; i++) {
29
+ const sub = re.subs[i];
30
+ const nsub = simplify(sub);
31
+ if (nsub !== sub) {
32
+ changed = true;
33
+ }
34
+ if (re.op === Regexp.Op.CONCAT) {
35
+ // If any part of a CONCAT is mathematically impossible,
36
+ // the entire CONCAT sequence becomes impossible.
37
+ if (nsub.op === Regexp.Op.NO_MATCH) {
38
+ return new Regexp(Regexp.Op.NO_MATCH);
39
+ }
40
+ // Drop empty 0-width match nodes entirely from sequences
41
+ if (nsub.op === Regexp.Op.EMPTY_MATCH) {
42
+ changed = true;
43
+ continue;
44
+ }
45
+ // Flatten nested concatenations
46
+ if (nsub.op === Regexp.Op.CONCAT) {
47
+ changed = true;
48
+ newSubs.push(...nsub.subs);
49
+ continue;
50
+ }
51
+ }
52
+ else if (re.op === Regexp.Op.ALTERNATE) {
53
+ // Drop impossible branches from alternations
54
+ if (nsub.op === Regexp.Op.NO_MATCH) {
55
+ changed = true;
56
+ continue;
57
+ }
58
+ // Flatten nested alternations
59
+ if (nsub.op === Regexp.Op.ALTERNATE) {
60
+ changed = true;
61
+ newSubs.push(...nsub.subs);
62
+ continue;
63
+ }
64
+ }
65
+ newSubs.push(nsub);
66
+ }
67
+ if (changed) {
68
+ // If we filtered out all nodes, return the mathematically correct fallback
69
+ if (newSubs.length === 0) {
70
+ return new Regexp(re.op === Regexp.Op.CONCAT
71
+ ? Regexp.Op.EMPTY_MATCH
72
+ : Regexp.Op.NO_MATCH);
73
+ }
74
+ // If only 1 node remains, we don't need a CONCAT/ALT container at all
75
+ if (newSubs.length === 1) {
76
+ return newSubs[0];
77
+ }
78
+ const nre = Regexp.fromRegexp(re);
79
+ nre.runes = [];
80
+ nre.subs = newSubs;
81
+ return nre;
82
+ }
83
+ return re;
84
+ }
85
+ case Regexp.Op.CHAR_CLASS: {
86
+ if (re.runes === null)
87
+ return re;
88
+ // Empty character classes match nothing.
89
+ if (re.runes.length === 0) {
90
+ return new Regexp(Regexp.Op.NO_MATCH);
91
+ }
92
+ // Full character classes match everything.
93
+ if (re.runes.length === 2 &&
94
+ re.runes[0] === 0 &&
95
+ re.runes[1] === MAX_RUNE) {
96
+ return new Regexp(Regexp.Op.ANY_CHAR);
97
+ }
98
+ // Standard catch-all except newline
99
+ if (re.runes.length === 4 &&
100
+ re.runes[0] === 0 &&
101
+ re.runes[1] === 0x0a - 1 &&
102
+ re.runes[2] === 0x0a + 1 &&
103
+ re.runes[3] === MAX_RUNE) {
104
+ return new Regexp(Regexp.Op.ANY_CHAR_NOT_NL);
105
+ }
106
+ return re;
107
+ }
108
+ case Regexp.Op.STAR:
109
+ case Regexp.Op.PLUS:
110
+ case Regexp.Op.QUEST: {
111
+ const sub = simplify(re.subs[0]);
112
+ return simplify1(re.op, re.flags, sub, re);
113
+ }
114
+ case Regexp.Op.REPEAT: {
115
+ // Special special case: x{0} matches the empty string
116
+ // and doesn't even need to consider x.
117
+ if (re.min === 0 && re.max === 0) {
118
+ return new Regexp(Regexp.Op.EMPTY_MATCH);
119
+ }
120
+ // The fun begins.
121
+ const sub = simplify(re.subs[0]);
122
+ // x{n,} means at least n matches of x.
123
+ if (re.max === -1) {
124
+ // Special case: x{0,} is x*.
125
+ if (re.min === 0) {
126
+ return simplify1(Regexp.Op.STAR, re.flags, sub, null);
127
+ }
128
+ // Special case: x{1,} is x+.
129
+ if (re.min === 1) {
130
+ return simplify1(Regexp.Op.PLUS, re.flags, sub, null);
131
+ }
132
+ // General case: x{4,} is xxxx+.
133
+ const nre = new Regexp(Regexp.Op.CONCAT);
134
+ const subs = [];
135
+ for (let i = 0; i < re.min - 1; i++) {
136
+ subs.push(sub);
137
+ }
138
+ subs.push(simplify1(Regexp.Op.PLUS, re.flags, sub, null));
139
+ nre.subs = subs.slice(0);
140
+ // Ensure newly created CONCAT is properly flattened
141
+ return simplify(nre);
142
+ }
143
+ // Special case x{0} handled above.
144
+ // Special case: x{1} is just x.
145
+ if (re.min === 1 && re.max === 1) {
146
+ return sub;
147
+ }
148
+ // General case: x{n,m} means n copies of x and m copies of x?
149
+ // The machine will do less work if we nest the final m copies,
150
+ // so that x{2,5} = xx(x(x(x)?)?)?
151
+ // Build leading prefix: xx.
152
+ let prefixSubs = null;
153
+ if (re.min > 0) {
154
+ prefixSubs = [];
155
+ for (let i = 0; i < re.min; i++) {
156
+ prefixSubs.push(sub);
157
+ }
158
+ }
159
+ // Build and attach suffix: (x(x(x)?)?)?
160
+ if (re.max > re.min) {
161
+ let suffix = simplify1(Regexp.Op.QUEST, re.flags, sub, null);
162
+ for (let i = re.min + 1; i < re.max; i++) {
163
+ const nre2 = new Regexp(Regexp.Op.CONCAT);
164
+ nre2.subs = [sub, suffix];
165
+ suffix = simplify1(Regexp.Op.QUEST, re.flags, nre2, null);
166
+ }
167
+ if (prefixSubs === null) {
168
+ return suffix;
169
+ }
170
+ prefixSubs.push(suffix);
171
+ }
172
+ if (prefixSubs !== null) {
173
+ const prefix = new Regexp(Regexp.Op.CONCAT);
174
+ prefix.subs = prefixSubs.slice(0);
175
+ // Ensure newly created CONCAT is properly flattened
176
+ return simplify(prefix);
177
+ }
178
+ // Some degenerate case like min > max or min < max < 0.
179
+ // Handle as impossible match.
180
+ return new Regexp(Regexp.Op.NO_MATCH);
181
+ }
182
+ }
183
+ return re;
184
+ }
185
// simplify1 is the simplifier for the unary STAR, PLUS and QUEST operators.
// It returns the simple regexp equivalent to
//
//      Regexp{Op: op, Flags: flags, Sub: {sub}}
//
// assuming sub is already simple, and without allocating that structure
// first. When the result would be equivalent to re (which may be null),
// re itself is returned.
//
// It lives outside simplify because the expansions of other operators
// generate these unary expressions; routing them through simplify1
// guarantees that what they generate is simple.
function simplify1(op, flags, sub, re) {
  const { EMPTY_MATCH, NO_MATCH, PLUS } = Regexp.Op;
  switch (sub.op) {
    case EMPTY_MATCH:
      // The empty string repeated any number of times is still the
      // empty string.
      return sub;
    case NO_MATCH:
      // One-or-more impossible matches stays impossible; zero-or-more and
      // zero-or-one collapse to the empty match.
      return op === PLUS ? sub : new Regexp(EMPTY_MATCH);
  }
  const greediness = flags & NON_GREEDY;
  // Applying the same operator twice with the same greediness is a no-op.
  if (sub.op === op && (sub.flags & NON_GREEDY) === greediness) {
    return sub;
  }
  // Reuse the caller's node when it already has exactly this shape.
  const reusable =
    re !== null &&
    re.op === op &&
    (re.flags & NON_GREEDY) === greediness &&
    re.subs[0] === sub;
  if (reusable) {
    return re;
  }
  const node = new Regexp(op);
  node.flags = flags;
  node.subs = [sub];
  return node;
}
228
+ export { simplify };
@@ -0,0 +1,17 @@
1
+ /**
2
+ * Utilities for dealing with Unicode better than JS does.
3
+ */
4
/** The highest legal rune value. */
export declare const MAX_RUNE = 1114111;

/** The highest legal ASCII value. */
export declare const MAX_ASCII = 127;

/** The highest legal Basic Multilingual Plane (BMP) value. */
export declare const MAX_BMP = 65535;

/** Smallest rune involved in case folding. */
export declare const MIN_FOLD = 65;

/** Largest rune involved in case folding. */
export declare const MAX_FOLD = 125251;

export declare const MIN_HIGH_SURROGATE = 55296;
export declare const MAX_HIGH_SURROGATE = 56319;
export declare const MIN_LOW_SURROGATE = 56320;
export declare const MAX_LOW_SURROGATE = 57343;
export declare const MIN_SUPPLEMENTARY_CODE_POINT = 65536;

/** Reports whether the rune `r` is an upper case letter. */
export declare function isUpper(r: number): boolean;

/**
 * Returns the next code point in the simple case-folding orbit of `r`
 * (for example 'A' gives 'a' and 'a' gives 'A'), or `r` itself when it has
 * no case variants.
 */
export declare function simpleFold(r: number): number;

/**
 * Case-insensitive comparison of the runes `r1` and `r2`. A negative rune is
 * treated as the end-of-file mark and never matches.
 */
export declare function equalsIgnoreCase(r1: number, r2: number): boolean;
@@ -0,0 +1,150 @@
1
+ import { UnicodeTables } from "./UnicodeTables.js";
2
+ /**
3
+ * Utilities for dealing with Unicode better than JS does.
4
+ */
5
/** The highest legal rune value. */
const MAX_RUNE = 0x10ffff;

/** The highest legal ASCII value. */
const MAX_ASCII = 0x7f;

/** The highest legal Latin-1 value. */
const MAX_LATIN1 = 0xff;

/** The highest legal Basic Multilingual Plane (BMP) value. */
const MAX_BMP = 0xffff;

/** Smallest rune involved in folding. Checked during test. */
const MIN_FOLD = 0x0041;

/** Largest rune involved in folding. Checked during test. */
const MAX_FOLD = 0x1e943;

// Bounds of the high and low surrogate ranges.
const MIN_HIGH_SURROGATE = 0xd800;
const MAX_HIGH_SURROGATE = 0xdbff;
const MIN_LOW_SURROGATE = 0xdc00;
const MAX_LOW_SURROGATE = 0xdfff;

// First code point above the BMP.
const MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
22
+ // is32 uses binary search to test whether rune is in the specified
23
+ // slice of 32-bit ranges.
24
+ function is32(ranges, r) {
25
+ // binary search over ranges
26
+ let lo = 0;
27
+ let hi = ranges.length;
28
+ while (lo < hi) {
29
+ const m = lo + Math.floor((hi - lo) / 2);
30
+ const rlo = ranges.getLo(m);
31
+ const rhi = ranges.getHi(m);
32
+ if (rlo <= r && r <= rhi) {
33
+ const stride = ranges.getStride(m);
34
+ return (r - rlo) % stride === 0;
35
+ }
36
+ if (r < rlo) {
37
+ hi = m;
38
+ }
39
+ else {
40
+ lo = m + 1;
41
+ }
42
+ }
43
+ return false;
44
+ }
45
// is reports whether rune r belongs to the given table of ranges.
function is(ranges, r) {
  if (r <= MAX_LATIN1) {
    // Latin-1 runes: scan linearly for the first range that does not end
    // before r.
    let idx = 0;
    while (idx < ranges.length && r > ranges.getHi(idx)) {
      idx++;
    }
    if (idx >= ranges.length) {
      return false;
    }
    const first = ranges.getLo(idx);
    if (r < first) {
      // r falls into the gap in front of this range.
      return false;
    }
    return (r - first) % ranges.getStride(idx) === 0;
  }
  // Everything above Latin-1 goes through the binary search, after a quick
  // rejection of runes below the first range.
  return ranges.length > 0 && r >= ranges.getLo(0) && is32(ranges, r);
}
66
+ // isUpper reports whether the rune is an upper case letter.
67
+ function isUpper(r) {
68
+ if (r <= MAX_LATIN1) {
69
+ const s = String.fromCodePoint(r);
70
+ return s.toUpperCase() === s && s.toLowerCase() !== s;
71
+ }
72
+ return is(UnicodeTables.Upper, r);
73
+ }
74
+ // simpleFold iterates over Unicode code points equivalent under
75
+ // the Unicode-defined simple case folding. Among the code points
76
+ // equivalent to rune (including rune itself), SimpleFold returns the
77
+ // smallest r >= rune if one exists, or else the smallest r >= 0.
78
+ //
79
+ // For example:
80
+ // SimpleFold('A') = 'a'
81
+ // SimpleFold('a') = 'A'
82
+ //
83
+ // SimpleFold('K') = 'k'
84
+ // SimpleFold('k') = '\u212A' (Kelvin symbol, K)
85
+ // SimpleFold('\u212A') = 'K'
86
+ //
87
+ // SimpleFold('1') = '1'
88
+ //
89
+ // Derived from Go's unicode.SimpleFold.
90
+ //
91
+ function simpleFold(r) {
92
+ // Consult caseOrbit table for special cases (3+ element cycles, lossy
93
+ // mappings like ſ→S, and Turkic-specific self-loops).
94
+ const caseOrbit = UnicodeTables.CASE_ORBIT;
95
+ const folded = caseOrbit.get(r);
96
+ if (folded !== undefined) {
97
+ return folded;
98
+ }
99
+ // Fallback for 2-element orbits: use raw native case conversion.
100
+ // The length check rejects multi-char results (e.g., ß→SS) which
101
+ // would otherwise be truncated to a non-equivalent codepoint.
102
+ const s = String.fromCodePoint(r);
103
+ const lower = s.toLowerCase();
104
+ if (lower.length === s.length) {
105
+ const lowerCp = lower.codePointAt(0);
106
+ if (lowerCp !== undefined && lowerCp !== r)
107
+ return lowerCp;
108
+ }
109
+ const upper = s.toUpperCase();
110
+ if (upper.length === s.length) {
111
+ const upperCp = upper.codePointAt(0);
112
+ if (upperCp !== undefined && upperCp !== r)
113
+ return upperCp;
114
+ }
115
+ return r;
116
+ }
117
+ // equalsIgnoreCase performs case-insensitive equality comparison
118
+ // on the given runes |r1| and |r2|, with special consideration
119
+ // for the likely scenario where both runes are ASCII characters.
120
+ // If non-ASCII, Unicode case folding will be performed on |r1|
121
+ // to compare it to |r2|.
122
+ // -1 is interpreted as the end-of-file mark and never matches.
123
+ function equalsIgnoreCase(r1, r2) {
124
+ if (r1 < 0 || r2 < 0) {
125
+ return false;
126
+ }
127
+ if (r1 === r2) {
128
+ return true;
129
+ }
130
+ // Fast path for the common case where both runes are ASCII characters.
131
+ // Coerces both runes to lowercase if applicable.
132
+ if (r1 <= MAX_ASCII && r2 <= MAX_ASCII) {
133
+ if (0x41 <= r1 && r1 <= 0x5a) {
134
+ r1 |= 0x20;
135
+ }
136
+ if (0x41 <= r2 && r2 <= 0x5a) {
137
+ r2 |= 0x20;
138
+ }
139
+ return r1 === r2;
140
+ }
141
+ // Fall back to full Unicode case folding otherwise.
142
+ // Invariant: r1 must be non-negative
143
+ for (let r = simpleFold(r1); r !== r1; r = simpleFold(r)) {
144
+ if (r === r2) {
145
+ return true;
146
+ }
147
+ }
148
+ return false;
149
+ }
150
+ export { MAX_RUNE, MIN_FOLD, MAX_FOLD, simpleFold, MAX_ASCII, equalsIgnoreCase, MIN_SUPPLEMENTARY_CODE_POINT, MIN_LOW_SURROGATE, MIN_HIGH_SURROGATE, MAX_LOW_SURROGATE, MAX_HIGH_SURROGATE, MAX_BMP, isUpper, };
@@ -0,0 +1,12 @@
1
/**
 * Read-only view over a flat `Uint32Array` of code point ranges. Each entry
 * occupies `SIZE` consecutive slots: low bound, high bound and, unless the
 * table is marked stride-1, the stride.
 */
export declare class UnicodeRangeTable {
  /** Backing storage holding the packed range entries. */
  data: Uint32Array;

  /** True when entries omit the stride slot and every stride is 1. */
  isStride1: boolean;

  /** Number of array slots per entry: 2 for stride-1 tables, otherwise 3. */
  SIZE: number;

  /**
   * @param data packed range entries
   * @param isStride1 whether entries omit the stride slot (defaults to false)
   */
  constructor(data: Uint32Array, isStride1?: boolean);

  /** Low bound of the entry at `index`. */
  getLo(index: number): number;

  /** High bound of the entry at `index`. */
  getHi(index: number): number;

  /** Stride of the entry at `index`; always 1 for stride-1 tables. */
  getStride(index: number): number;

  /** Returns the entry at `index` as a newly allocated `[lo, hi, stride]` array. */
  get(index: number): number[];

  /** Number of entries, i.e. `data.length / SIZE`. */
  get length(): number;
}
@@ -0,0 +1,28 @@
1
// UnicodeRangeTable is a view over a flat array of code point ranges. Every
// entry takes SIZE consecutive slots: low bound, high bound and, unless the
// table is stride-1, the stride.
class UnicodeRangeTable {
  data;
  isStride1;
  SIZE;

  // data is the packed Uint32Array; isStride1 marks tables whose entries
  // omit the stride slot.
  constructor(data, isStride1 = false) {
    this.data = data;
    this.isStride1 = isStride1;
    // Two slots per entry without a stored stride, three with one.
    if (isStride1) {
      this.SIZE = 2;
    } else {
      this.SIZE = 3;
    }
  }

  // The scalar getters below read straight from the backing array and do
  // not allocate.

  // Low bound of the entry at index.
  getLo(index) {
    const offset = index * this.SIZE;
    return this.data[offset];
  }

  // High bound of the entry at index.
  getHi(index) {
    const offset = index * this.SIZE;
    return this.data[offset + 1];
  }

  // Stride of the entry at index; fixed at 1 for stride-1 tables.
  getStride(index) {
    if (this.isStride1) {
      return 1;
    }
    const offset = index * this.SIZE;
    return this.data[offset + 2];
  }

  // Returns the entry at index as a new [lo, hi, stride] array.
  get(index) {
    const offset = index * this.SIZE;
    const lo = this.data[offset];
    const hi = this.data[offset + 1];
    return [lo, hi, this.getStride(index)];
  }

  // Number of entries in the table.
  get length() {
    return this.data.length / this.SIZE;
  }
}
28
+ export { UnicodeRangeTable };
@@ -0,0 +1,29 @@
1
+ import { UnicodeRangeTable } from "./UnicodeRangeTable.js";
2
/**
 * Unicode data used by the regexp engine.
 *
 * NOTE(review): the implementation is not part of this declaration file; the
 * member descriptions below that go beyond the declared types are inferred
 * from names and call sites and should be confirmed against the
 * implementation.
 */
export declare const UnicodeTables: {
  /**
   * Case-folding orbit table consulted by `simpleFold`: maps a code point to
   * the code point that `simpleFold` returns for it.
   */
  readonly CASE_ORBIT: Map<number, number>;

  /** Set of category names (presumably those available in every supported runtime). */
  STABLE_CATEGORY_NAMES: ReadonlySet<string>;

  /** Set of script names (presumably those available in every supported runtime). */
  STABLE_SCRIPT_NAMES: ReadonlySet<string>;

  /** Set of script names (presumably scripts added in newer Unicode versions). */
  NEW_SCRIPT_NAMES: ReadonlySet<string>;

  /** Builds the range table for the named property, or returns `null`. */
  buildForProperty: (name: string) => UnicodeRangeTable | null;

  /** Builds the fold overlay table for the named property, or returns `null`. */
  buildFoldOverlay: (name: string) => UnicodeRangeTable | null;

  /** Lookup of range tables by category name. */
  CATEGORIES: {
    has: (name: string) => boolean;
    get: (name: string) => UnicodeRangeTable | null;
  };

  /** Lookup of range tables by script name. */
  SCRIPTS: {
    has: (name: string) => boolean;
    get: (name: string) => UnicodeRangeTable | null;
  };

  /** Lookup of fold tables by category name. */
  FOLD_CATEGORIES: {
    has: (name: string) => boolean;
    get: (name: string) => UnicodeRangeTable | null;
  };

  /** Lookup of fold tables by script name. */
  FOLD_SCRIPT: {
    has: (name: string) => boolean;
    get: (name: string) => UnicodeRangeTable | null;
  };

  /** Range table that `isUpper` consults for runes above Latin-1. */
  readonly Upper: UnicodeRangeTable;

  /** Internal helper; returns packed ranges for a category name, or `null`. */
  _deltaCategoryRanges: (name: string) => Uint32Array | null;

  /** Internal helper; returns packed ranges for a script name, or `null`. */
  _deltaScriptRanges: (name: string) => Uint32Array | null;

  /** Internal helper; returns the range table for a script name, or `null`. */
  _newScriptTable: (name: string) => UnicodeRangeTable | null;
};