porffor 0.58.6 → 0.58.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/compiler/builtins/regexp.ts +630 -22
- package/compiler/builtins_precompiled.js +373 -373
- package/compiler/wrap.js +120 -2
- package/foo.js +5 -5
- package/jsr.json +1 -1
- package/package.json +1 -1
- package/runtime/index.js +1 -1
@@ -1,8 +1,8 @@
|
|
1
|
+
// @porf --valtype=i32
|
1
2
|
import type {} from './porffor.d.ts';
|
2
3
|
|
3
4
|
// regex memory structure:
|
4
5
|
// source string ptr (u32)
|
5
|
-
// flags string ptr (u32)
|
6
6
|
// flags (u16):
|
7
7
|
// g, global - 0b00000001
|
8
8
|
// i, ignore case - 0b00000010
|
@@ -12,13 +12,45 @@ import type {} from './porffor.d.ts';
|
|
12
12
|
// y, sticky - 0b00100000
|
13
13
|
// d, has indices - 0b01000000
|
14
14
|
// v, unicode sets - 0b10000000
|
15
|
-
// bytecode
|
16
|
-
//
|
15
|
+
// bytecode (variable):
|
16
|
+
// op (u8)
|
17
|
+
// depends on op (variable)
|
18
|
+
// ----------------------------
|
19
|
+
// single - 0x01:
|
20
|
+
// char (u8)
|
21
|
+
// class - 0x02 / negated class - 0x03:
|
22
|
+
// items (variable):
|
23
|
+
// RANGE_MARKER (0x00) (u8) + from (u8) + to (u8)
|
24
|
+
// CHAR_MARKER (0x01) (u8) + char (u8)
|
25
|
+
// PREDEF_MARKER (0x02) (u8) + classId (u8)
|
26
|
+
// END_CLASS_MARKER (0xFF) (u8)
|
27
|
+
// predefined class - 0x04:
|
28
|
+
// class (u8)
|
29
|
+
// start (line or string) - 0x05
|
30
|
+
// end (line or string) - 0x06
|
31
|
+
// word boundary - 0x07
|
32
|
+
// non-word boundary - 0x08
|
33
|
+
// dot - 0x09
|
34
|
+
// back reference - 0x0a:
|
35
|
+
// index (u8)
|
36
|
+
// ----------------------------
|
37
|
+
// accept - 0x10
|
38
|
+
// reject - 0x11
|
39
|
+
// ----------------------------
|
40
|
+
// jump - 0x20:
|
41
|
+
// target (u16)
|
42
|
+
// fork - 0x21:
|
43
|
+
// branch 1 (u16)
|
44
|
+
// branch 2 (u16)
|
45
|
+
// ----------------------------
|
46
|
+
// start capture - 0x30:
|
47
|
+
// index (u8)
|
48
|
+
// end capture - 0x31:
|
49
|
+
// index (u8)
|
17
50
|
|
18
51
|
export const __Porffor_regex_construct = (patternStr: bytestring, flagsStr: bytestring): RegExp => {
|
19
52
|
const ptr: i32 = Porffor.allocate();
|
20
53
|
Porffor.wasm.i32.store(ptr, patternStr, 0, 0);
|
21
|
-
Porffor.wasm.i32.store(ptr, flagsStr, 0, 4);
|
22
54
|
|
23
55
|
// parse flags
|
24
56
|
let flags: i32 = 0;
|
@@ -26,7 +58,7 @@ export const __Porffor_regex_construct = (patternStr: bytestring, flagsStr: byte
|
|
26
58
|
const flagsEndPtr: i32 = flagsPtr + flagsStr.length;
|
27
59
|
while (flagsPtr < flagsEndPtr) {
|
28
60
|
const char: i32 = Porffor.wasm.i32.load8_u(flagsPtr, 0, 4);
|
29
|
-
flagsPtr
|
61
|
+
flagsPtr += 1;
|
30
62
|
|
31
63
|
if (char == 103) { // g
|
32
64
|
flags |= 0b00000001;
|
@@ -45,7 +77,7 @@ export const __Porffor_regex_construct = (patternStr: bytestring, flagsStr: byte
|
|
45
77
|
continue;
|
46
78
|
}
|
47
79
|
if (char == 117) { // u
|
48
|
-
if (flags & 0b10000000) throw new SyntaxError('
|
80
|
+
if (flags & 0b10000000) throw new SyntaxError('Regex parse: Conflicting unicode flag');
|
49
81
|
flags |= 0b00010000;
|
50
82
|
continue;
|
51
83
|
}
|
@@ -58,17 +90,589 @@ export const __Porffor_regex_construct = (patternStr: bytestring, flagsStr: byte
|
|
58
90
|
continue;
|
59
91
|
}
|
60
92
|
if (char == 118) { // v
|
61
|
-
if (flags & 0b00010000) throw new SyntaxError('
|
93
|
+
if (flags & 0b00010000) throw new SyntaxError('Regex parse: Conflicting unicode flag');
|
62
94
|
flags |= 0b10000000;
|
63
95
|
continue;
|
64
96
|
}
|
65
97
|
|
66
|
-
throw new SyntaxError('
|
98
|
+
throw new SyntaxError('Regex parse: Invalid flag');
|
67
99
|
}
|
100
|
+
Porffor.wasm.i32.store16(ptr, flags, 0, 4);
|
68
101
|
|
69
|
-
|
102
|
+
let bcPtr: i32 = ptr + 6;
|
103
|
+
const bcStart: i32 = bcPtr;
|
104
|
+
let patternPtr: i32 = patternStr;
|
105
|
+
let patternEndPtr: i32 = patternPtr + patternStr.length;
|
70
106
|
|
71
|
-
|
107
|
+
let groupDepth: i32 = 0;
|
108
|
+
let captureIndex: i32 = 1;
|
109
|
+
let lastWasAtom: boolean = false;
|
110
|
+
let lastAtomStart: i32 = 0;
|
111
|
+
let groupStack: i32[] = [];
|
112
|
+
let altStackPos: i32[] = [];
|
113
|
+
let altStackGroupDepth: i32[] = [];
|
114
|
+
let inClass: boolean = false;
|
115
|
+
|
116
|
+
while (patternPtr < patternEndPtr) {
|
117
|
+
let char: i32 = Porffor.wasm.i32.load8_u(patternPtr, 0, 4);
|
118
|
+
patternPtr = patternPtr + 1;
|
119
|
+
|
120
|
+
// escape
|
121
|
+
let notEscaped: boolean = true;
|
122
|
+
if (char == 92) { // '\'
|
123
|
+
notEscaped = false;
|
124
|
+
if (patternPtr >= patternEndPtr) throw new SyntaxError('Regex parse: trailing \\');
|
125
|
+
|
126
|
+
char = Porffor.wasm.i32.load8_u(patternPtr, 0, 4);
|
127
|
+
patternPtr = patternPtr + 1;
|
128
|
+
}
|
129
|
+
|
130
|
+
if (inClass) {
|
131
|
+
if (notEscaped && char == 93) { // ']'
|
132
|
+
inClass = false;
|
133
|
+
// end class
|
134
|
+
Porffor.wasm.i32.store8(bcPtr, 0xFF, 0, 0);
|
135
|
+
bcPtr += 1;
|
136
|
+
lastWasAtom = true;
|
137
|
+
continue;
|
138
|
+
}
|
139
|
+
|
140
|
+
// class escape
|
141
|
+
let v: i32 = char;
|
142
|
+
let predefClassId: i32 = 0;
|
143
|
+
if (!notEscaped) {
|
144
|
+
if (char == 100) predefClassId = 1; // \d
|
145
|
+
else if (char == 68) predefClassId = 2; // \D
|
146
|
+
else if (char == 115) predefClassId = 3; // \s
|
147
|
+
else if (char == 83) predefClassId = 4; // \S
|
148
|
+
else if (char == 119) predefClassId = 5; // \w
|
149
|
+
else if (char == 87) predefClassId = 6; // \W
|
150
|
+
else if (char == 110) v = 10; // \n
|
151
|
+
else if (char == 114) v = 13; // \r
|
152
|
+
else if (char == 116) v = 9; // \t
|
153
|
+
else if (char == 118) v = 11; // \v
|
154
|
+
else if (char == 102) v = 12; // \f
|
155
|
+
else if (char == 48) v = 0; // \0
|
156
|
+
}
|
157
|
+
|
158
|
+
if ((patternPtr + 1) < patternEndPtr && Porffor.wasm.i32.load8_u(patternPtr, 0, 4) == 45 && Porffor.wasm.i32.load8_u(patternPtr, 0, 5) != 93) {
|
159
|
+
// possible range
|
160
|
+
patternPtr += 1;
|
161
|
+
let endChar: i32;
|
162
|
+
let endNotEscaped: boolean = true;
|
163
|
+
if (patternPtr < patternEndPtr && Porffor.wasm.i32.load8_u(patternPtr, 0, 4) == 92) {
|
164
|
+
endNotEscaped = false;
|
165
|
+
patternPtr += 1;
|
166
|
+
if (patternPtr >= patternEndPtr) throw new SyntaxError('Regex parse: trailing \\ in range');
|
167
|
+
}
|
168
|
+
|
169
|
+
endChar = Porffor.wasm.i32.load8_u(patternPtr, 0, 4);
|
170
|
+
patternPtr += 1;
|
171
|
+
|
172
|
+
let endPredefClassId: i32 = 0;
|
173
|
+
if (!endNotEscaped) {
|
174
|
+
if (endChar == 100) endPredefClassId = 1;
|
175
|
+
else if (endChar == 68) endPredefClassId = 2;
|
176
|
+
else if (endChar == 115) endPredefClassId = 3;
|
177
|
+
else if (endChar == 83) endPredefClassId = 4;
|
178
|
+
else if (endChar == 119) endPredefClassId = 5;
|
179
|
+
else if (endChar == 87) endPredefClassId = 6;
|
180
|
+
else if (endChar == 110) endChar = 10;
|
181
|
+
else if (endChar == 114) endChar = 13;
|
182
|
+
else if (endChar == 116) endChar = 9;
|
183
|
+
else if (endChar == 118) endChar = 11;
|
184
|
+
else if (endChar == 102) endChar = 12;
|
185
|
+
else if (endChar == 48) endChar = 0;
|
186
|
+
}
|
187
|
+
|
188
|
+
// If either side is a predefined class, treat as literal chars
|
189
|
+
if (predefClassId > 0 || endPredefClassId > 0) {
|
190
|
+
// emit start char/predef
|
191
|
+
if (predefClassId > 0) {
|
192
|
+
Porffor.wasm.i32.store8(bcPtr, 0x02, 0, 0); // PREDEF_MARKER
|
193
|
+
Porffor.wasm.i32.store8(bcPtr, predefClassId, 0, 1);
|
194
|
+
bcPtr += 2;
|
195
|
+
} else {
|
196
|
+
Porffor.wasm.i32.store8(bcPtr, 0x01, 0, 0); // CHAR_MARKER
|
197
|
+
Porffor.wasm.i32.store8(bcPtr, v, 0, 1);
|
198
|
+
bcPtr += 2;
|
199
|
+
}
|
200
|
+
|
201
|
+
// emit hyphen
|
202
|
+
Porffor.wasm.i32.store8(bcPtr, 0x01, 0, 0); // CHAR_MARKER
|
203
|
+
Porffor.wasm.i32.store8(bcPtr, 45, 0, 1);
|
204
|
+
bcPtr += 2;
|
205
|
+
|
206
|
+
// emit end char/predef
|
207
|
+
if (endPredefClassId > 0) {
|
208
|
+
Porffor.wasm.i32.store8(bcPtr, 0x02, 0, 0); // PREDEF_MARKER
|
209
|
+
Porffor.wasm.i32.store8(bcPtr, endPredefClassId, 0, 1);
|
210
|
+
bcPtr += 2;
|
211
|
+
} else {
|
212
|
+
Porffor.wasm.i32.store8(bcPtr, 0x01, 0, 0); // CHAR_MARKER
|
213
|
+
Porffor.wasm.i32.store8(bcPtr, endChar, 0, 1);
|
214
|
+
bcPtr += 2;
|
215
|
+
}
|
216
|
+
} else {
|
217
|
+
if (v > endChar) throw new SyntaxError('Regex parse: invalid range');
|
218
|
+
|
219
|
+
Porffor.wasm.i32.store8(bcPtr, 0x00, 0, 0); // RANGE_MARKER
|
220
|
+
Porffor.wasm.i32.store8(bcPtr, v, 0, 1);
|
221
|
+
Porffor.wasm.i32.store8(bcPtr, endChar, 0, 2);
|
222
|
+
bcPtr += 3;
|
223
|
+
}
|
224
|
+
|
225
|
+
continue;
|
226
|
+
}
|
227
|
+
|
228
|
+
// store v as char or predefined
|
229
|
+
if (predefClassId > 0) {
|
230
|
+
Porffor.wasm.i32.store8(bcPtr, 0x02, 0, 0); // PREDEF_MARKER
|
231
|
+
Porffor.wasm.i32.store8(bcPtr, predefClassId, 0, 1);
|
232
|
+
} else {
|
233
|
+
Porffor.wasm.i32.store8(bcPtr, 0x01, 0, 0); // CHAR_MARKER
|
234
|
+
Porffor.wasm.i32.store8(bcPtr, v, 0, 1);
|
235
|
+
}
|
236
|
+
|
237
|
+
bcPtr += 2;
|
238
|
+
continue;
|
239
|
+
}
|
240
|
+
|
241
|
+
if (notEscaped) {
|
242
|
+
if (char == 91) { // '['
|
243
|
+
lastAtomStart = bcPtr;
|
244
|
+
inClass = true;
|
245
|
+
if (patternPtr < patternEndPtr && Porffor.wasm.i32.load8_u(patternPtr, 0, 4) == 94) {
|
246
|
+
patternPtr += 1;
|
247
|
+
|
248
|
+
// negated
|
249
|
+
Porffor.wasm.i32.store8(bcPtr, 0x03, 0, 0);
|
250
|
+
bcPtr += 1;
|
251
|
+
continue;
|
252
|
+
}
|
253
|
+
|
254
|
+
// not negated
|
255
|
+
Porffor.wasm.i32.store8(bcPtr, 0x02, 0, 0);
|
256
|
+
bcPtr += 1;
|
257
|
+
continue;
|
258
|
+
}
|
259
|
+
|
260
|
+
if (char == 40) { // '('
|
261
|
+
lastAtomStart = bcPtr;
|
262
|
+
|
263
|
+
// Check for non-capturing group
|
264
|
+
let ncg: boolean = false;
|
265
|
+
if (patternPtr < patternEndPtr && Porffor.wasm.i32.load8_u(patternPtr, 0, 4) == 63) { // '?'
|
266
|
+
if ((patternPtr + 1) < patternEndPtr && Porffor.wasm.i32.load8_u(patternPtr, 0, 5) == 58) { // ':'
|
267
|
+
ncg = true;
|
268
|
+
patternPtr += 2;
|
269
|
+
}
|
270
|
+
}
|
271
|
+
|
272
|
+
groupDepth += 1;
|
273
|
+
if (!ncg) {
|
274
|
+
Porffor.wasm.i32.store8(bcPtr, 0x30, 0, 0); // start capture
|
275
|
+
Porffor.wasm.i32.store8(bcPtr, captureIndex, 0, 1);
|
276
|
+
bcPtr += 2;
|
277
|
+
|
278
|
+
Porffor.array.fastPush(groupStack, captureIndex);
|
279
|
+
captureIndex += 1;
|
280
|
+
} else {
|
281
|
+
Porffor.array.fastPush(groupStack, -1);
|
282
|
+
}
|
283
|
+
|
284
|
+
lastWasAtom = false;
|
285
|
+
continue;
|
286
|
+
}
|
287
|
+
|
288
|
+
if (char == 41) { // ')'
|
289
|
+
if (groupDepth == 0) throw new SyntaxError('Regex parse: unmatched )');
|
290
|
+
groupDepth -= 1;
|
291
|
+
|
292
|
+
const popped: i32 = groupStack.pop()!;
|
293
|
+
if (popped != -1) {
|
294
|
+
Porffor.wasm.i32.store8(bcPtr, 0x31, 0, 0); // end capture
|
295
|
+
Porffor.wasm.i32.store8(bcPtr, popped, 0, 1);
|
296
|
+
bcPtr += 2;
|
297
|
+
}
|
298
|
+
|
299
|
+
// Patch alternation jumps for this group
|
300
|
+
for (let i: i32 = altStackPos.length - 1; i >= 0; --i) {
|
301
|
+
if (altStackGroupDepth[i] < groupDepth + 1) break;
|
302
|
+
Porffor.wasm.i32.store16(altStackPos[i], bcPtr - altStackPos[i] - 5, 0, 3); // patch branch2
|
303
|
+
altStackPos.pop();
|
304
|
+
altStackGroupDepth.pop();
|
305
|
+
}
|
306
|
+
|
307
|
+
lastWasAtom = true;
|
308
|
+
continue;
|
309
|
+
}
|
310
|
+
|
311
|
+
if (char == 124) { // '|'
|
312
|
+
// alternation: fork to choose between alternatives
|
313
|
+
Porffor.array.fastPush(altStackPos, bcPtr);
|
314
|
+
Porffor.array.fastPush(altStackGroupDepth, groupDepth);
|
315
|
+
|
316
|
+
Porffor.wasm.i32.store8(bcPtr, 0x21, 0, 0); // fork
|
317
|
+
Porffor.wasm.i32.store16(bcPtr, 5, 0, 1); // branch1 (next instruction)
|
318
|
+
Porffor.wasm.i32.store16(bcPtr, 0, 0, 3); // branch2 (to patch later)
|
319
|
+
bcPtr += 5;
|
320
|
+
|
321
|
+
lastWasAtom = false;
|
322
|
+
continue;
|
323
|
+
}
|
324
|
+
|
325
|
+
if (char == 46) { // '.'
|
326
|
+
lastAtomStart = bcPtr;
|
327
|
+
Porffor.wasm.i32.store8(bcPtr, 0x09, 0, 0); // dot
|
328
|
+
bcPtr += 1;
|
329
|
+
lastWasAtom = true;
|
330
|
+
continue;
|
331
|
+
}
|
332
|
+
|
333
|
+
if (char == 94) { // '^'
|
334
|
+
Porffor.wasm.i32.store8(bcPtr, 0x05, 0, 0); // start
|
335
|
+
bcPtr += 1;
|
336
|
+
lastWasAtom = false;
|
337
|
+
continue;
|
338
|
+
}
|
339
|
+
if (char == 36) { // '$'
|
340
|
+
Porffor.wasm.i32.store8(bcPtr, 0x06, 0, 0); // end
|
341
|
+
bcPtr += 1;
|
342
|
+
lastWasAtom = false;
|
343
|
+
continue;
|
344
|
+
}
|
345
|
+
|
346
|
+
// quantifiers: *, +, ?
|
347
|
+
if (Porffor.fastOr(char == 42, char == 43, char == 63)) {
|
348
|
+
if (!lastWasAtom) throw new SyntaxError('Regex parser: quantifier without atom');
|
349
|
+
|
350
|
+
// check for lazy
|
351
|
+
let lazy: boolean = false;
|
352
|
+
if (patternPtr < patternEndPtr && Porffor.wasm.i32.load8_u(patternPtr, 0, 4) == 63) { // '?'
|
353
|
+
lazy = true;
|
354
|
+
patternPtr++;
|
355
|
+
}
|
356
|
+
|
357
|
+
// Calculate atom size and move it forward to make space for quantifier logic
|
358
|
+
const atomSize: i32 = bcPtr - lastAtomStart;
|
359
|
+
|
360
|
+
if (char == 42) { // * (zero or more)
|
361
|
+
// Move atom forward to make space for fork BEFORE it
|
362
|
+
Porffor.wasm.memory.copy(lastAtomStart + 5, lastAtomStart, atomSize, 0, 0);
|
363
|
+
|
364
|
+
// Insert fork at atom start position
|
365
|
+
Porffor.wasm.i32.store8(lastAtomStart, 0x21, 0, 0); // fork
|
366
|
+
if (lazy) {
|
367
|
+
Porffor.wasm.i32.store16(lastAtomStart, atomSize + 8, 0, 1); // branch1: skip atom entirely
|
368
|
+
Porffor.wasm.i32.store16(lastAtomStart, 5, 0, 3); // branch2: execute atom
|
369
|
+
} else {
|
370
|
+
Porffor.wasm.i32.store16(lastAtomStart, 5, 0, 1); // branch1: execute atom
|
371
|
+
Porffor.wasm.i32.store16(lastAtomStart, atomSize + 8, 0, 3); // branch2: skip atom entirely
|
372
|
+
}
|
373
|
+
|
374
|
+
// insert jump to loop
|
375
|
+
Porffor.wasm.i32.store8(bcPtr, 0x20, 0, 5);
|
376
|
+
Porffor.wasm.i32.store16(bcPtr, -atomSize - 5, 0, 6);
|
377
|
+
|
378
|
+
// Update bcPtr to point after the moved atom
|
379
|
+
bcPtr += 8;
|
380
|
+
} else if (char == 43) { // + (one or more)
|
381
|
+
// For +, atom executes once, then add fork for additional matches
|
382
|
+
Porffor.wasm.i32.store8(bcPtr, 0x21, 0, 0); // fork
|
383
|
+
if (lazy) {
|
384
|
+
Porffor.wasm.i32.store16(bcPtr, 5, 0, 1); // branch1: continue (done)
|
385
|
+
Porffor.wasm.i32.store16(bcPtr, -(bcPtr - lastAtomStart), 0, 3); // branch2: back to atom
|
386
|
+
} else {
|
387
|
+
Porffor.wasm.i32.store16(bcPtr, -(bcPtr - lastAtomStart), 0, 1); // branch1: back to atom
|
388
|
+
Porffor.wasm.i32.store16(bcPtr, 5, 0, 3); // branch2: continue (done)
|
389
|
+
}
|
390
|
+
bcPtr += 5;
|
391
|
+
} else { // ? (zero or one)
|
392
|
+
// Move atom forward to make space for fork
|
393
|
+
Porffor.wasm.memory.copy(lastAtomStart + 5, lastAtomStart, atomSize, 0, 0);
|
394
|
+
|
395
|
+
// Insert fork at atom start position
|
396
|
+
const forkPos: i32 = lastAtomStart;
|
397
|
+
Porffor.wasm.i32.store8(forkPos, 0x21, 0, 0); // fork
|
398
|
+
if (lazy) {
|
399
|
+
Porffor.wasm.i32.store16(forkPos, atomSize + 5, 0, 1); // branch1: skip atom
|
400
|
+
Porffor.wasm.i32.store16(forkPos, 5, 0, 3); // branch2: execute atom
|
401
|
+
} else {
|
402
|
+
Porffor.wasm.i32.store16(forkPos, 5, 0, 1); // branch1: execute atom
|
403
|
+
Porffor.wasm.i32.store16(forkPos, atomSize + 5, 0, 3); // branch2: skip atom
|
404
|
+
}
|
405
|
+
|
406
|
+
// Update bcPtr to point after the moved atom
|
407
|
+
bcPtr = lastAtomStart + 5 + atomSize;
|
408
|
+
}
|
409
|
+
lastWasAtom = false;
|
410
|
+
continue;
|
411
|
+
}
|
412
|
+
|
413
|
+
if (char == 123) { // {n,m}
|
414
|
+
// parse n
|
415
|
+
let n: i32 = 0;
|
416
|
+
let m: i32 = -1;
|
417
|
+
let sawComma: boolean = false;
|
418
|
+
let sawDigit: boolean = false;
|
419
|
+
while (patternPtr < patternEndPtr) {
|
420
|
+
const d: i32 = Porffor.wasm.i32.load8_u(patternPtr, 0, 4);
|
421
|
+
if (Porffor.fastAnd(d >= 48, d <= 57)) { // digit
|
422
|
+
n = n * 10 + (d - 48);
|
423
|
+
sawDigit = true;
|
424
|
+
patternPtr++;
|
425
|
+
continue;
|
426
|
+
}
|
427
|
+
|
428
|
+
if (d == 44) { // ','
|
429
|
+
sawComma = true;
|
430
|
+
patternPtr++;
|
431
|
+
break;
|
432
|
+
}
|
433
|
+
|
434
|
+
if (d == 125) { // '}'
|
435
|
+
patternPtr++;
|
436
|
+
break;
|
437
|
+
}
|
438
|
+
|
439
|
+
throw new SyntaxError('Regex parse: invalid {n,m} quantifier');
|
440
|
+
}
|
441
|
+
|
442
|
+
if (!sawDigit) throw new SyntaxError('Regex parse: invalid {n,m} quantifier');
|
443
|
+
if (patternPtr > patternEndPtr) throw new SyntaxError('Regex parse: unterminated {n,m} quantifier');
|
444
|
+
|
445
|
+
if (sawComma) {
|
446
|
+
// parse m (or none)
|
447
|
+
let mVal: i32 = 0;
|
448
|
+
let sawMDigit: boolean = false;
|
449
|
+
while (patternPtr < patternEndPtr) {
|
450
|
+
const d: i32 = Porffor.wasm.i32.load8_u(patternPtr, 0, 4);
|
451
|
+
if (Porffor.fastAnd(d >= 48, d <= 57)) {
|
452
|
+
mVal = mVal * 10 + (d - 48);
|
453
|
+
sawMDigit = true;
|
454
|
+
patternPtr++;
|
455
|
+
continue;
|
456
|
+
}
|
457
|
+
|
458
|
+
if (d == 125) {
|
459
|
+
patternPtr++;
|
460
|
+
break;
|
461
|
+
}
|
462
|
+
|
463
|
+
throw new SyntaxError('Regex parse: invalid {n,m} quantifier');
|
464
|
+
}
|
465
|
+
|
466
|
+
if (sawMDigit) {
|
467
|
+
m = mVal;
|
468
|
+
if (m < n) throw new SyntaxError('Regex parse: {n,m} with m < n');
|
469
|
+
} else {
|
470
|
+
m = -1; // open
|
471
|
+
}
|
472
|
+
} else {
|
473
|
+
m = n;
|
474
|
+
}
|
475
|
+
|
476
|
+
// check for lazy
|
477
|
+
let lazyBrace: boolean = false;
|
478
|
+
if (patternPtr < patternEndPtr && Porffor.wasm.i32.load8_u(patternPtr, 0, 4) == 63) { // '?'
|
479
|
+
lazyBrace = true;
|
480
|
+
patternPtr++;
|
481
|
+
}
|
482
|
+
|
483
|
+
// emit n times
|
484
|
+
for (let i: i32 = 1; i < n; i++) {
|
485
|
+
let len: i32 = bcPtr - lastAtomStart;
|
486
|
+
for (let j: i32 = 0; j < len; ++j) {
|
487
|
+
Porffor.wasm.i32.store8(bcPtr + j, Porffor.wasm.i32.load8_u(lastAtomStart + j, 0, 0), 0, 0);
|
488
|
+
}
|
489
|
+
bcPtr += len;
|
490
|
+
}
|
491
|
+
|
492
|
+
if (m == n) {
|
493
|
+
// exactly n
|
494
|
+
} else if (m == -1) {
|
495
|
+
// {n,} - infinite (like * after n mandatory matches)
|
496
|
+
Porffor.wasm.i32.store8(bcPtr, 0x21, 0, 0); // fork
|
497
|
+
if (lazyBrace) {
|
498
|
+
Porffor.wasm.i32.store16(bcPtr, 5, 0, 1); // branch1: continue (done)
|
499
|
+
Porffor.wasm.i32.store16(bcPtr, -(bcPtr - lastAtomStart), 0, 3); // branch2: back to atom
|
500
|
+
} else {
|
501
|
+
Porffor.wasm.i32.store16(bcPtr, -(bcPtr - lastAtomStart), 0, 1); // branch1: back to atom
|
502
|
+
Porffor.wasm.i32.store16(bcPtr, 5, 0, 3); // branch2: continue (done)
|
503
|
+
}
|
504
|
+
bcPtr += 5;
|
505
|
+
} else {
|
506
|
+
// {n,m} - exactly between n and m matches
|
507
|
+
// Create chain of forks, each executing atom inline
|
508
|
+
const atomSize: i32 = bcPtr - lastAtomStart;
|
509
|
+
for (let i: i32 = n; i < m; i++) {
|
510
|
+
Porffor.wasm.i32.store8(bcPtr, 0x21, 0, 0); // fork
|
511
|
+
if (lazyBrace) {
|
512
|
+
Porffor.wasm.i32.store16(bcPtr, 5 + atomSize, 0, 1); // branch1: skip this match
|
513
|
+
Porffor.wasm.i32.store16(bcPtr, 5, 0, 3); // branch2: execute atom
|
514
|
+
} else {
|
515
|
+
Porffor.wasm.i32.store16(bcPtr, 5, 0, 1); // branch1: execute atom
|
516
|
+
Porffor.wasm.i32.store16(bcPtr, 5 + atomSize, 0, 3); // branch2: skip this match
|
517
|
+
}
|
518
|
+
bcPtr += 5;
|
519
|
+
|
520
|
+
// Copy the atom inline
|
521
|
+
for (let j: i32 = 0; j < atomSize; j++) {
|
522
|
+
Porffor.wasm.i32.store8(bcPtr + j, Porffor.wasm.i32.load8_u(lastAtomStart + j, 0, 0), 0, 0);
|
523
|
+
}
|
524
|
+
bcPtr += atomSize;
|
525
|
+
}
|
526
|
+
}
|
527
|
+
|
528
|
+
continue;
|
529
|
+
}
|
530
|
+
} else {
|
531
|
+
// handle escapes outside class OR literal chars if escaped and not special
|
532
|
+
// backreference: \1, \2, ...
|
533
|
+
if (Porffor.fastAnd(char >= 49, char <= 57)) { // '1'-'9'
|
534
|
+
lastAtomStart = bcPtr;
|
535
|
+
Porffor.wasm.i32.store8(bcPtr, 0x0a, 0, 0); // back reference
|
536
|
+
Porffor.wasm.i32.store8(bcPtr, char - 48, 0, 1);
|
537
|
+
bcPtr += 2;
|
538
|
+
lastWasAtom = true;
|
539
|
+
continue;
|
540
|
+
}
|
541
|
+
|
542
|
+
if (char == 100) { // \d
|
543
|
+
lastAtomStart = bcPtr;
|
544
|
+
Porffor.wasm.i32.store8(bcPtr, 0x04, 0, 0); // predefined class
|
545
|
+
Porffor.wasm.i32.store8(bcPtr, 1, 0, 1); // digit
|
546
|
+
bcPtr += 2;
|
547
|
+
lastWasAtom = true;
|
548
|
+
continue;
|
549
|
+
}
|
550
|
+
if (char == 68) { // \D
|
551
|
+
lastAtomStart = bcPtr;
|
552
|
+
Porffor.wasm.i32.store8(bcPtr, 0x04, 0, 0);
|
553
|
+
Porffor.wasm.i32.store8(bcPtr, 2, 0, 1); // non-digit
|
554
|
+
bcPtr += 2;
|
555
|
+
lastWasAtom = true;
|
556
|
+
continue;
|
557
|
+
}
|
558
|
+
|
559
|
+
if (char == 115) { // \s
|
560
|
+
lastAtomStart = bcPtr;
|
561
|
+
Porffor.wasm.i32.store8(bcPtr, 0x04, 0, 0);
|
562
|
+
Porffor.wasm.i32.store8(bcPtr, 3, 0, 1); // space
|
563
|
+
bcPtr += 2;
|
564
|
+
lastWasAtom = true;
|
565
|
+
continue;
|
566
|
+
}
|
567
|
+
if (char == 83) { // \S
|
568
|
+
lastAtomStart = bcPtr;
|
569
|
+
Porffor.wasm.i32.store8(bcPtr, 0x04, 0, 0);
|
570
|
+
Porffor.wasm.i32.store8(bcPtr, 4, 0, 1); // non-space
|
571
|
+
bcPtr += 2;
|
572
|
+
lastWasAtom = true;
|
573
|
+
continue;
|
574
|
+
}
|
575
|
+
|
576
|
+
if (char == 119) { // \w
|
577
|
+
lastAtomStart = bcPtr;
|
578
|
+
Porffor.wasm.i32.store8(bcPtr, 0x04, 0, 0);
|
579
|
+
Porffor.wasm.i32.store8(bcPtr, 5, 0, 1); // word
|
580
|
+
bcPtr += 2;
|
581
|
+
lastWasAtom = true;
|
582
|
+
continue;
|
583
|
+
}
|
584
|
+
if (char == 87) { // \W
|
585
|
+
lastAtomStart = bcPtr;
|
586
|
+
Porffor.wasm.i32.store8(bcPtr, 0x04, 0, 0);
|
587
|
+
Porffor.wasm.i32.store8(bcPtr, 6, 0, 1); // non-word
|
588
|
+
bcPtr += 2;
|
589
|
+
lastWasAtom = true;
|
590
|
+
continue;
|
591
|
+
}
|
592
|
+
|
593
|
+
if (char == 98) { // \b
|
594
|
+
Porffor.wasm.i32.store8(bcPtr, 0x07, 0, 0); // word boundary
|
595
|
+
bcPtr += 1;
|
596
|
+
lastWasAtom = false;
|
597
|
+
continue;
|
598
|
+
}
|
599
|
+
if (char == 66) { // \B
|
600
|
+
Porffor.wasm.i32.store8(bcPtr, 0x08, 0, 0); // non-word boundary
|
601
|
+
bcPtr += 1;
|
602
|
+
lastWasAtom = false;
|
603
|
+
continue;
|
604
|
+
}
|
605
|
+
|
606
|
+
if (char == 110) { // \n
|
607
|
+
lastAtomStart = bcPtr;
|
608
|
+
Porffor.wasm.i32.store8(bcPtr, 0x01, 0, 0); // single
|
609
|
+
Porffor.wasm.i32.store8(bcPtr, 10, 0, 1);
|
610
|
+
bcPtr += 2;
|
611
|
+
lastWasAtom = true;
|
612
|
+
continue;
|
613
|
+
}
|
614
|
+
if (char == 114) { // \r
|
615
|
+
lastAtomStart = bcPtr;
|
616
|
+
Porffor.wasm.i32.store8(bcPtr, 0x01, 0, 0);
|
617
|
+
Porffor.wasm.i32.store8(bcPtr, 13, 0, 1);
|
618
|
+
bcPtr += 2;
|
619
|
+
lastWasAtom = true;
|
620
|
+
continue;
|
621
|
+
}
|
622
|
+
if (char == 116) { // \t
|
623
|
+
lastAtomStart = bcPtr;
|
624
|
+
Porffor.wasm.i32.store8(bcPtr, 0x01, 0, 0);
|
625
|
+
Porffor.wasm.i32.store8(bcPtr, 9, 0, 1);
|
626
|
+
bcPtr += 2;
|
627
|
+
lastWasAtom = true;
|
628
|
+
continue;
|
629
|
+
}
|
630
|
+
if (char == 118) { // \v
|
631
|
+
lastAtomStart = bcPtr;
|
632
|
+
Porffor.wasm.i32.store8(bcPtr, 0x01, 0, 0);
|
633
|
+
Porffor.wasm.i32.store8(bcPtr, 11, 0, 1);
|
634
|
+
bcPtr += 2;
|
635
|
+
lastWasAtom = true;
|
636
|
+
continue;
|
637
|
+
}
|
638
|
+
if (char == 102) { // \f
|
639
|
+
lastAtomStart = bcPtr;
|
640
|
+
Porffor.wasm.i32.store8(bcPtr, 0x01, 0, 0);
|
641
|
+
Porffor.wasm.i32.store8(bcPtr, 12, 0, 1);
|
642
|
+
bcPtr += 2;
|
643
|
+
lastWasAtom = true;
|
644
|
+
continue;
|
645
|
+
}
|
646
|
+
if (char == 48) { // \0
|
647
|
+
lastAtomStart = bcPtr;
|
648
|
+
Porffor.wasm.i32.store8(bcPtr, 0x01, 0, 0);
|
649
|
+
Porffor.wasm.i32.store8(bcPtr, 0, 0, 1);
|
650
|
+
bcPtr += 2;
|
651
|
+
lastWasAtom = true;
|
652
|
+
continue;
|
653
|
+
}
|
654
|
+
}
|
655
|
+
|
656
|
+
// default: emit single char (either a literal, or an escape that resolves to a literal)
|
657
|
+
lastAtomStart = bcPtr;
|
658
|
+
Porffor.wasm.i32.store8(bcPtr, 0x01, 0, 0);
|
659
|
+
Porffor.wasm.i32.store8(bcPtr, char, 0, 1);
|
660
|
+
bcPtr += 2;
|
661
|
+
lastWasAtom = true;
|
662
|
+
}
|
663
|
+
|
664
|
+
if (groupDepth != 0) throw new SyntaxError('Regex parse: Unmatched (');
|
665
|
+
if (inClass) throw new SyntaxError('Regex parse: Unmatched [');
|
666
|
+
|
667
|
+
// Accept
|
668
|
+
Porffor.wasm.i32.store8(bcPtr, 0x10, 0, 0);
|
669
|
+
|
670
|
+
// Patch any remaining alternation jumps at the end of the pattern
|
671
|
+
for (let i: i32 = 0; i < altStackPos.length; i++) {
|
672
|
+
Porffor.wasm.i32.store16(altStackPos[i], bcPtr - altStackPos[i] - 5, 0, 3);
|
673
|
+
}
|
674
|
+
|
675
|
+
return ptr as RegExp;
|
72
676
|
};
|
73
677
|
|
74
678
|
|
@@ -92,10 +696,12 @@ export const __RegExp_prototype_source$get = (_this: RegExp) => {
|
|
92
696
|
export const __RegExp_prototype_flags$get = (_this: RegExp) => {
|
93
697
|
// 1. Let R be the this value.
|
94
698
|
// 2. If R is not an Object, throw a TypeError exception.
|
95
|
-
if (!Porffor.object.isObject(_this)) throw new TypeError('
|
96
|
-
|
699
|
+
if (!Porffor.object.isObject(_this)) throw new TypeError('this is a non-object');
|
700
|
+
|
97
701
|
// 3. Let codeUnits be a new empty List.
|
98
|
-
|
702
|
+
const flags: i32 = Porffor.wasm.i32.load16_u(_this, 0, 4);
|
703
|
+
const result: bytestring = Porffor.allocateBytes(16);
|
704
|
+
|
99
705
|
// 4. Let hasIndices be ToBoolean(? Get(R, "hasIndices")).
|
100
706
|
// 5. If hasIndices is true, append the code unit 0x0064 (LATIN SMALL LETTER D) to codeUnits.
|
101
707
|
if (flags & 0b01000000) Porffor.bytestring.appendChar(result, 0x64);
|
@@ -120,40 +726,42 @@ export const __RegExp_prototype_flags$get = (_this: RegExp) => {
|
|
120
726
|
// 18. Let sticky be ToBoolean(? Get(R, "sticky")).
|
121
727
|
// 19. If sticky is true, append the code unit 0x0079 (LATIN SMALL LETTER Y) to codeUnits.
|
122
728
|
if (flags & 0b00100000) Porffor.bytestring.appendChar(result, 0x79);
|
123
|
-
|
729
|
+
|
730
|
+
// 20. Return the String value whose code units are the elements of the List codeUnits.
|
731
|
+
// If codeUnits has no elements, the empty String is returned.
|
124
732
|
return result;
|
125
733
|
};
|
126
734
|
|
127
735
|
export const __RegExp_prototype_global$get = (_this: RegExp) => {
|
128
|
-
return (Porffor.wasm.i32.
|
736
|
+
return (Porffor.wasm.i32.load16_u(_this, 0, 4) & 0b00000001) as boolean;
|
129
737
|
};
|
130
738
|
|
131
739
|
export const __RegExp_prototype_ignoreCase$get = (_this: RegExp) => {
|
132
|
-
return (Porffor.wasm.i32.
|
740
|
+
return (Porffor.wasm.i32.load16_u(_this, 0, 4) & 0b00000010) as boolean;
|
133
741
|
};
|
134
742
|
|
135
743
|
export const __RegExp_prototype_multiline$get = (_this: RegExp) => {
|
136
|
-
return (Porffor.wasm.i32.
|
744
|
+
return (Porffor.wasm.i32.load16_u(_this, 0, 4) & 0b00000100) as boolean;
|
137
745
|
};
|
138
746
|
|
139
747
|
export const __RegExp_prototype_dotAll$get = (_this: RegExp) => {
|
140
|
-
return (Porffor.wasm.i32.
|
748
|
+
return (Porffor.wasm.i32.load16_u(_this, 0, 4) & 0b00001000) as boolean;
|
141
749
|
};
|
142
750
|
|
143
751
|
export const __RegExp_prototype_unicode$get = (_this: RegExp) => {
|
144
|
-
return (Porffor.wasm.i32.
|
752
|
+
return (Porffor.wasm.i32.load16_u(_this, 0, 4) & 0b00010000) as boolean;
|
145
753
|
};
|
146
754
|
|
147
755
|
export const __RegExp_prototype_sticky$get = (_this: RegExp) => {
|
148
|
-
return (Porffor.wasm.i32.
|
756
|
+
return (Porffor.wasm.i32.load16_u(_this, 0, 4) & 0b00100000) as boolean;
|
149
757
|
};
|
150
758
|
|
151
759
|
export const __RegExp_prototype_hasIndices$get = (_this: RegExp) => {
|
152
|
-
return (Porffor.wasm.i32.
|
760
|
+
return (Porffor.wasm.i32.load16_u(_this, 0, 4) & 0b01000000) as boolean;
|
153
761
|
};
|
154
762
|
|
155
763
|
export const __RegExp_prototype_unicodeSets$get = (_this: RegExp) => {
|
156
|
-
return (Porffor.wasm.i32.
|
764
|
+
return (Porffor.wasm.i32.load16_u(_this, 0, 4) & 0b10000000) as boolean;
|
157
765
|
};
|
158
766
|
|
159
767
|
export const __RegExp_prototype_toString = (_this: RegExp) => {
|