porffor 0.57.26 → 0.57.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -7
- package/compiler/builtins/regexp.ts +128 -0
- package/compiler/builtins_precompiled.js +411 -344
- package/compiler/codegen.js +55 -104
- package/compiler/wrap.js +8 -0
- package/foo.js +8 -0
- package/package.json +1 -1
- package/runtime/index.js +1 -1
- package/rhemyn/README.md +0 -40
- package/rhemyn/compile.js +0 -331
- package/rhemyn/parse.js +0 -323
- package/rhemyn/test/parse.js +0 -59
package/compiler/codegen.js
CHANGED
@@ -4,7 +4,6 @@ import { operatorOpcode } from './expression.js';
|
|
4
4
|
import { BuiltinFuncs, BuiltinVars, importedFuncs, NULL, UNDEFINED } from './builtins.js';
|
5
5
|
import { PrototypeFuncs } from './prototype.js';
|
6
6
|
import { TYPES, TYPE_FLAGS, TYPE_NAMES } from './types.js';
|
7
|
-
import * as Rhemyn from '../rhemyn/compile.js';
|
8
7
|
import parse from './parse.js';
|
9
8
|
import { log } from './log.js';
|
10
9
|
import { allocPage, allocStr } from './allocator.js';
|
@@ -1256,28 +1255,16 @@ const generateBinaryExp = (scope, decl) => {
|
|
1256
1255
|
const asmFuncToAsm = (scope, func, extra) => func(scope, {
|
1257
1256
|
Valtype, Opcodes, TYPES, TYPE_NAMES, usedTypes, typeSwitch, makeString, internalThrow,
|
1258
1257
|
getNodeType, generate, generateIdent,
|
1259
|
-
builtin: (
|
1260
|
-
let idx = importedFuncs[
|
1261
|
-
if (idx == null
|
1262
|
-
includeBuiltin(scope, n);
|
1263
|
-
idx = funcIndex[n];
|
1264
|
-
}
|
1265
|
-
|
1266
|
-
scope.includes ??= new Set();
|
1267
|
-
scope.includes.add(n);
|
1268
|
-
|
1269
|
-
if (idx == null) throw new Error(`builtin('${n}') failed: could not find func (from ${scope.name})`);
|
1258
|
+
builtin: (name, offset = false) => {
|
1259
|
+
let idx = importedFuncs[name] ?? includeBuiltin(scope, name)?.index;
|
1260
|
+
if (idx == null) throw new Error(`builtin('${name}') failed: could not find func (from ${scope.name})`);
|
1270
1261
|
if (offset) idx -= importedFuncs.length;
|
1271
1262
|
|
1272
1263
|
return idx;
|
1273
1264
|
},
|
1274
1265
|
hasFunc: x => funcIndex[x] != null,
|
1275
1266
|
funcRef: name => {
|
1276
|
-
|
1277
|
-
includeBuiltin(scope, name);
|
1278
|
-
}
|
1279
|
-
|
1280
|
-
const func = funcByName(name);
|
1267
|
+
const func = includeBuiltin(scope, name);
|
1281
1268
|
return funcRef(func);
|
1282
1269
|
},
|
1283
1270
|
glbl: (opcode, name, type) => {
|
@@ -1341,7 +1328,7 @@ const asmFunc = (name, { wasm, params = [], typedParams = false, locals: localTy
|
|
1341
1328
|
wasm = [];
|
1342
1329
|
}
|
1343
1330
|
|
1344
|
-
const existing =
|
1331
|
+
const existing = builtinFuncByName(name);
|
1345
1332
|
if (existing) return existing;
|
1346
1333
|
|
1347
1334
|
const allLocals = params.concat(localTypes);
|
@@ -1580,9 +1567,7 @@ const getNodeType = (scope, node) => {
|
|
1580
1567
|
|
1581
1568
|
if (node.type === 'Literal') {
|
1582
1569
|
if (node.regex) return TYPES.regexp;
|
1583
|
-
|
1584
1570
|
if (typeof node.value === 'string' && byteStringable(node.value)) return TYPES.bytestring;
|
1585
|
-
|
1586
1571
|
return TYPES[typeof node.value];
|
1587
1572
|
}
|
1588
1573
|
|
@@ -1786,9 +1771,25 @@ const getNodeType = (scope, node) => {
|
|
1786
1771
|
const generateLiteral = (scope, decl, global, name) => {
|
1787
1772
|
if (decl.value === null) return [ number(NULL) ];
|
1788
1773
|
|
1789
|
-
// hack: just return 1 for regex literals
|
1790
1774
|
if (decl.regex) {
|
1791
|
-
|
1775
|
+
// todo/opt: separate aot compiling regex engine for compile-time known regex (literals, known RegExp args)
|
1776
|
+
return generate(scope, {
|
1777
|
+
type: 'CallExpression',
|
1778
|
+
callee: {
|
1779
|
+
type: 'Identifier',
|
1780
|
+
name: 'RegExp'
|
1781
|
+
},
|
1782
|
+
arguments: [
|
1783
|
+
{
|
1784
|
+
type: 'Literal',
|
1785
|
+
value: decl.regex.pattern
|
1786
|
+
},
|
1787
|
+
{
|
1788
|
+
type: 'Literal',
|
1789
|
+
value: decl.regex.flags
|
1790
|
+
}
|
1791
|
+
]
|
1792
|
+
});
|
1792
1793
|
}
|
1793
1794
|
|
1794
1795
|
switch (typeof decl.value) {
|
@@ -2152,35 +2153,6 @@ const generateCall = (scope, decl, _global, _name, unusedValue = false) => {
|
|
2152
2153
|
const prop = (decl.callee.expression ?? decl.callee).property;
|
2153
2154
|
const object = (decl.callee.expression ?? decl.callee).object;
|
2154
2155
|
|
2155
|
-
// megahack for /regex/.func()
|
2156
|
-
if (object?.regex && ['test'].includes(prop.name)) {
|
2157
|
-
const regex = object.regex.pattern;
|
2158
|
-
const rhemynName = `regex_${prop.name}_${sanitize(regex)}`;
|
2159
|
-
|
2160
|
-
if (!funcIndex[rhemynName]) {
|
2161
|
-
const func = Rhemyn[prop.name](regex, currentFuncIndex++, rhemynName);
|
2162
|
-
func.internal = true;
|
2163
|
-
|
2164
|
-
funcIndex[func.name] = func.index;
|
2165
|
-
funcs.push(func);
|
2166
|
-
}
|
2167
|
-
|
2168
|
-
const arg = decl.arguments[0] ?? DEFAULT_VALUE();
|
2169
|
-
const idx = funcIndex[rhemynName];
|
2170
|
-
return [
|
2171
|
-
// make string arg
|
2172
|
-
...generate(scope, arg),
|
2173
|
-
Opcodes.i32_to_u,
|
2174
|
-
...getNodeType(scope, arg),
|
2175
|
-
|
2176
|
-
// call regex func
|
2177
|
-
[ Opcodes.call, idx ],
|
2178
|
-
Opcodes.i32_from_u,
|
2179
|
-
|
2180
|
-
...setLastType(scope, Rhemyn.types[prop.name])
|
2181
|
-
];
|
2182
|
-
}
|
2183
|
-
|
2184
2156
|
protoName = prop?.name;
|
2185
2157
|
target = object;
|
2186
2158
|
}
|
@@ -2213,42 +2185,6 @@ const generateCall = (scope, decl, _global, _name, unusedValue = false) => {
|
|
2213
2185
|
});
|
2214
2186
|
}
|
2215
2187
|
|
2216
|
-
if (['search'].includes(protoName)) {
|
2217
|
-
const regex = decl.arguments[0]?.regex?.pattern;
|
2218
|
-
if (!regex) return [
|
2219
|
-
// no/bad regex arg, return -1/0 for now
|
2220
|
-
...generate(scope, target),
|
2221
|
-
[ Opcodes.drop ],
|
2222
|
-
|
2223
|
-
number(Rhemyn.types[protoName] === TYPES.number ? -1 : 0),
|
2224
|
-
...setLastType(scope, Rhemyn.types[protoName])
|
2225
|
-
];
|
2226
|
-
|
2227
|
-
const rhemynName = `regex_${protoName}_${sanitize(regex)}`;
|
2228
|
-
|
2229
|
-
if (!funcIndex[rhemynName]) {
|
2230
|
-
const func = Rhemyn[protoName](regex, currentFuncIndex++, rhemynName);
|
2231
|
-
func.internal = true;
|
2232
|
-
|
2233
|
-
funcIndex[func.name] = func.index;
|
2234
|
-
funcs.push(func);
|
2235
|
-
}
|
2236
|
-
|
2237
|
-
const idx = funcIndex[rhemynName];
|
2238
|
-
return [
|
2239
|
-
// make string arg
|
2240
|
-
...generate(scope, target),
|
2241
|
-
Opcodes.i32_to_u,
|
2242
|
-
...getNodeType(scope, target),
|
2243
|
-
|
2244
|
-
// call regex func
|
2245
|
-
[ Opcodes.call, idx ],
|
2246
|
-
Opcodes.i32_from,
|
2247
|
-
|
2248
|
-
...setLastType(scope, Rhemyn.types[protoName])
|
2249
|
-
];
|
2250
|
-
}
|
2251
|
-
|
2252
2188
|
const protoBC = {};
|
2253
2189
|
const builtinProtoCands = Object.keys(builtinFuncs).filter(x => x.startsWith('__') && x.endsWith('_prototype_' + protoName));
|
2254
2190
|
|
@@ -2550,20 +2486,24 @@ const generateCall = (scope, decl, _global, _name, unusedValue = false) => {
|
|
2550
2486
|
[ Opcodes.local_get, thisLocal ],
|
2551
2487
|
[ Opcodes.local_get, thisLocalType ]
|
2552
2488
|
];
|
2553
|
-
getCallee =
|
2554
|
-
|
2555
|
-
|
2556
|
-
|
2557
|
-
|
2558
|
-
|
2559
|
-
|
2560
|
-
|
2561
|
-
|
2562
|
-
|
2563
|
-
|
2564
|
-
|
2565
|
-
|
2566
|
-
|
2489
|
+
getCallee = generate(scope, {
|
2490
|
+
type: 'MemberExpression',
|
2491
|
+
object: {
|
2492
|
+
type: 'Wasm',
|
2493
|
+
wasm: () => [
|
2494
|
+
...generate(scope, object),
|
2495
|
+
[ Opcodes.local_tee, thisLocal ],
|
2496
|
+
...getNodeType(scope, object),
|
2497
|
+
[ Opcodes.local_set, thisLocalType ]
|
2498
|
+
],
|
2499
|
+
_type: [
|
2500
|
+
[ Opcodes.local_get, thisLocalType ]
|
2501
|
+
]
|
2502
|
+
},
|
2503
|
+
property,
|
2504
|
+
computed,
|
2505
|
+
optional
|
2506
|
+
});
|
2567
2507
|
}
|
2568
2508
|
}
|
2569
2509
|
|
@@ -3583,6 +3523,9 @@ const memberTmpNames = scope => {
|
|
3583
3523
|
};
|
3584
3524
|
};
|
3585
3525
|
|
3526
|
+
// Property names that are served by builtin prototype getters: coctcOffset returns 0
// for them, and generateMember looks up builtins named `__*_prototype_<name>$get`.
// todo: generate this array procedurally
|
3527
|
+
const builtinPrototypeGets = ['size', 'description', 'byteLength', 'byteOffset', 'buffer', 'detached', 'resizable', 'growable', 'maxByteLength', 'name', 'message', 'constructor', 'source', 'flags', 'global', 'ignoreCase', 'multiline', 'dotAll', 'unicode', 'sticky', 'hasIndices', 'unicodeSets'];
|
3528
|
+
|
3586
3529
|
const ctHash = prop => {
|
3587
3530
|
if (!Prefs.ctHash || !prop ||
|
3588
3531
|
prop.computed || prop.optional ||
|
@@ -3618,7 +3561,9 @@ const coctcOffset = prop => {
|
|
3618
3561
|
) return 0;
|
3619
3562
|
|
3620
3563
|
prop = prop.property.name;
|
3621
|
-
if (!prop ||
|
3564
|
+
if (!prop || builtinPrototypeGets.includes(prop) ||
|
3565
|
+
prop === 'prototype' || prop === 'length' || prop === '__proto__'
|
3566
|
+
) return 0;
|
3622
3567
|
|
3623
3568
|
let offset = coctc.get(prop);
|
3624
3569
|
if (offset == null) {
|
@@ -5853,8 +5798,7 @@ const generateMember = (scope, decl, _global, _name) => {
|
|
5853
5798
|
const type = getNodeType(scope, object);
|
5854
5799
|
const known = knownType(scope, type);
|
5855
5800
|
|
5856
|
-
|
5857
|
-
if (['size', 'description', 'byteLength', 'byteOffset', 'buffer', 'detached', 'resizable', 'growable', 'maxByteLength', 'name', 'message', 'constructor'].includes(decl.property.name)) {
|
5801
|
+
if (builtinPrototypeGets.includes(decl.property.name)) {
|
5858
5802
|
// todo: support optional
|
5859
5803
|
const bc = {};
|
5860
5804
|
const cands = Object.keys(builtinFuncs).filter(x => x.startsWith('__') && x.endsWith('_prototype_' + decl.property.name + '$get'));
|
@@ -6587,6 +6531,13 @@ const funcByIndex = idx => {
|
|
6587
6531
|
};
|
6588
6532
|
const funcByName = name => funcByIndex(funcIndex[name]);
|
6589
6533
|
|
6534
|
+
/**
 * Looks a function up by name, preferring one flagged `internal`.
 *
 * The regular name lookup (funcByName) is tried first. Its result is returned as-is
 * when it is missing or already internal; otherwise the `funcs` list is scanned for
 * an internal function with the same name.
 *
 * @param name function name to resolve
 * @returns the matching function object, or whatever falsy value the lookups produced
 */
const builtinFuncByName = name => {
  const byIndex = funcByName(name);
  const shadowedByUserFunc = byIndex && !byIndex.internal;

  return shadowedByUserFunc
    ? funcs.find(candidate => candidate.name === name && candidate.internal)
    : byIndex;
};
|
6540
|
+
|
6590
6541
|
const generateFunc = (scope, decl, forceNoExpr = false) => {
|
6591
6542
|
doNotMarkFuncRef = false;
|
6592
6543
|
|
package/compiler/wrap.js
CHANGED
@@ -348,6 +348,14 @@ ${flags & 0b0001 ? ` get func idx: ${get}
|
|
348
348
|
return negative ? -result : result;
|
349
349
|
}
|
350
350
|
|
351
|
+
case TYPES.regexp: {
|
352
|
+
const [ pattern, flags ] = read(Uint32Array, memory, value, 2);
|
353
|
+
return new RegExp(
|
354
|
+
porfToJSValue({ memory, funcs, pages }, pattern, TYPES.bytestring),
|
355
|
+
porfToJSValue({ memory, funcs, pages }, flags, TYPES.bytestring)
|
356
|
+
);
|
357
|
+
}
|
358
|
+
|
351
359
|
default: return value;
|
352
360
|
}
|
353
361
|
};
|
package/foo.js
ADDED
package/package.json
CHANGED
package/runtime/index.js
CHANGED
package/rhemyn/README.md
DELETED
@@ -1,40 +0,0 @@
|
|
1
|
-
# Rhemyn
|
2
|
-
A basic experimental WIP regex engine/AOT Wasm compiler in JS. Regex engine for Porffor! Uses own regex parser, no dependencies (excluding porffor internals). <br>
|
3
|
-
Age: ~1 day (of work)
|
4
|
-
|
5
|
-
Made for use with Porffor but could possibly be adapted, implementation/library notes:
|
6
|
-
- Exposes functions for each regex "operation" (eg test, match)
|
7
|
-
- Given a regex pattern string (eg `a+`), it returns a "function" object
|
8
|
-
- Wasm function returned expects an i32 pointer to a UTF-16 string (can add UTF-8 option later if someone else actually wants to use this)
|
9
|
-
|
10
|
-
## syntax
|
11
|
-
🟢 supported 🟡 partial 🟠 parsed only 🔴 unsupported
|
12
|
-
|
13
|
-
- 🟢 literal characters (eg `a`)
|
14
|
-
- 🟢 escaping (eg `\.\n\cJ\x0a\u000a`)
|
15
|
-
- 🟢 character itself (eg `\.`)
|
16
|
-
- 🟢 escape sequences (eg `\n`)
|
17
|
-
- 🟢 control character (eg `\cJ`)
|
18
|
-
- 🟢 unicode code points (eg `\x00`, `\u0000`)
|
19
|
-
- 🟢 sets (eg `[ab]`)
|
20
|
-
- 🟢 ranges (eg `[a-z]`)
|
21
|
-
- 🟢 negated sets (eg `[^ab]`)
|
22
|
-
- 🟢 metacharacters
|
23
|
-
- 🟢 dot (eg `a.b`)
|
24
|
-
- 🟢 digit, not digit (eg `\d\D`)
|
25
|
-
- 🟢 word, not word (eg `\w\W`)
|
26
|
-
- 🟢 whitespace, not whitespace (eg `\s\S`)
|
27
|
-
- 🟡 quantifiers
|
28
|
-
- 🟡 star (eg `a*`)
|
29
|
-
- 🟡 plus (eg `a+`)
|
30
|
-
- 🟡 optional (eg `a?`)
|
31
|
-
- 🟠 lazy modifier (eg `a*?`)
|
32
|
-
- 🔴 n repetitions (eg `a{4}`)
|
33
|
-
- 🔴 n-m repetitions (eg `a{2,4}`)
|
34
|
-
- 🟠 groups
|
35
|
-
- 🟠 capturing groups (`(a)`)
|
36
|
-
- 🔴 non-capturing groups (`(?:a)`)
|
37
|
-
- 🔴 assertions
|
38
|
-
- 🔴 beginning (eg `^a`)
|
39
|
-
- 🔴 end (eg `a$`)
|
40
|
-
- 🔴 word boundary assertion (eg `\b\B`)
|
package/rhemyn/compile.js
DELETED
@@ -1,331 +0,0 @@
|
|
1
|
-
import { Blocktype, Opcodes, Valtype, ValtypeSize } from '../compiler/wasmSpec.js';
|
2
|
-
import { number } from '../compiler/encoding.js';
|
3
|
-
import parse from './parse.js';
|
4
|
-
import { TYPES } from '../compiler/types.js';
|
5
|
-
import '../compiler/prefs.js';
|
6
|
-
|
7
|
-
// Wasm local indexes used by the generated regex functions. They line up with the
// `locals` table built in outputFunc (index 1, the input type, has no constant here).
|
8
|
-
const BasePointer = 0; // base string pointer
|
9
|
-
const Counter = 2; // what char we are running on
|
10
|
-
const Pointer = 3; // next char pointer
|
11
|
-
const Length = 4; // string length, loaded from BasePointer at the start of an Expression
|
12
|
-
const Tmp = 5; // scratch local holding the current char in generateSet / generateRange
|
13
|
-
const QuantifierTmp = 6; // the temporary variable used for quantifiers (number of matched characters)
|
14
|
-
|
15
|
-
/**
 * Reports whether every element of `node.body` can match the empty string,
 * i.e. whether the whole (sub)expression succeeds on zero-length input.
 *
 * An element can match nothing when either
 *  - its quantifier allows zero repetitions (`quantifier[0]` is not above 0), or
 *  - it is a Group whose own body can match nothing.
 *
 * The previous version let a Group that passed the recursive check fall through
 * to the quantifier test, so an unquantified group such as `(a*)` was reported as
 * unable to match empty, and an optional group such as `(a)?` was rejected because
 * of its body.
 *
 * @param node parsed node with an iterable `body` of child nodes
 * @returns {boolean} true when the expression can succeed without consuming input
 */
const doesSucceedZero = node => {
  for (const n of node.body) {
    // zero repetitions allowed: the element can always be skipped
    if (n.quantifier && !(n.quantifier[0] > 0)) continue;

    // a group is skippable when everything inside it is
    if (n.type === 'Group' && doesSucceedZero(n)) continue;

    return false;
  }

  return true;
};
|
28
|
-
|
29
|
-
/**
 * Emits the Wasm ops for one parsed regex node.
 *
 * For an `Expression` this produces the whole matcher: load the string length,
 * return early on an empty string, then loop over start positions (Counter) and
 * run the checks generated for every child node. Other node types are delegated
 * to their dedicated generators. Unknown node types yield an empty array.
 *
 * @param node       parsed regex node (`type` selects the generator)
 * @param negated    whether the comparison is inverted
 * @param get        whether the generated code should read the next char itself
 * @param stringSize bytes per character (2 or 1; see wrapFunc)
 * @param func       operation being compiled: 'test' or 'search'
 * @returns flat array of Wasm ops
 */
const generate = (node, negated = false, get = true, stringSize = 2, func = 'test') => {
  switch (node.type) {
    // braces give the case its own scope for the declarations below
    case 'Expression': {
      const succeedsZero = doesSucceedZero(node);

      const out = [
        // set length local
        [ Opcodes.local_get, BasePointer ],
        [ Opcodes.i32_load, Math.log2(ValtypeSize.i32) - 1, 0 ],
        [ Opcodes.local_tee, Length ],

        // empty input: answer immediately
        number(0, Valtype.i32),
        [ Opcodes.i32_eq ],
        [ Opcodes.if, Blocktype.void ],
          number(succeedsZero ? 1 : 0, Valtype.i32),
          [ Opcodes.return ],
        [ Opcodes.end ],

        // pointer = base + sizeof i32
        [ Opcodes.local_get, BasePointer ],
        number(ValtypeSize.i32, Valtype.i32),
        [ Opcodes.i32_add ],
        [ Opcodes.local_set, Pointer ],

        [ Opcodes.loop, Blocktype.void ],
          [ Opcodes.block, Blocktype.void ],
            // generate checks
            ...node.body.flatMap(x => generate(x, negated, true, stringSize, func)),

            // reached end without branching out, successful match
            ...({
              test: [
                number(1, Valtype.i32)
              ],
              search: [
                [ Opcodes.local_get, Counter ]
              ]
            })[func],
            [ Opcodes.return ],
          [ Opcodes.end ],

          // counter++, if length > counter, loop
          [ Opcodes.local_get, Length ],

          [ Opcodes.local_get, Counter ],
          number(1, Valtype.i32),
          [ Opcodes.i32_add ],
          [ Opcodes.local_tee, Counter ],

          [ Opcodes.i32_gt_s ],

          [ Opcodes.br_if, 0 ],
        [ Opcodes.end ],

        // no match
        number(({
          test: 0,
          search: -1
        })[func], Valtype.i32)
      ];

      if (Prefs.regexLog) {
        const underline = x => `\u001b[4m\u001b[1m${x}\u001b[0m`;
        console.log(`\n${underline('ast')}`);
        console.log(node);

        // `disassemble` is not imported by this module; only call it when some
        // other part of the program has made it available, otherwise dump the raw ops
        // instead of crashing the compile with a ReferenceError.
        if (typeof disassemble === 'function') {
          console.log(`\n${underline('wasm bytecode')}\n` + disassemble(out) + '\n');
        } else {
          console.log(`\n${underline('wasm bytecode')}`);
          console.log(out);
        }
      }

      return out;
    }

    case 'Character':
      return generateChar(node, node.negated ^ negated, get, stringSize);

    case 'Set':
      return generateSet(node, node.negated, get, stringSize);

    case 'Group':
      return generateGroup(node, negated, get, stringSize);

    case 'Range':
      return generateRange(node, negated, get, stringSize);

    default:
      return [];
  }
};
|
119
|
-
|
120
|
-
/**
 * Emits ops that load the character at Pointer and, unless peeking, advance Pointer.
 *
 * @param stringSize bytes per character; 2 selects a 16-bit load, anything else an 8-bit load
 * @param peek       when true, Pointer is left untouched
 * @returns flat array of Wasm ops leaving the loaded char on the stack
 */
const getNextChar = (stringSize, peek = false) => {
  const loadOp = stringSize == 2 ? Opcodes.i32_load16_u : Opcodes.i32_load8_u;

  // get char from pointer
  const ops = [
    [ Opcodes.local_get, Pointer ],
    [ loadOp, 0, 0 ]
  ];

  if (!peek) {
    // pointer += string size
    ops.push(
      [ Opcodes.local_get, Pointer ],
      number(stringSize, Valtype.i32),
      [ Opcodes.i32_add ],
      [ Opcodes.local_set, Pointer ]
    );
  }

  return ops;
};
|
133
|
-
|
134
|
-
/**
 * Ops emitted after a comparison to bail out of the current match attempt:
 * a single conditional branch to the innermost enclosing label.
 *
 * An earlier variant returned 0 from the function on every mismatch
 * (if / i32.const 0 / return / end); branching out is used instead.
 *
 * @returns array holding one `br_if 0` op
 */
const checkFailure = () => {
  const bailOut = [ Opcodes.br_if, 0 ];
  return [ bailOut ];
};
|
143
|
-
|
144
|
-
/**
 * Wraps the comparison ops of a single element in a repetition loop.
 *
 * @param node       parsed node; `node.quantifier` is read as [ min, max ]
 * @param method     comparison ops; their result feeds a `br_if` that leaves the loop on mismatch
 * @param get        when truthy, emit the Pointer advance per repetition and the final failure branch
 * @param stringSize bytes per character, added to Pointer after each matched repetition
 * @returns flat array of Wasm ops
 */
const wrapQuantifier = (node, method, get, stringSize) => {
  const [ min, max ] = node.quantifier;
  const zeroAllowed = min == 0;
  const code = [];

  // QuantifierTmp counts matched characters, starting from 0
  code.push(
    number(0, Valtype.i32),
    [ Opcodes.local_set, QuantifierTmp ]
  );

  // compare (length - counter) with 0; with min == 0 the result opens an `if`
  // that is closed at the very end, otherwise it branches out straight away.
  // NOTE(review): for min == 0 the loop below sits inside that `if`, so it is only
  // entered when length == counter — confirm this is the intended condition.
  code.push(
    [ Opcodes.local_get, Length ],
    [ Opcodes.local_get, Counter ],
    [ Opcodes.i32_sub ],
    number(0, Valtype.i32),
    [ Opcodes.i32_eq ],
    zeroAllowed ? [ Opcodes.if, Blocktype.void ] : [ Opcodes.br_if, 0 ]
  );

  // repetition loop: label 0 = block (leave), label 1 = loop (repeat)
  code.push(
    [ Opcodes.loop, Blocktype.void ],
    [ Opcodes.block, Blocktype.void ],

    // leave when counter + tmp == length
    [ Opcodes.local_get, Counter ],
    [ Opcodes.local_get, QuantifierTmp ],
    [ Opcodes.i32_add ],
    [ Opcodes.local_get, Length ],
    [ Opcodes.i32_eq ],
    [ Opcodes.br_if, 0 ],

    // leave when the element does not match
    ...method,
    [ Opcodes.br_if, 0 ]
  );

  if (get) {
    // pointer += stringSize
    code.push(
      [ Opcodes.local_get, Pointer ],
      number(stringSize, Valtype.i32),
      [ Opcodes.i32_add ],
      [ Opcodes.local_set, Pointer ]
    );
  }

  if (max) {
    // leave when the maximum was reached
    // NOTE(review): this compares the count *before* it is incremented below —
    // verify it does not allow one repetition more than `max`.
    code.push(
      [ Opcodes.local_get, QuantifierTmp ],
      number(max, Valtype.i32),
      [ Opcodes.i32_eq ],
      [ Opcodes.br_if, 0 ]
    );
  }

  // tmp++, then go around again
  code.push(
    [ Opcodes.local_get, QuantifierTmp ],
    number(1, Valtype.i32),
    [ Opcodes.i32_add ],
    [ Opcodes.local_set, QuantifierTmp ],
    [ Opcodes.br, 1 ],
    [ Opcodes.end ],
    [ Opcodes.end ]
  );

  // fewer matches than the minimum means failure
  code.push(
    [ Opcodes.local_get, QuantifierTmp ],
    number(min, Valtype.i32),
    [ Opcodes.i32_lt_s ]
  );
  if (get) code.push(...checkFailure());

  // close the `if` opened for min == 0
  if (zeroAllowed) code.push([ Opcodes.end ]);

  return code;
};
|
210
|
-
|
211
|
-
/**
 * Emits the check for a single literal character.
 *
 * The char is compared against the first UTF-16 code unit of `node.char`; the
 * comparison result is non-zero on mismatch (or on match when `negated`).
 * Quantified nodes are handed to wrapQuantifier, in which case the char is only
 * peeked here and the loop advances the pointer.
 *
 * @param node       Character node (`char`, optional `quantifier`)
 * @param negated    truthy to invert the comparison
 * @param get        whether to read the char from the string and branch on failure
 * @param stringSize bytes per character
 * @returns flat array of Wasm ops
 */
const generateChar = (node, negated, get, stringSize) => {
  const quantified = Boolean(node.quantifier);

  const compare = get ? [ ...getNextChar(stringSize, quantified) ] : [];
  compare.push(
    number(node.char.charCodeAt(0), Valtype.i32),
    [ negated ? Opcodes.i32_eq : Opcodes.i32_ne ]
  );

  if (quantified) return wrapQuantifier(node, compare, get, stringSize);

  if (get) compare.push(...checkFailure());
  return compare;
};
|
228
|
-
|
229
|
-
/**
 * Emits the check for a character set.
 *
 * The current char is stored in Tmp, every member of the set is compared against
 * it, and the individual results are folded with `i32.and` (or `i32.or` when
 * negated). A set holding exactly one Character skips the Tmp local and is
 * generated as a plain character check.
 *
 * @param node       Set node (`body` array of members, optional `quantifier`)
 * @param negated    truthy to invert the set
 * @param get        whether to read the char from the string
 * @param stringSize bytes per character
 * @returns flat array of Wasm ops
 */
const generateSet = (node, negated, get, stringSize) => {
  const members = node.body;

  // NOTE(review): this shortcut passes the inner Character node on, so a quantifier
  // stored on the set node itself is not consulted — confirm against the parser output.
  if (members.length === 1 && members[0].type === 'Character') {
    return generateChar(members[0], negated, get, stringSize);
  }

  const quantified = Boolean(node.quantifier);

  const code = [
    ...(get ? getNextChar(stringSize, quantified) : []),
    [ Opcodes.local_set, Tmp ],

    // one comparison per member, each against the saved char
    ...members.flatMap(member => [
      [ Opcodes.local_get, Tmp ],
      ...generate(member, negated, false, stringSize)
    ])
  ];

  // n results need n - 1 combining ops
  for (let pending = 1; pending < members.length; pending++) {
    code.push([ negated ? Opcodes.i32_or : Opcodes.i32_and ]);
  }

  return quantified
    ? wrapQuantifier(node, code, get, stringSize)
    : [ ...code, ...checkFailure() ];
};
|
263
|
-
|
264
|
-
/**
 * Emits the check for a character range (`from`-`to`).
 *
 * The value on the stack is compared with the first code unit of `node.from`,
 * Tmp is compared with the first code unit of `node.to`, and both results are
 * combined. When `get` is set the char is read first and also saved into Tmp.
 *
 * @param node       Range node with `from` and `to` strings
 * @param negated    truthy to invert the comparisons
 * @param get        whether to read the char from the string and branch on failure
 * @param stringSize bytes per character
 * @returns flat array of Wasm ops
 */
const generateRange = (node, negated, get, stringSize) => {
  // plain: (char < from) | (char > to); negated: (char >= from) & (char <= to)
  const [ lowerOp, upperOp, combineOp ] = negated
    ? [ Opcodes.i32_ge_s, Opcodes.i32_le_s, Opcodes.i32_and ]
    : [ Opcodes.i32_lt_s, Opcodes.i32_gt_s, Opcodes.i32_or ];

  const code = [];

  if (get) {
    code.push(
      ...getNextChar(stringSize),
      [ Opcodes.local_tee, Tmp ]
    );
  }

  code.push(
    number(node.from.charCodeAt(0), Valtype.i32),
    [ lowerOp ],

    [ Opcodes.local_get, Tmp ],
    number(node.to.charCodeAt(0), Valtype.i32),
    [ upperOp ],

    [ combineOp ]
  );

  if (get) code.push(...checkFailure());

  return code;
};
|
282
|
-
|
283
|
-
/**
 * Placeholder for group support: groups currently emit no ops at all.
 *
 * @param node    Group node (unused)
 * @param negated unused
 * @param get     unused
 * @returns an empty op list
 */
// todo
const generateGroup = (node, negated, get) => [];
|
287
|
-
|
288
|
-
/**
 * Parses a regex pattern and builds the function object for one operation.
 *
 * The generated body checks local 1 (the input type) against TYPES.string and
 * runs a matcher generated for 2-byte characters in that case, otherwise one
 * generated for 1-byte characters.
 *
 * @param regex pattern source handed to the parser
 * @param func  operation name, also the key into `types`
 * @param name  name of the generated function
 * @param index index of the generated function
 * @returns function object as produced by outputFunc
 */
const wrapFunc = (regex, func, name, index) => {
  const ast = parse(regex);

  const stringMatcher = generate(ast, false, true, 2, func);
  const bytestringMatcher = generate(ast, false, true, 1, func);

  const body = [
    [ Opcodes.local_get, 1 ],
    number(TYPES.string, Valtype.i32),
    [ Opcodes.i32_eq ],
    [ Opcodes.if, Valtype.i32 ],
    ...stringMatcher,
    [ Opcodes.else ],
    ...bytestringMatcher,
    [ Opcodes.end ]
  ];

  return outputFunc(body, name, index, types[func]);
};
|
304
|
-
|
305
|
-
/**
 * Builds the function object for the 'test' operation of a pattern.
 * `name` defaults to 'regex_test_' followed by the pattern.
 */
export const test = (regex, index = 0, name = `regex_test_${regex}`) => {
  return wrapFunc(regex, 'test', name, index);
};

/**
 * Builds the function object for the 'search' operation of a pattern.
 * `name` defaults to 'regex_search_' followed by the pattern.
 */
export const search = (regex, index = 0, name = `regex_search_${regex}`) => {
  return wrapFunc(regex, 'search', name, index);
};
|
307
|
-
|
308
|
-
// Result type of each generated regex operation, keyed by operation name.
// wrapFunc passes the matching entry to outputFunc as `returnType`.
export const types = {
|
309
|
-
test: TYPES.boolean,
|
310
|
-
search: TYPES.number
|
311
|
-
};
|
312
|
-
|
313
|
-
/**
 * Assembles the function object for a generated regex matcher.
 *
 * Every generated function is exported, takes two i32 params, returns one i32 and
 * declares seven i32 locals whose indexes follow their order in the list below.
 *
 * @param wasm       function body (array of Wasm ops)
 * @param name       function name
 * @param index      function index
 * @param returnType type recorded for the function's result
 * @returns the function object
 */
const outputFunc = (wasm, name, index, returnType) => {
  const i32 = Valtype.i32;

  // position in this list is the local's index
  const localNames = [
    'basePointer',
    'inputType',
    'counter',
    'pointer',
    'length',
    'tmp',
    'quantifierTmp'
  ];
  const locals = Object.fromEntries(
    localNames.map((localName, idx) => [ localName, { idx, type: i32 } ])
  );

  return {
    name,
    index,
    wasm,
    returnType,

    export: true,
    params: [ i32, i32 ],
    returns: [ i32 ],
    locals
  };
};
|