porffor 0.57.25 → 0.57.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -7
- package/compiler/builtins/regexp.ts +128 -0
- package/compiler/builtins_precompiled.js +411 -344
- package/compiler/codegen.js +26 -78
- package/compiler/prefs.js +1 -6
- package/compiler/wrap.js +8 -0
- package/package.json +1 -1
- package/runtime/index.js +1 -1
- package/rhemyn/README.md +0 -40
- package/rhemyn/compile.js +0 -331
- package/rhemyn/parse.js +0 -323
- package/rhemyn/test/parse.js +0 -59
package/compiler/codegen.js
CHANGED
@@ -4,7 +4,6 @@ import { operatorOpcode } from './expression.js';
|
|
4
4
|
import { BuiltinFuncs, BuiltinVars, importedFuncs, NULL, UNDEFINED } from './builtins.js';
|
5
5
|
import { PrototypeFuncs } from './prototype.js';
|
6
6
|
import { TYPES, TYPE_FLAGS, TYPE_NAMES } from './types.js';
|
7
|
-
import * as Rhemyn from '../rhemyn/compile.js';
|
8
7
|
import parse from './parse.js';
|
9
8
|
import { log } from './log.js';
|
10
9
|
import { allocPage, allocStr } from './allocator.js';
|
@@ -1580,9 +1579,7 @@ const getNodeType = (scope, node) => {
|
|
1580
1579
|
|
1581
1580
|
if (node.type === 'Literal') {
|
1582
1581
|
if (node.regex) return TYPES.regexp;
|
1583
|
-
|
1584
1582
|
if (typeof node.value === 'string' && byteStringable(node.value)) return TYPES.bytestring;
|
1585
|
-
|
1586
1583
|
return TYPES[typeof node.value];
|
1587
1584
|
}
|
1588
1585
|
|
@@ -1786,9 +1783,25 @@ const getNodeType = (scope, node) => {
|
|
1786
1783
|
const generateLiteral = (scope, decl, global, name) => {
|
1787
1784
|
if (decl.value === null) return [ number(NULL) ];
|
1788
1785
|
|
1789
|
-
// hack: just return 1 for regex literals
|
1790
1786
|
if (decl.regex) {
|
1791
|
-
|
1787
|
+
// todo/opt: separate aot compiling regex engine for compile-time known regex (literals, known RegExp args)
|
1788
|
+
return generate(scope, {
|
1789
|
+
type: 'CallExpression',
|
1790
|
+
callee: {
|
1791
|
+
type: 'Identifier',
|
1792
|
+
name: 'RegExp'
|
1793
|
+
},
|
1794
|
+
arguments: [
|
1795
|
+
{
|
1796
|
+
type: 'Literal',
|
1797
|
+
value: decl.regex.pattern
|
1798
|
+
},
|
1799
|
+
{
|
1800
|
+
type: 'Literal',
|
1801
|
+
value: decl.regex.flags
|
1802
|
+
}
|
1803
|
+
]
|
1804
|
+
});
|
1792
1805
|
}
|
1793
1806
|
|
1794
1807
|
switch (typeof decl.value) {
|
@@ -2152,35 +2165,6 @@ const generateCall = (scope, decl, _global, _name, unusedValue = false) => {
|
|
2152
2165
|
const prop = (decl.callee.expression ?? decl.callee).property;
|
2153
2166
|
const object = (decl.callee.expression ?? decl.callee).object;
|
2154
2167
|
|
2155
|
-
// megahack for /regex/.func()
|
2156
|
-
if (object?.regex && ['test'].includes(prop.name)) {
|
2157
|
-
const regex = object.regex.pattern;
|
2158
|
-
const rhemynName = `regex_${prop.name}_${sanitize(regex)}`;
|
2159
|
-
|
2160
|
-
if (!funcIndex[rhemynName]) {
|
2161
|
-
const func = Rhemyn[prop.name](regex, currentFuncIndex++, rhemynName);
|
2162
|
-
func.internal = true;
|
2163
|
-
|
2164
|
-
funcIndex[func.name] = func.index;
|
2165
|
-
funcs.push(func);
|
2166
|
-
}
|
2167
|
-
|
2168
|
-
const arg = decl.arguments[0] ?? DEFAULT_VALUE();
|
2169
|
-
const idx = funcIndex[rhemynName];
|
2170
|
-
return [
|
2171
|
-
// make string arg
|
2172
|
-
...generate(scope, arg),
|
2173
|
-
Opcodes.i32_to_u,
|
2174
|
-
...getNodeType(scope, arg),
|
2175
|
-
|
2176
|
-
// call regex func
|
2177
|
-
[ Opcodes.call, idx ],
|
2178
|
-
Opcodes.i32_from_u,
|
2179
|
-
|
2180
|
-
...setLastType(scope, Rhemyn.types[prop.name])
|
2181
|
-
];
|
2182
|
-
}
|
2183
|
-
|
2184
2168
|
protoName = prop?.name;
|
2185
2169
|
target = object;
|
2186
2170
|
}
|
@@ -2213,42 +2197,6 @@ const generateCall = (scope, decl, _global, _name, unusedValue = false) => {
|
|
2213
2197
|
});
|
2214
2198
|
}
|
2215
2199
|
|
2216
|
-
if (['search'].includes(protoName)) {
|
2217
|
-
const regex = decl.arguments[0]?.regex?.pattern;
|
2218
|
-
if (!regex) return [
|
2219
|
-
// no/bad regex arg, return -1/0 for now
|
2220
|
-
...generate(scope, target),
|
2221
|
-
[ Opcodes.drop ],
|
2222
|
-
|
2223
|
-
number(Rhemyn.types[protoName] === TYPES.number ? -1 : 0),
|
2224
|
-
...setLastType(scope, Rhemyn.types[protoName])
|
2225
|
-
];
|
2226
|
-
|
2227
|
-
const rhemynName = `regex_${protoName}_${sanitize(regex)}`;
|
2228
|
-
|
2229
|
-
if (!funcIndex[rhemynName]) {
|
2230
|
-
const func = Rhemyn[protoName](regex, currentFuncIndex++, rhemynName);
|
2231
|
-
func.internal = true;
|
2232
|
-
|
2233
|
-
funcIndex[func.name] = func.index;
|
2234
|
-
funcs.push(func);
|
2235
|
-
}
|
2236
|
-
|
2237
|
-
const idx = funcIndex[rhemynName];
|
2238
|
-
return [
|
2239
|
-
// make string arg
|
2240
|
-
...generate(scope, target),
|
2241
|
-
Opcodes.i32_to_u,
|
2242
|
-
...getNodeType(scope, target),
|
2243
|
-
|
2244
|
-
// call regex func
|
2245
|
-
[ Opcodes.call, idx ],
|
2246
|
-
Opcodes.i32_from,
|
2247
|
-
|
2248
|
-
...setLastType(scope, Rhemyn.types[protoName])
|
2249
|
-
];
|
2250
|
-
}
|
2251
|
-
|
2252
2200
|
const protoBC = {};
|
2253
2201
|
const builtinProtoCands = Object.keys(builtinFuncs).filter(x => x.startsWith('__') && x.endsWith('_prototype_' + protoName));
|
2254
2202
|
|
@@ -3583,8 +3531,10 @@ const memberTmpNames = scope => {
|
|
3583
3531
|
};
|
3584
3532
|
};
|
3585
3533
|
|
3534
|
+
// todo: generate this array procedurally
|
3535
|
+
const builtinPrototypeGets = ['size', 'description', 'byteLength', 'byteOffset', 'buffer', 'detached', 'resizable', 'growable', 'maxByteLength', 'name', 'message', 'constructor', 'source', 'flags', 'global', 'ignoreCase', 'multiline', 'dotAll', 'unicode', 'sticky', 'hasIndices', 'unicodeSets'];
|
3536
|
+
|
3586
3537
|
const ctHash = prop => {
|
3587
|
-
const _ = prop;
|
3588
3538
|
if (!Prefs.ctHash || !prop ||
|
3589
3539
|
prop.computed || prop.optional ||
|
3590
3540
|
prop.property.type === 'PrivateIdentifier'
|
@@ -3601,13 +3551,10 @@ const ctHash = prop => {
|
|
3601
3551
|
const read = () => (prop.charCodeAt(i + 3) << 24 | prop.charCodeAt(i + 2) << 16 | prop.charCodeAt(i + 1) << 8 | prop.charCodeAt(i));
|
3602
3552
|
|
3603
3553
|
// hash in chunks of i32 (4 bytes)
|
3604
|
-
for (; i <= len
|
3554
|
+
for (; i <= len; i += 4) {
|
3605
3555
|
hash = Math.imul(rotl(hash + Math.imul(read(), 3266489917), 17), 668265263);
|
3606
3556
|
}
|
3607
3557
|
|
3608
|
-
// hash final bytes up to 4 via shift depending on bytes remaining
|
3609
|
-
hash = Math.imul(rotl(hash + Math.imul(read(), 3266489917), 17), 668265263);
|
3610
|
-
|
3611
3558
|
// final avalanche
|
3612
3559
|
hash = Math.imul(hash ^ (hash >>> 15), 2246822519);
|
3613
3560
|
hash = Math.imul(hash ^ (hash >>> 13), 3266489917);
|
@@ -3622,7 +3569,9 @@ const coctcOffset = prop => {
|
|
3622
3569
|
) return 0;
|
3623
3570
|
|
3624
3571
|
prop = prop.property.name;
|
3625
|
-
if (!prop ||
|
3572
|
+
if (!prop || builtinPrototypeGets.includes(prop) ||
|
3573
|
+
prop === 'prototype' || prop === 'length' || prop === '__proto__'
|
3574
|
+
) return 0;
|
3626
3575
|
|
3627
3576
|
let offset = coctc.get(prop);
|
3628
3577
|
if (offset == null) {
|
@@ -5857,8 +5806,7 @@ const generateMember = (scope, decl, _global, _name) => {
|
|
5857
5806
|
const type = getNodeType(scope, object);
|
5858
5807
|
const known = knownType(scope, type);
|
5859
5808
|
|
5860
|
-
|
5861
|
-
if (['size', 'description', 'byteLength', 'byteOffset', 'buffer', 'detached', 'resizable', 'growable', 'maxByteLength', 'name', 'message', 'constructor'].includes(decl.property.name)) {
|
5809
|
+
if (builtinPrototypeGets.includes(decl.property.name)) {
|
5862
5810
|
// todo: support optional
|
5863
5811
|
const bc = {};
|
5864
5812
|
const cands = Object.keys(builtinFuncs).filter(x => x.startsWith('__') && x.endsWith('_prototype_' + decl.property.name + '$get'));
|
package/compiler/prefs.js
CHANGED
@@ -2,9 +2,8 @@ const onByDefault = [ 'treeshakeWasmImports', 'alwaysMemory', 'indirectCalls', '
|
|
2
2
|
|
3
3
|
const nameToKey = x => x.replace(/[a-z]\-[a-z]/g, y => `${y[0]}${y[2].toUpperCase()}`);
|
4
4
|
|
5
|
-
let prefs = {};
|
6
5
|
const getPrefs = () => {
|
7
|
-
prefs = {};
|
6
|
+
const prefs = globalThis.Prefs = {};
|
8
7
|
for (const x of onByDefault) prefs[x] = true;
|
9
8
|
|
10
9
|
for (const x of process.argv) {
|
@@ -18,12 +17,8 @@ const getPrefs = () => {
|
|
18
17
|
prefs[nameToKey(name)] = value ?? true;
|
19
18
|
}
|
20
19
|
}
|
21
|
-
|
22
|
-
globalThis.Prefs = prefs;
|
23
20
|
};
|
24
21
|
getPrefs();
|
25
22
|
|
26
|
-
// export default prefs;
|
27
|
-
|
28
23
|
export const uncache = () => getPrefs();
|
29
24
|
globalThis.argvChanged = uncache;
|
package/compiler/wrap.js
CHANGED
@@ -348,6 +348,14 @@ ${flags & 0b0001 ? ` get func idx: ${get}
|
|
348
348
|
return negative ? -result : result;
|
349
349
|
}
|
350
350
|
|
351
|
+
case TYPES.regexp: {
|
352
|
+
const [ pattern, flags ] = read(Uint32Array, memory, value, 2);
|
353
|
+
return new RegExp(
|
354
|
+
porfToJSValue({ memory, funcs, pages }, pattern, TYPES.bytestring),
|
355
|
+
porfToJSValue({ memory, funcs, pages }, flags, TYPES.bytestring)
|
356
|
+
);
|
357
|
+
}
|
358
|
+
|
351
359
|
default: return value;
|
352
360
|
}
|
353
361
|
};
|
package/package.json
CHANGED
package/runtime/index.js
CHANGED
package/rhemyn/README.md
DELETED
@@ -1,40 +0,0 @@
|
|
1
|
-
# Rhemyn
|
2
|
-
A basic experimental WIP regex engine/AOT Wasm compiler in JS. Regex engine for Porffor! Uses its own regex parser, with no dependencies (excluding porffor internals). <br>
|
3
|
-
Age: ~1 day (of work)
|
4
|
-
|
5
|
-
Made for use with Porffor but could possibly be adapted, implementation/library notes:
|
6
|
-
- Exposes functions for each regex "operation" (eg test, match)
|
7
|
-
- Given a regex pattern string (eg `a+`), it returns a "function" object
|
8
|
-
- The returned Wasm function expects an i32 pointer to a UTF-16 string (a UTF-8 option can be added later if someone else actually wants to use this)
|
9
|
-
|
10
|
-
## syntax
|
11
|
-
🟢 supported 🟡 partial 🟠 parsed only 🔴 unsupported
|
12
|
-
|
13
|
-
- 🟢 literal characters (eg `a`)
|
14
|
-
- 🟢 escaping (eg `\.\n\cJ\x0a\u000a`)
|
15
|
-
- 🟢 character itself (eg `\.`)
|
16
|
-
- 🟢 escape sequences (eg `\n`)
|
17
|
-
- 🟢 control character (eg `\cJ`)
|
18
|
-
- 🟢 unicode code points (eg `\x00`, `\u0000`)
|
19
|
-
- 🟢 sets (eg `[ab]`)
|
20
|
-
- 🟢 ranges (eg `[a-z]`)
|
21
|
-
- 🟢 negated sets (eg `[^ab]`)
|
22
|
-
- 🟢 metacharacters
|
23
|
-
- 🟢 dot (eg `a.b`)
|
24
|
-
- 🟢 digit, not digit (eg `\d\D`)
|
25
|
-
- 🟢 word, not word (eg `\w\W`)
|
26
|
-
- 🟢 whitespace, not whitespace (eg `\s\S`)
|
27
|
-
- 🟡 quantifiers
|
28
|
-
- 🟡 star (eg `a*`)
|
29
|
-
- 🟡 plus (eg `a+`)
|
30
|
-
- 🟡 optional (eg `a?`)
|
31
|
-
- 🟠 lazy modifier (eg `a*?`)
|
32
|
-
- 🔴 n repetitions (eg `a{4}`)
|
33
|
-
- 🔴 n-m repetitions (eg `a{2,4}`)
|
34
|
-
- 🟠 groups
|
35
|
-
- 🟠 capturing groups (`(a)`)
|
36
|
-
- 🔴 non-capturing groups (`(?:a)`)
|
37
|
-
- 🔴 assertions
|
38
|
-
- 🔴 beginning (eg `^a`)
|
39
|
-
- 🔴 end (eg `a$`)
|
40
|
-
- 🔴 word boundary assertion (eg `\b\B`)
|
package/rhemyn/compile.js
DELETED
@@ -1,331 +0,0 @@
|
|
1
|
-
import { Blocktype, Opcodes, Valtype, ValtypeSize } from '../compiler/wasmSpec.js';
|
2
|
-
import { number } from '../compiler/encoding.js';
|
3
|
-
import parse from './parse.js';
|
4
|
-
import { TYPES } from '../compiler/types.js';
|
5
|
-
import '../compiler/prefs.js';
|
6
|
-
|
7
|
-
// local indexes
|
8
|
-
const BasePointer = 0; // base string pointer
|
9
|
-
const Counter = 2; // what char we are running on
|
10
|
-
const Pointer = 3; // next char pointer
|
11
|
-
const Length = 4;
|
12
|
-
const Tmp = 5;
|
13
|
-
const QuantifierTmp = 6; // the temporary variable used for quantifiers
|
14
|
-
|
15
|
-
const doesSucceedZero = node => {
|
16
|
-
for (const n of node.body) {
|
17
|
-
if (n.type === 'Group') {
|
18
|
-
if (!doesSucceedZero(n)) return false;
|
19
|
-
}
|
20
|
-
|
21
|
-
if (!n.quantifier || n.quantifier[0] > 0) {
|
22
|
-
return false;
|
23
|
-
}
|
24
|
-
}
|
25
|
-
|
26
|
-
return true;
|
27
|
-
}
|
28
|
-
|
29
|
-
const generate = (node, negated = false, get = true, stringSize = 2, func = 'test') => {
|
30
|
-
let out = [];
|
31
|
-
switch (node.type) {
|
32
|
-
case 'Expression':
|
33
|
-
let succeedsZero = doesSucceedZero(node);
|
34
|
-
|
35
|
-
out = [
|
36
|
-
// set length local
|
37
|
-
[ Opcodes.local_get, BasePointer ],
|
38
|
-
[ Opcodes.i32_load, Math.log2(ValtypeSize.i32) - 1, 0 ],
|
39
|
-
[ Opcodes.local_tee, Length ],
|
40
|
-
|
41
|
-
number(0, Valtype.i32),
|
42
|
-
[ Opcodes.i32_eq ],
|
43
|
-
[ Opcodes.if, Blocktype.void ],
|
44
|
-
number(succeedsZero ? 1 : 0, Valtype.i32),
|
45
|
-
[ Opcodes.return ],
|
46
|
-
[ Opcodes.end ],
|
47
|
-
|
48
|
-
// pointer = base + sizeof i32
|
49
|
-
[ Opcodes.local_get, BasePointer ],
|
50
|
-
number(ValtypeSize.i32, Valtype.i32),
|
51
|
-
[ Opcodes.i32_add ],
|
52
|
-
[ Opcodes.local_set, Pointer ],
|
53
|
-
|
54
|
-
[ Opcodes.loop, Blocktype.void ],
|
55
|
-
[ Opcodes.block, Blocktype.void ],
|
56
|
-
// generate checks
|
57
|
-
...node.body.flatMap(x => generate(x, negated, true, stringSize, func)),
|
58
|
-
|
59
|
-
// reached end without branching out, successful match
|
60
|
-
...({
|
61
|
-
test: [
|
62
|
-
number(1, Valtype.i32)
|
63
|
-
],
|
64
|
-
search: [
|
65
|
-
[ Opcodes.local_get, Counter ]
|
66
|
-
]
|
67
|
-
})[func],
|
68
|
-
[ Opcodes.return ],
|
69
|
-
[ Opcodes.end ],
|
70
|
-
|
71
|
-
// counter++, if length > counter, loop
|
72
|
-
[ Opcodes.local_get, Length ],
|
73
|
-
|
74
|
-
[ Opcodes.local_get, Counter ],
|
75
|
-
number(1, Valtype.i32),
|
76
|
-
[ Opcodes.i32_add ],
|
77
|
-
[ Opcodes.local_tee, Counter ],
|
78
|
-
|
79
|
-
[ Opcodes.i32_gt_s ],
|
80
|
-
|
81
|
-
[ Opcodes.br_if, 0 ],
|
82
|
-
[ Opcodes.end ],
|
83
|
-
|
84
|
-
// no match
|
85
|
-
number(({
|
86
|
-
test: 0,
|
87
|
-
search: -1
|
88
|
-
})[func], Valtype.i32)
|
89
|
-
];
|
90
|
-
|
91
|
-
if (Prefs.regexLog) {
|
92
|
-
const underline = x => `\u001b[4m\u001b[1m${x}\u001b[0m`;
|
93
|
-
console.log(`\n${underline('ast')}`);
|
94
|
-
console.log(node);
|
95
|
-
console.log(`\n${underline('wasm bytecode')}\n` + disassemble(out) + '\n');
|
96
|
-
}
|
97
|
-
|
98
|
-
break;
|
99
|
-
|
100
|
-
case 'Character':
|
101
|
-
out = generateChar(node, node.negated ^ negated, get, stringSize);
|
102
|
-
break;
|
103
|
-
|
104
|
-
case 'Set':
|
105
|
-
out = generateSet(node, node.negated, get, stringSize);
|
106
|
-
break;
|
107
|
-
|
108
|
-
case 'Group':
|
109
|
-
out = generateGroup(node, negated, get, stringSize);
|
110
|
-
break;
|
111
|
-
|
112
|
-
case 'Range':
|
113
|
-
out = generateRange(node, negated, get, stringSize);
|
114
|
-
break;
|
115
|
-
}
|
116
|
-
|
117
|
-
return out;
|
118
|
-
};
|
119
|
-
|
120
|
-
const getNextChar = (stringSize, peek = false) => [
|
121
|
-
// get char from pointer
|
122
|
-
[ Opcodes.local_get, Pointer ],
|
123
|
-
[ stringSize == 2 ? Opcodes.i32_load16_u : Opcodes.i32_load8_u, 0, 0 ],
|
124
|
-
|
125
|
-
...(peek ? [] : [
|
126
|
-
// pointer += string size
|
127
|
-
[ Opcodes.local_get, Pointer ],
|
128
|
-
number(stringSize, Valtype.i32),
|
129
|
-
[ Opcodes.i32_add ],
|
130
|
-
[ Opcodes.local_set, Pointer ]
|
131
|
-
])
|
132
|
-
];
|
133
|
-
|
134
|
-
const checkFailure = () => [
|
135
|
-
// surely we do not need to do this for every single mismatch, right?
|
136
|
-
/* [ Opcodes.if, Blocktype.void ],
|
137
|
-
number(0, Valtype.i32),
|
138
|
-
[ Opcodes.return ],
|
139
|
-
[ Opcodes.end ], */
|
140
|
-
|
141
|
-
[ Opcodes.br_if, 0 ]
|
142
|
-
];
|
143
|
-
|
144
|
-
const wrapQuantifier = (node, method, get, stringSize) => {
|
145
|
-
const [ min, max ] = node.quantifier;
|
146
|
-
return [
|
147
|
-
// initialize our temp value (number of matched characters)
|
148
|
-
number(0, Valtype.i32),
|
149
|
-
[Opcodes.local_set, QuantifierTmp],
|
150
|
-
|
151
|
-
// if len - counter == 0, if min == 0, succeed, else fail
|
152
|
-
[ Opcodes.local_get, Length ],
|
153
|
-
[ Opcodes.local_get, Counter ],
|
154
|
-
[ Opcodes.i32_sub ],
|
155
|
-
number(0, Valtype.i32),
|
156
|
-
[ Opcodes.i32_eq ],
|
157
|
-
...(min == 0 ? [
|
158
|
-
[ Opcodes.if, Blocktype.void ],
|
159
|
-
] : [
|
160
|
-
[ Opcodes.br_if, 0 ],
|
161
|
-
]),
|
162
|
-
|
163
|
-
// start loop
|
164
|
-
[Opcodes.loop, Blocktype.void],
|
165
|
-
[ Opcodes.block, Blocktype.void ],
|
166
|
-
// if counter + tmp == length, break
|
167
|
-
[ Opcodes.local_get, Counter ],
|
168
|
-
[ Opcodes.local_get, QuantifierTmp ],
|
169
|
-
[ Opcodes.i32_add ],
|
170
|
-
[ Opcodes.local_get, Length ],
|
171
|
-
[ Opcodes.i32_eq ],
|
172
|
-
[ Opcodes.br_if, 0 ],
|
173
|
-
|
174
|
-
// if doesn't match, break
|
175
|
-
...method,
|
176
|
-
[Opcodes.br_if, 0 ],
|
177
|
-
...(get ? [
|
178
|
-
// pointer += stringSize
|
179
|
-
[ Opcodes.local_get, Pointer ],
|
180
|
-
number(stringSize, Valtype.i32),
|
181
|
-
[ Opcodes.i32_add ],
|
182
|
-
[ Opcodes.local_set, Pointer ]
|
183
|
-
] : []),
|
184
|
-
|
185
|
-
// if maximum was reached, break
|
186
|
-
...(max ? [
|
187
|
-
[ Opcodes.local_get, QuantifierTmp ],
|
188
|
-
number(max, Valtype.i32),
|
189
|
-
[ Opcodes.i32_eq ],
|
190
|
-
[ Opcodes.br_if, 0 ]
|
191
|
-
] : []),
|
192
|
-
|
193
|
-
[ Opcodes.local_get, QuantifierTmp ],
|
194
|
-
number(1, Valtype.i32),
|
195
|
-
[ Opcodes.i32_add ],
|
196
|
-
[ Opcodes.local_set, QuantifierTmp ],
|
197
|
-
[ Opcodes.br, 1 ],
|
198
|
-
[ Opcodes.end ],
|
199
|
-
[ Opcodes.end ],
|
200
|
-
|
201
|
-
// if less than minimum, fail
|
202
|
-
[Opcodes.local_get, QuantifierTmp],
|
203
|
-
number(min, Valtype.i32),
|
204
|
-
[Opcodes.i32_lt_s],
|
205
|
-
...(get ? checkFailure(): []),
|
206
|
-
|
207
|
-
...(min == 0 ? [ [ Opcodes.end ] ] : []),
|
208
|
-
];
|
209
|
-
}
|
210
|
-
|
211
|
-
const generateChar = (node, negated, get, stringSize) => {
|
212
|
-
const hasQuantifier = !!node.quantifier;
|
213
|
-
const out = [
|
214
|
-
...(get ? getNextChar(stringSize, hasQuantifier) : []),
|
215
|
-
number(node.char.charCodeAt(0), Valtype.i32),
|
216
|
-
negated ? [ Opcodes.i32_eq ] : [ Opcodes.i32_ne ],
|
217
|
-
];
|
218
|
-
|
219
|
-
if (node.quantifier) {
|
220
|
-
return wrapQuantifier(node, out, get, stringSize);
|
221
|
-
}
|
222
|
-
|
223
|
-
return [
|
224
|
-
...out,
|
225
|
-
...(get ? checkFailure(): []),
|
226
|
-
];
|
227
|
-
};
|
228
|
-
|
229
|
-
const generateSet = (node, negated, get, stringSize) => {
|
230
|
-
// for a single char we do not need a tmp, it is just like a plain character
|
231
|
-
const singleChar = node.body.length === 1 && node.body[0].type === 'Character';
|
232
|
-
if (singleChar) return generateChar(node.body[0], negated, get, stringSize)
|
233
|
-
|
234
|
-
const hasQuantifier = !!node.quantifier;
|
235
|
-
|
236
|
-
const out = [
|
237
|
-
...(get ? getNextChar(stringSize, hasQuantifier) : []),
|
238
|
-
[ Opcodes.local_set, Tmp ],
|
239
|
-
];
|
240
|
-
|
241
|
-
for (const x of node.body) {
|
242
|
-
out.push(
|
243
|
-
[ Opcodes.local_get, Tmp ],
|
244
|
-
...generate(x, negated, false, stringSize)
|
245
|
-
);
|
246
|
-
}
|
247
|
-
|
248
|
-
if (node.body.length > 0) {
|
249
|
-
for (let i = 0; i < node.body.length - 1; i++) {
|
250
|
-
out.push(negated ? [ Opcodes.i32_or ] : [ Opcodes.i32_and ])
|
251
|
-
}
|
252
|
-
};
|
253
|
-
|
254
|
-
if (hasQuantifier) {
|
255
|
-
return wrapQuantifier(node, out, get, stringSize);
|
256
|
-
}
|
257
|
-
|
258
|
-
return [
|
259
|
-
...out,
|
260
|
-
...checkFailure(),
|
261
|
-
];
|
262
|
-
};
|
263
|
-
|
264
|
-
const generateRange = (node, negated, get, stringSize) => {
|
265
|
-
return [
|
266
|
-
...(get ? getNextChar(stringSize) : []),
|
267
|
-
...(get ? [ [ Opcodes.local_tee, Tmp ] ] : []),
|
268
|
-
|
269
|
-
number(node.from.charCodeAt(0), Valtype.i32),
|
270
|
-
// negated ? [ Opcodes.i32_lt_s ] : [ Opcodes.i32_ge_s ],
|
271
|
-
negated ? [ Opcodes.i32_ge_s ] : [ Opcodes.i32_lt_s ],
|
272
|
-
|
273
|
-
[ Opcodes.local_get, Tmp ],
|
274
|
-
number(node.to.charCodeAt(0), Valtype.i32),
|
275
|
-
// negated ? [ Opcodes.i32_gt_s ] : [ Opcodes.i32_le_s ],
|
276
|
-
negated ? [ Opcodes.i32_le_s ] : [ Opcodes.i32_gt_s ],
|
277
|
-
|
278
|
-
negated ? [ Opcodes.i32_and ] : [ Opcodes.i32_or ],
|
279
|
-
...(get ? checkFailure(): [])
|
280
|
-
];
|
281
|
-
};
|
282
|
-
|
283
|
-
const generateGroup = (node, negated, get) => {
|
284
|
-
// todo
|
285
|
-
return [];
|
286
|
-
};
|
287
|
-
|
288
|
-
const wrapFunc = (regex, func, name, index) => {
|
289
|
-
const parsed = parse(regex);
|
290
|
-
|
291
|
-
return outputFunc([
|
292
|
-
[ Opcodes.local_get, 1 ],
|
293
|
-
number(TYPES.string, Valtype.i32),
|
294
|
-
[ Opcodes.i32_eq ],
|
295
|
-
[ Opcodes.if, Valtype.i32 ],
|
296
|
-
// string
|
297
|
-
...generate(parsed, false, true, 2, func),
|
298
|
-
[ Opcodes.else ],
|
299
|
-
// bytestring
|
300
|
-
...generate(parsed, false, true, 1, func),
|
301
|
-
[ Opcodes.end ]
|
302
|
-
], name, index, types[func]);
|
303
|
-
};
|
304
|
-
|
305
|
-
export const test = (regex, index = 0, name = 'regex_test_' + regex) => wrapFunc(regex, 'test', name, index);
|
306
|
-
export const search = (regex, index = 0, name = 'regex_search_' + regex) => wrapFunc(regex, 'search', name, index);
|
307
|
-
|
308
|
-
export const types = {
|
309
|
-
test: TYPES.boolean,
|
310
|
-
search: TYPES.number
|
311
|
-
};
|
312
|
-
|
313
|
-
const outputFunc = (wasm, name, index, returnType) => ({
|
314
|
-
name,
|
315
|
-
index,
|
316
|
-
wasm,
|
317
|
-
returnType,
|
318
|
-
|
319
|
-
export: true,
|
320
|
-
params: [ Valtype.i32, Valtype.i32 ],
|
321
|
-
returns: [ Valtype.i32 ],
|
322
|
-
locals: {
|
323
|
-
basePointer: { idx: 0, type: Valtype.i32 },
|
324
|
-
inputType: { idx: 1, type: Valtype.i32 },
|
325
|
-
counter: { idx: 2, type: Valtype.i32 },
|
326
|
-
pointer: { idx: 3, type: Valtype.i32 },
|
327
|
-
length: { idx: 4, type: Valtype.i32 },
|
328
|
-
tmp: { idx: 5, type: Valtype.i32 },
|
329
|
-
quantifierTmp: { idx: 6, type: Valtype.i32 },
|
330
|
-
}
|
331
|
-
});
|