porffor 0.0.0-c743344 → 0.0.0-d650361
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +43 -12
- package/c +0 -0
- package/c.exe +0 -0
- package/compiler/2c.js +350 -0
- package/compiler/builtins.js +6 -1
- package/compiler/codeGen.js +295 -75
- package/compiler/decompile.js +1 -1
- package/compiler/index.js +44 -2
- package/compiler/opt.js +1 -1
- package/compiler/parse.js +1 -0
- package/compiler/prototype.js +90 -29
- package/compiler/sections.js +25 -0
- package/compiler/wrap.js +12 -3
- package/cool.exe +0 -0
- package/g +0 -0
- package/g.exe +0 -0
- package/hi.c +37 -0
- package/out +0 -0
- package/out.exe +0 -0
- package/package.json +1 -1
- package/r.js +39 -0
- package/rhemyn/README.md +37 -0
- package/rhemyn/compile.js +214 -0
- package/rhemyn/parse.js +321 -0
- package/rhemyn/test/parse.js +59 -0
- package/runner/index.js +54 -40
- package/runner/transform.js +2 -1
- package/tmp.c +58 -0
package/compiler/prototype.js
CHANGED
@@ -15,13 +15,15 @@ const TYPES = {
|
|
15
15
|
bigint: 0xffffffffffff7,
|
16
16
|
|
17
17
|
// these are not "typeof" types but tracked internally
|
18
|
-
_array:
|
18
|
+
_array: 0xfffffffffff0f,
|
19
|
+
_regexp: 0xfffffffffff1f
|
19
20
|
};
|
20
21
|
|
21
22
|
// todo: turn these into built-ins once arrays and these become less hacky
|
22
23
|
|
23
24
|
export const PrototypeFuncs = function() {
|
24
25
|
const noUnlikelyChecks = process.argv.includes('-funsafe-no-unlikely-proto-checks');
|
26
|
+
const noCCAChecks = process.argv.includes('-funsafe-no-charcodeat-checks');
|
25
27
|
|
26
28
|
this[TYPES._array] = {
|
27
29
|
// lX = local accessor of X ({ get, set }), iX = local index of X, wX = wasm ops of X
|
@@ -35,7 +37,7 @@ export const PrototypeFuncs = function() {
|
|
35
37
|
[ Opcodes.i32_lt_s ],
|
36
38
|
[ Opcodes.if, Blocktype.void ],
|
37
39
|
[ Opcodes.local_get, iTmp ],
|
38
|
-
...length.
|
40
|
+
...length.getCachedI32(),
|
39
41
|
[ Opcodes.i32_add ],
|
40
42
|
[ Opcodes.local_set, iTmp ],
|
41
43
|
[ Opcodes.end ],
|
@@ -46,7 +48,7 @@ export const PrototypeFuncs = function() {
|
|
46
48
|
[ Opcodes.i32_lt_s ],
|
47
49
|
|
48
50
|
[ Opcodes.local_get, iTmp ],
|
49
|
-
...length.
|
51
|
+
...length.getCachedI32(),
|
50
52
|
[ Opcodes.i32_ge_s ],
|
51
53
|
[ Opcodes.i32_or ],
|
52
54
|
|
@@ -66,7 +68,7 @@ export const PrototypeFuncs = function() {
|
|
66
68
|
// todo: only for 1 argument
|
67
69
|
push: (pointer, length, wNewMember) => [
|
68
70
|
// get memory offset of array at last index (length)
|
69
|
-
...length.
|
71
|
+
...length.getCachedI32(),
|
70
72
|
...number(ValtypeSize[valtype], Valtype.i32),
|
71
73
|
[ Opcodes.i32_mul ],
|
72
74
|
|
@@ -78,17 +80,17 @@ export const PrototypeFuncs = function() {
|
|
78
80
|
|
79
81
|
// bump array length by 1 and return it
|
80
82
|
...length.setI32([
|
81
|
-
...length.
|
83
|
+
...length.getCachedI32(),
|
82
84
|
...number(1, Valtype.i32),
|
83
85
|
[ Opcodes.i32_add ]
|
84
86
|
]),
|
85
87
|
|
86
|
-
...length.get
|
88
|
+
...length.get()
|
87
89
|
],
|
88
90
|
|
89
91
|
pop: (pointer, length) => [
|
90
92
|
// if length == 0, noop
|
91
|
-
...length.
|
93
|
+
...length.getCachedI32(),
|
92
94
|
[ Opcodes.i32_eqz ],
|
93
95
|
[ Opcodes.if, Blocktype.void ],
|
94
96
|
...number(UNDEFINED),
|
@@ -99,13 +101,13 @@ export const PrototypeFuncs = function() {
|
|
99
101
|
|
100
102
|
// decrement length by 1
|
101
103
|
...length.setI32([
|
102
|
-
...length.
|
104
|
+
...length.getCachedI32(),
|
103
105
|
...number(1, Valtype.i32),
|
104
106
|
[ Opcodes.i32_sub ]
|
105
107
|
]),
|
106
108
|
|
107
109
|
// load last element
|
108
|
-
...length.
|
110
|
+
...length.getCachedI32(),
|
109
111
|
...number(ValtypeSize[valtype], Valtype.i32),
|
110
112
|
[ Opcodes.i32_mul ],
|
111
113
|
|
@@ -114,7 +116,7 @@ export const PrototypeFuncs = function() {
|
|
114
116
|
|
115
117
|
shift: (pointer, length) => [
|
116
118
|
// if length == 0, noop
|
117
|
-
...length.
|
119
|
+
...length.getCachedI32(),
|
118
120
|
Opcodes.i32_eqz,
|
119
121
|
[ Opcodes.if, Blocktype.void ],
|
120
122
|
...number(UNDEFINED),
|
@@ -125,7 +127,7 @@ export const PrototypeFuncs = function() {
|
|
125
127
|
|
126
128
|
// decrement length by 1
|
127
129
|
...length.setI32([
|
128
|
-
...length.
|
130
|
+
...length.getCachedI32(),
|
129
131
|
...number(1, Valtype.i32),
|
130
132
|
[ Opcodes.i32_sub ]
|
131
133
|
]),
|
@@ -139,11 +141,66 @@ export const PrototypeFuncs = function() {
|
|
139
141
|
...number(pointer + ValtypeSize.i32 + ValtypeSize[valtype], Valtype.i32), // src = base array index + length size + an index
|
140
142
|
...number(pageSize - ValtypeSize.i32 - ValtypeSize[valtype], Valtype.i32), // size = PageSize - length size - an index
|
141
143
|
[ ...Opcodes.memory_copy, 0x00, 0x00 ]
|
144
|
+
],
|
145
|
+
|
146
|
+
// Emits wasm ops for Array.prototype.fill with a single element argument:
// every slot from the last index down to index 0 is overwritten with the value,
// then the array pointer is pushed as the result.
//   pointer  - base address of the array
//   length   - length accessor (getCachedI32 / setCachedI32)
//   wElement - wasm ops producing the fill value
//   iTmp     - index of the scratch local that holds the fill value
fill: (pointer, length, wElement, iTmp) => {
  // evaluate the fill value once and park it in the scratch local
  const stashElement = [
    ...wElement,
    [ Opcodes.local_set, iTmp ]
  ];

  // reuse the cached i32 length as a byte offset: (length - 1) * sizeof value
  const initOffset = [
    ...length.getCachedI32(),
    ...number(1, Valtype.i32),
    [ Opcodes.i32_sub ],
    ...number(ValtypeSize[valtype], Valtype.i32),
    [ Opcodes.i32_mul ],
    ...length.setCachedI32()
  ];

  // a negative offset means there is nothing to write: push the array pointer and branch out
  // (omitted when -funsafe-no-unlikely-proto-checks is set)
  const emptyGuard = noUnlikelyChecks ? [] : [
    ...length.getCachedI32(),
    ...number(0, Valtype.i32),
    [ Opcodes.i32_lt_s ],
    [ Opcodes.if, Blocktype.void ],
    ...number(pointer),
    [ Opcodes.br, 1 ],
    [ Opcodes.end ]
  ];

  const storeLoop = [
    [ Opcodes.loop, Blocktype.void ],

    // store the element at the current offset (static offset skips the length field)
    ...length.getCachedI32(),
    [ Opcodes.local_get, iTmp ],
    [ Opcodes.store, Math.log2(ValtypeSize[valtype]) - 1, ...unsignedLEB128(pointer + ValtypeSize.i32) ],

    // step the offset back by sizeof value
    ...length.getCachedI32(),
    ...number(ValtypeSize[valtype], Valtype.i32),
    [ Opcodes.i32_sub ],
    ...length.setCachedI32(),

    // keep looping while offset >= 0
    ...length.getCachedI32(),
    ...number(0, Valtype.i32),
    [ Opcodes.i32_ge_s ],
    [ Opcodes.br_if, 0 ],

    [ Opcodes.end ]
  ];

  return [
    ...stashElement,
    ...initOffset,
    ...emptyGuard,
    ...storeLoop,

    // return this array
    ...number(pointer)
  ];
}
|
143
198
|
};
|
144
199
|
|
145
200
|
this[TYPES._array].at.local = Valtype.i32;
|
146
201
|
this[TYPES._array].push.noArgRetLength = true;
|
202
|
+
this[TYPES._array].fill.local = valtypeBinary;
|
203
|
+
this[TYPES._array].fill.returnType = TYPES._array;
|
147
204
|
|
148
205
|
this[TYPES.string] = {
|
149
206
|
at: (pointer, length, wIndex, iTmp, arrayShell) => {
|
@@ -165,7 +222,7 @@ export const PrototypeFuncs = function() {
|
|
165
222
|
[ Opcodes.i32_lt_s ],
|
166
223
|
[ Opcodes.if, Blocktype.void ],
|
167
224
|
[ Opcodes.local_get, iTmp ],
|
168
|
-
...length.
|
225
|
+
...length.getCachedI32(),
|
169
226
|
[ Opcodes.i32_add ],
|
170
227
|
[ Opcodes.local_set, iTmp ],
|
171
228
|
[ Opcodes.end ],
|
@@ -176,7 +233,7 @@ export const PrototypeFuncs = function() {
|
|
176
233
|
[ Opcodes.i32_lt_s ],
|
177
234
|
|
178
235
|
[ Opcodes.local_get, iTmp ],
|
179
|
-
...length.
|
236
|
+
...length.getCachedI32(),
|
180
237
|
[ Opcodes.i32_ge_s ],
|
181
238
|
[ Opcodes.i32_or ],
|
182
239
|
|
@@ -232,27 +289,31 @@ export const PrototypeFuncs = function() {
|
|
232
289
|
return [
|
233
290
|
...wIndex,
|
234
291
|
Opcodes.i32_to,
|
235
|
-
[ Opcodes.local_set, iTmp ],
|
236
292
|
|
237
|
-
|
238
|
-
|
293
|
+
...(noCCAChecks ? [] : [
|
294
|
+
[ Opcodes.local_set, iTmp ],
|
295
|
+
|
296
|
+
// index < 0
|
297
|
+
...(noUnlikelyChecks ? [] : [
|
298
|
+
[ Opcodes.local_get, iTmp ],
|
299
|
+
...number(0, Valtype.i32),
|
300
|
+
[ Opcodes.i32_lt_s ],
|
301
|
+
]),
|
302
|
+
|
303
|
+
// index >= length
|
239
304
|
[ Opcodes.local_get, iTmp ],
|
240
|
-
...
|
241
|
-
[ Opcodes.
|
242
|
-
]),
|
305
|
+
...length.getCachedI32(),
|
306
|
+
[ Opcodes.i32_ge_s ],
|
243
307
|
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
308
|
+
...(noUnlikelyChecks ? [] : [ [ Opcodes.i32_or ] ]),
|
309
|
+
[ Opcodes.if, Blocktype.void ],
|
310
|
+
...number(NaN),
|
311
|
+
[ Opcodes.br, 1 ],
|
312
|
+
[ Opcodes.end ],
|
248
313
|
|
249
|
-
|
250
|
-
|
251
|
-
...number(NaN),
|
252
|
-
[ Opcodes.br, 1 ],
|
253
|
-
[ Opcodes.end ],
|
314
|
+
[ Opcodes.local_get, iTmp ],
|
315
|
+
]),
|
254
316
|
|
255
|
-
[ Opcodes.local_get, iTmp ],
|
256
317
|
...number(ValtypeSize.i16, Valtype.i32),
|
257
318
|
[ Opcodes.i32_mul ],
|
258
319
|
|
package/compiler/sections.js
CHANGED
@@ -8,11 +8,26 @@ const createSection = (type, data) => [
|
|
8
8
|
...encodeVector(data)
|
9
9
|
];
|
10
10
|
|
11
|
+
// Builds a wasm custom section: the custom section id followed by a
// vector-encoded payload made of the encoded section name and the raw data.
const customSection = (name, data) => {
  const sectionId = Section.custom;
  const payload = [ ...encodeString(name), ...data ];

  return [ sectionId, ...encodeVector(payload) ];
};
|
15
|
+
|
16
|
+
// Packs one compilation hint into a single byte made of four 2-bit fields,
// from high to low bits: spare, top tier, baseline tier, compilation strategy.
//   tiers:    0x00 = default, 0x01 = baseline (liftoff), 0x02 = optimized (turbofan)
//   strategy: 0x00 = default, 0x01 = lazy, 0x02 = eager, 0x03 = lazy baseline, eager top tier
const chHint = (topTier, baselineTier, strategy) => {
  const topBits = topTier << 4;
  const baselineBits = baselineTier << 2;

  return topBits | baselineBits | strategy;
};
|
22
|
+
|
11
23
|
export default (funcs, globals, tags, pages, flags) => {
|
12
24
|
const types = [], typeCache = {};
|
13
25
|
|
14
26
|
const optLevel = parseInt(process.argv.find(x => x.startsWith('-O'))?.[2] ?? 1);
|
15
27
|
|
28
|
+
const compileHints = process.argv.includes('-compile-hints');
|
29
|
+
if (compileHints) log('sections', 'warning: compile hints is V8 only w/ experimental arg! (you used -compile-hints)');
|
30
|
+
|
16
31
|
const getType = (params, returns) => {
|
17
32
|
const hash = `${params.join(',')}_${returns.join(',')}`;
|
18
33
|
if (optLog) log('sections', `getType(${JSON.stringify(params)}, ${JSON.stringify(returns)}) -> ${hash} | cache: ${typeCache[hash]}`);
|
@@ -61,6 +76,7 @@ export default (funcs, globals, tags, pages, flags) => {
|
|
61
76
|
}
|
62
77
|
}
|
63
78
|
}
|
79
|
+
globalThis.importFuncs = importFuncs;
|
64
80
|
|
65
81
|
if (optLog) log('sections', `treeshake: using ${importFuncs.length}/${importedFuncs.length} imports`);
|
66
82
|
|
@@ -74,6 +90,14 @@ export default (funcs, globals, tags, pages, flags) => {
|
|
74
90
|
encodeVector(funcs.map(x => getType(x.params, x.returns))) // type indexes
|
75
91
|
);
|
76
92
|
|
93
|
+
// compilation hints section - unspec v8 only
|
94
|
+
// https://github.com/WebAssembly/design/issues/1473#issuecomment-1431274746
|
95
|
+
const chSection = !compileHints ? [] : customSection(
|
96
|
+
'compilationHints',
|
97
|
+
// for now just do everything as optimise eager
|
98
|
+
encodeVector(funcs.map(_ => chHint(0x02, 0x02, 0x02)))
|
99
|
+
);
|
100
|
+
|
77
101
|
const globalSection = Object.keys(globals).length === 0 ? [] : createSection(
|
78
102
|
Section.global,
|
79
103
|
encodeVector(Object.keys(globals).map(x => [ globals[x].type, 0x01, ...number(globals[x].init ?? 0, globals[x].type).flat(), Opcodes.end ]))
|
@@ -146,6 +170,7 @@ export default (funcs, globals, tags, pages, flags) => {
|
|
146
170
|
...typeSection,
|
147
171
|
...importSection,
|
148
172
|
...funcSection,
|
173
|
+
...chSection,
|
149
174
|
...memorySection,
|
150
175
|
...tagSection,
|
151
176
|
...globalSection,
|
package/compiler/wrap.js
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
import compile from './index.js';
|
2
2
|
import decompile from './decompile.js';
|
3
|
-
import fs from 'node:fs';
|
3
|
+
// import fs from 'node:fs';
|
4
4
|
|
5
5
|
const bold = x => `\u001b[1m${x}\u001b[0m`;
|
6
6
|
|
7
7
|
const typeBase = 0xffffffffffff0;
|
8
|
+
const internalTypeBase = 0xfffffffffff0f;
|
8
9
|
const TYPES = {
|
9
10
|
[typeBase]: 'number',
|
10
11
|
[typeBase + 1]: 'boolean',
|
@@ -16,7 +17,8 @@ const TYPES = {
|
|
16
17
|
[typeBase + 7]: 'bigint',
|
17
18
|
|
18
19
|
// internal
|
19
|
-
[
|
20
|
+
[internalTypeBase]: '_array',
|
21
|
+
[internalTypeBase + 1]: '_regexp'
|
20
22
|
};
|
21
23
|
|
22
24
|
export default async (source, flags = [ 'module' ], customImports = {}, print = str => process.stdout.write(str)) => {
|
@@ -27,16 +29,23 @@ export default async (source, flags = [ 'module' ], customImports = {}, print =
|
|
27
29
|
|
28
30
|
if (source.includes('export function')) flags.push('module');
|
29
31
|
|
30
|
-
fs.writeFileSync('out.wasm', Buffer.from(wasm));
|
32
|
+
// fs.writeFileSync('out.wasm', Buffer.from(wasm));
|
31
33
|
|
32
34
|
times.push(performance.now() - t1);
|
33
35
|
if (flags.includes('info')) console.log(bold(`compiled in ${times[0].toFixed(2)}ms`));
|
34
36
|
|
37
|
+
// Reads a string out of wasm memory. The layout read here is an i32 length at
// `pointer`, followed by that many 16-bit code units starting at `pointer + 4`.
const getString = pointer => {
  // read the length as a plain number; passing the Int32Array itself as a length
  // only worked through implicit array -> string -> number coercion
  const length = new Int32Array(memory.buffer, pointer, 1)[0];

  const codeUnits = new Uint16Array(memory.buffer, pointer + 4, length);
  return Array.from(codeUnits, unit => String.fromCharCode(unit)).join('');
};
|
42
|
+
|
35
43
|
const t2 = performance.now();
|
36
44
|
const { instance } = await WebAssembly.instantiate(wasm, {
|
37
45
|
'': {
|
38
46
|
p: valtype === 'i64' ? i => print(Number(i).toString()) : i => print(i.toString()),
|
39
47
|
c: valtype === 'i64' ? i => print(String.fromCharCode(Number(i))) : i => print(String.fromCharCode(i)),
|
48
|
+
s: valtype === 'i64' ? i => print(getString(Number(i))) : i => print(getString(i)),
|
40
49
|
a: c => { if (!Number(c)) throw new Error(`assert failed`); },
|
41
50
|
t: _ => performance.now(),
|
42
51
|
...customImports
|
package/cool.exe
ADDED
Binary file
|
package/g
ADDED
Binary file
|
package/g.exe
ADDED
Binary file
|
package/hi.c
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
#include <stdio.h>
|
2
|
+
|
3
|
+
/*
 * Remainder of x / y computed as x - trunc(x / y) * y.
 *
 * `static inline` gives the function internal linkage, so a definition is
 * always available even when the compiler decides not to inline the call
 * (a plain C99 `inline` definition provides no external definition and can
 * fail at link time). The quotient is truncated through `long long` instead
 * of `int` so quotients beyond the int range no longer overflow the cast.
 */
static inline double f64_f(double x, double y) {
  return x - (long long)(x / y) * y;
}
|
6
|
+
|
7
|
+
/*
 * Trial-division primality check on a double.
 * Returns 1.0 when no i in [2, number) leaves a zero remainder, and 0.0 when
 * number < 2 or such a divisor exists.
 */
double isPrime(double number) {
  if (number < 2e+0) return 0e+0;

  for (double i = 2e+0; i < number; i = i + 1e+0) {
    if (f64_f(number, i) == 0e+0) return 0e+0;
  }

  return 1e+0;
}
|
22
|
+
|
23
|
+
/* Sums every counter in [0, 1e5] for which isPrime reports 1.0 and prints the total. */
int main() {
  double sum = 0e+0;

  for (double counter = 0e+0; counter <= 1e+5; counter = counter + 1e+0) {
    if (isPrime(counter) == 1e+0) sum = sum + counter;
  }

  printf("%f\n", sum);
}
|
37
|
+
|
package/out
ADDED
Binary file
|
package/out.exe
ADDED
Binary file
|
package/package.json
CHANGED
package/r.js
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
// SameValue-style comparison: NaN equals NaN, and +0 is distinct from -0.
compareArray.isSameValue = function(a, b) {
  // NaN is the only value that is not equal to itself
  const bothNaN = a !== a && b !== b;
  if (bothNaN) return true;

  if (a !== b) return false;

  // strictly equal: zeros still need their signs compared via the reciprocal
  return a !== 0 || 1 / a === 1 / b;
};
|
7
|
+
|
8
|
+
// Returns true when both arguments are non-nullish, have equal length and every
// pair of elements passes compareArray.isSameValue; false otherwise.
function compareArray(a, b) {
  // a nullish operand never compares equal
  if (a == null || b == null) return false;

  // megahack: all arrays from now on will be >0 pointer
  const _hack = '';

  // hack: enforce type inference of being arrays
  a ??= [];
  b ??= [];

  if (b.length !== a.length) return false;

  let idx = 0;
  while (idx < a.length) {
    if (!compareArray.isSameValue(b[idx], a[idx])) return false;
    idx++;
  }

  return true;
}
|
31
|
+
|
32
|
+
// prints the comparison result for one pair of operands
const show = (left, right) => console.log(compareArray(left, right));

// nullish operands
show(null, []);
show(undefined, []);

// arrays of equal and differing lengths
show([], []);
show([ 1 ], []);
show([ 1 ], [ 1 ]);
show([ 1, 2 ], [ 1 ]);
show([ 1, 2 ], [ 1, 2 ]);
|
package/rhemyn/README.md
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# rhemyn
|
2
|
+
A basic, experimental, work-in-progress regex engine and ahead-of-time (AOT) wasm compiler written in JS; it is the regex engine for porffor. It uses its own regex parser and has no dependencies (other than porffor internals). <br>
|
3
|
+
age: ~1 day
|
4
|
+
|
5
|
+
Made for use with porffor, but it could possibly be adapted for other projects. Implementation/library notes:
|
6
|
+
- exposes functions for each regex "operation" (eg test, match)
|
7
|
+
- given a regex pattern string (eg `a+`), it returns a "function" object
|
8
|
+
- the returned wasm function expects an i32 pointer to a UTF-16 string (a UTF-8 option could be added later if anyone else actually wants to use this)
|
9
|
+
|
10
|
+
## syntax
|
11
|
+
🟢 supported 🟡 partial 🟠 parsed only 🔴 unsupported
|
12
|
+
|
13
|
+
- 🟢 literal characters (eg `a`)
|
14
|
+
- 🟢 escaping (eg `\.\n\cJ\x0a\u000a`)
|
15
|
+
- 🟢 character itself (eg `\.`)
|
16
|
+
- 🟢 escape sequences (eg `\n`)
|
17
|
+
- 🟢 control character (eg `\cJ`)
|
18
|
+
- 🟢 unicode code points (eg `\x00`, `\u0000`)
|
19
|
+
- 🟢 sets (eg `[ab]`)
|
20
|
+
- 🟢 ranges (eg `[a-z]`)
|
21
|
+
- 🟢 negated sets (eg `[^ab]`)
|
22
|
+
- 🟢 metacharacters
|
23
|
+
- 🟢 dot (eg `a.b`)
|
24
|
+
- 🟢 digit, not digit (eg `\d\D`)
|
25
|
+
- 🟢 word, not word (eg `\w\W`)
|
26
|
+
- 🟢 whitespace, not whitespace (eg `\s\S`)
|
27
|
+
- 🟠 quantifiers
|
28
|
+
- 🟠 star (eg `a*`)
|
29
|
+
- 🟠 plus (eg `a+`)
|
30
|
+
- 🟠 optional (eg `a?`)
|
31
|
+
- 🟠 lazy modifier (eg `a*?`)
|
32
|
+
- 🔴 n repetitions (eg `a{4}`)
|
33
|
+
- 🔴 n-m repetitions (eg `a{2,4}`)
|
34
|
+
- 🔴 assertions
|
35
|
+
- 🔴 beginning (eg `^a`)
|
36
|
+
- 🔴 end (eg `a$`)
|
37
|
+
- 🔴 word boundary assertion (eg `\b\B`)
|
@@ -0,0 +1,214 @@
|
|
1
|
+
import { Blocktype, Opcodes, Valtype, PageSize, ValtypeSize } from '../compiler/wasmSpec.js';
import { number } from '../compiler/embedding.js';
import { signedLEB128, unsignedLEB128 } from '../compiler/encoding.js';
import decompile from '../compiler/decompile.js';
import parse from './parse.js';
|
5
|
+
|
6
|
+
// Wasm local indexes used by the generated regex functions:
//   BasePointer - base string pointer
//   IterPointer - this iteration's base pointer
//   EndPointer  - pointer for the end
//   Counter     - what char we are running on
//   Pointer     - next char BYTE pointer
//   Length      - holds the length loaded from the base pointer
//   Tmp         - scratch local
const [
  BasePointer,
  IterPointer,
  EndPointer,
  Counter,
  Pointer,
  Length,
  Tmp
] = [ 0, 1, 2, 3, 4, 5, 6 ];

// when set, getNextChar() does not emit the pointer advance after loading a char
let exprLastGet = false;
|
16
|
+
/**
 * Emits wasm ops for one node of a parsed regex AST.
 *
 * @param node    AST node; dispatch is on `node.type` ('Expression', 'Character',
 *                'Set', 'Group', 'Range'). Any other type yields an empty array.
 * @param negated whether the surrounding context is negated
 * @param get     whether the emitted ops should fetch the next char themselves
 * @param func    regex operation being compiled, 'test' or 'search'; only used
 *                for 'Expression' nodes, where it selects the match / no-match result
 * @returns       array of wasm ops
 * @throws {TypeError} for an 'Expression' node with an unsupported `func`
 */
const generate = (node, negated = false, get = true, func = 'test') => {
  let out = [];
  switch (node.type) {
    case 'Expression': {
      // fail with a clear message instead of spreading `undefined` further down
      if (func !== 'test' && func !== 'search') {
        throw new TypeError(`rhemyn: unsupported regex operation '${func}'`);
      }

      exprLastGet = false;
      out = [
        // set length local
        [ Opcodes.local_get, BasePointer ],
        [ Opcodes.i32_load, Math.log2(ValtypeSize.i32) - 1, 0 ],
        [ Opcodes.local_set, Length ],

        // set iter pointer local as base + sizeof i32 initially
        [ Opcodes.local_get, BasePointer ],
        ...number(ValtypeSize.i32, Valtype.i32),
        [ Opcodes.i32_add ],
        [ Opcodes.local_set, IterPointer ],

        [ Opcodes.loop, Blocktype.void ],

        // reset pointer as iter pointer
        [ Opcodes.local_get, IterPointer ],
        [ Opcodes.local_set, Pointer ],

        [ Opcodes.block, Blocktype.void ],

        // generate checks
        ...node.body.flatMap((x, i) => {
          // the last non-group check does not need to advance the pointer after its read
          exprLastGet = x.type !== 'Group' && i === (node.body.length - 1);
          return generate(x, negated);
        }),

        // reached end without branching out, successful match
        ...({
          test: number(1, Valtype.i32),
          search: [
            [ Opcodes.local_get, Counter ]
          ]
        })[func],
        [ Opcodes.return ],

        [ Opcodes.end ],

        // increment iter pointer by sizeof i16
        [ Opcodes.local_get, IterPointer ],
        ...number(ValtypeSize.i16, Valtype.i32),
        [ Opcodes.i32_add ],
        [ Opcodes.local_set, IterPointer ],

        // increment counter by 1 and loop while counter < length.
        // (a `!=` comparison never becomes false for a zero-length string,
        // because the counter is already 1 at the first check)
        [ Opcodes.local_get, Counter ],
        ...number(1, Valtype.i32),
        [ Opcodes.i32_add ],
        [ Opcodes.local_tee, Counter ],

        [ Opcodes.local_get, Length ],
        [ Opcodes.i32_lt_s ],

        [ Opcodes.br_if, 0 ],
        [ Opcodes.end ],

        // no match: 0 for test, -1 for search
        ...number(({
          test: 0,
          search: -1
        })[func], Valtype.i32)
      ];

      if (globalThis.regexLog) {
        const underline = x => `\u001b[4m\u001b[1m${x}\u001b[0m`;
        console.log(`\n${underline('ast')}`);
        console.log(node);
        console.log(`\n${underline('wasm bytecode')}\n` + decompile(out) + '\n');
      }

      break;
    }

    case 'Character':
      out = generateChar(node, node.negated ^ negated, get);
      break;

    case 'Set':
      out = generateSet(node, node.negated, get);
      break;

    case 'Group':
      out = generateGroup(node, negated, get);
      break;

    case 'Range':
      out = generateRange(node, negated, get);
      break;
  }

  return out;
};
|
111
|
+
|
112
|
+
// Emits ops that load the 16-bit char at the Pointer local and, unless
// exprLastGet is set, advance Pointer by sizeof i16 afterwards.
const getNextChar = () => {
  // get char from pointer
  const loadChar = [
    [ Opcodes.local_get, Pointer ],
    [ Opcodes.i32_load16_u, Math.log2(ValtypeSize.i16) - 1, ...unsignedLEB128(0) ]
  ];

  if (exprLastGet) return loadChar;

  // pointer += sizeof i16
  const advancePointer = [
    [ Opcodes.local_get, Pointer ],
    ...number(ValtypeSize.i16, Valtype.i32),
    [ Opcodes.i32_add ],
    [ Opcodes.local_set, Pointer ]
  ];

  return [ ...loadChar, ...advancePointer ];
};
|
125
|
+
|
126
|
+
// Emits the mismatch exit: a single conditional branch to label 0 that
// consumes the comparison result left on the stack.
// (An earlier, disabled variant wrapped every mismatch in `if ... return 0 ... end`;
// surely that is not needed for every single mismatch.)
const checkFailure = () => {
  const branchOut = [ Opcodes.br_if, 0 ];

  return [ branchOut ];
};
|
135
|
+
|
136
|
+
// Emits a comparison of a char against node.char. The result is non-zero on a
// mismatch (or on a match when negated). When `get` is set, the char is fetched
// first and the failure branch is appended; otherwise the caller supplies the
// char on the stack and handles the result.
const generateChar = (node, negated, get) => {
  const fetch = get ? getNextChar() : [];
  const expected = number(node.char.charCodeAt(0), Valtype.i32);
  const compare = [ negated ? Opcodes.i32_eq : Opcodes.i32_ne ];
  const bail = get ? checkFailure() : [];

  return [ ...fetch, ...expected, compare, ...bail ];
};
|
144
|
+
|
145
|
+
// Emits ops for a character set such as `[ab]` or `[^a-z]`.
//
// Each member leaves a "failed" flag on the stack (see generateChar and
// generateRange with get = false). The flags are folded with AND for a plain
// set (fail only if every member mismatched) or OR for a negated set (fail if
// any member matched), then the failure branch is emitted.
//
//   node    - Set node; node.body holds the members
//   negated - whether the set is negated
//   get     - whether to fetch the next char here
const generateSet = (node, negated, get) => {
  const members = node.body;

  // for a single char we do not need a tmp, the char is compared straight off the stack
  const singleChar = members.length === 1 && members[0].type === 'Character';

  const out = [
    ...(get ? getNextChar() : []),
    ...(singleChar ? [] : [ [ Opcodes.local_set, Tmp ] ])
  ];

  if (members.length === 0) {
    // empty set: use the identity of the fold so br_if still has an operand.
    // `[]` matches nothing (always fail), `[^]` matches anything (never fail)
    out.push(...number(negated ? 0 : 1, Valtype.i32));
  }

  for (const member of members) {
    if (!singleChar) out.push([ Opcodes.local_get, Tmp ]);
    out.push(...generate(member, negated, false));
  }

  // n members need n - 1 combining ops
  for (let i = 1; i < members.length; i++) {
    out.push(negated ? [ Opcodes.i32_or ] : [ Opcodes.i32_and ]);
  }

  return [
    ...out,
    ...checkFailure()
  ];
};
|
169
|
+
|
170
|
+
// Emits ops for a char range (node.from .. node.to). The result is non-zero
// when the char lies outside the range (or inside it when negated).
// When `get` is set, the char is fetched, kept in Tmp via local_tee, and the
// failure branch is appended. Otherwise the first comparison uses the value
// already on the stack and the second re-reads Tmp
// (NOTE(review): presumably the caller stored the char in Tmp - confirm).
const generateRange = (node, negated, get) => {
  const fetch = get ? [ ...getNextChar(), [ Opcodes.local_tee, Tmp ] ] : [];

  const lowerBound = number(node.from.charCodeAt(0), Valtype.i32);
  const upperBound = number(node.to.charCodeAt(0), Valtype.i32);

  // plain: (char < from) | (char > to)    negated: (char >= from) & (char <= to)
  const [ lowerOp, upperOp, combineOp ] = negated
    ? [ Opcodes.i32_ge_s, Opcodes.i32_le_s, Opcodes.i32_and ]
    : [ Opcodes.i32_lt_s, Opcodes.i32_gt_s, Opcodes.i32_or ];

  const bail = get ? checkFailure() : [];

  return [
    ...fetch,

    ...lowerBound,
    [ lowerOp ],

    [ Opcodes.local_get, Tmp ],
    ...upperBound,
    [ upperOp ],

    [ combineOp ],
    ...bail
  ];
};
|
188
|
+
|
189
|
+
// Placeholder for group code generation: nothing is emitted yet and the
// result is undefined. TODO: implement groups.
const generateGroup = (node, negated, get) => {
  return undefined;
};
|
192
|
+
|
193
|
+
// Parses `regex`, generates the wasm for the given operation and wraps it in a function object.
const compileFor = (func, regex, index, name) => {
  const ast = parse(regex);
  const wasm = generate(ast, false, true, func);

  return outputFunc(wasm, name, index);
};

// Function object for the regex `test` operation.
export const test = (regex, index = 0, name = 'regex_test_' + regex) => compileFor('test', regex, index, name);

// Function object for the regex `search` operation.
export const search = (regex, index = 0, name = 'regex_search_' + regex) => compileFor('search', regex, index, name);
|
195
|
+
|
196
|
+
// Wraps generated wasm ops in a function object: exported, taking one i32 and
// returning one i32, with seven i32 locals whose indexes mirror the local
// index constants used during generation.
const outputFunc = (wasm, name, index) => {
  const localNames = [ 'basePointer', 'iterPointer', 'endPointer', 'counter', 'pointer', 'length', 'tmp' ];
  const locals = Object.fromEntries(
    localNames.map((local, idx) => [ local, { idx, type: Valtype.i32 } ])
  );

  return {
    name,
    index,
    wasm,

    export: true,
    params: [ Valtype.i32 ],
    returns: [ Valtype.i32 ],
    returnType: 0xffffffffffff1, // boolean - todo: do not hardcode this
    locals
  };
};
|