porffor 0.2.0-fde989a → 0.2.0-fdf0fc5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/CONTRIBUTING.md +256 -0
  2. package/LICENSE +20 -20
  3. package/README.md +123 -85
  4. package/asur/README.md +2 -0
  5. package/asur/index.js +1262 -0
  6. package/byg/index.js +237 -0
  7. package/compiler/2c.js +1 -1
  8. package/compiler/{sections.js → assemble.js} +63 -15
  9. package/compiler/builtins/annexb_string.js +72 -0
  10. package/compiler/builtins/annexb_string.ts +18 -0
  11. package/compiler/builtins/array.ts +149 -0
  12. package/compiler/builtins/base64.ts +76 -0
  13. package/compiler/builtins/boolean.ts +20 -0
  14. package/compiler/builtins/crypto.ts +120 -0
  15. package/compiler/builtins/date.ts +2070 -0
  16. package/compiler/builtins/escape.ts +141 -0
  17. package/compiler/builtins/function.ts +7 -0
  18. package/compiler/builtins/int.ts +147 -0
  19. package/compiler/builtins/number.ts +534 -0
  20. package/compiler/builtins/object.ts +6 -0
  21. package/compiler/builtins/porffor.d.ts +59 -0
  22. package/compiler/builtins/set.ts +5 -0
  23. package/compiler/builtins/string.ts +1080 -0
  24. package/compiler/builtins.js +435 -279
  25. package/compiler/{codeGen.js → codegen.js} +1034 -404
  26. package/compiler/decompile.js +0 -1
  27. package/compiler/embedding.js +22 -22
  28. package/compiler/encoding.js +108 -10
  29. package/compiler/generated_builtins.js +1526 -0
  30. package/compiler/index.js +25 -34
  31. package/compiler/log.js +6 -3
  32. package/compiler/opt.js +50 -36
  33. package/compiler/parse.js +33 -23
  34. package/compiler/precompile.js +128 -0
  35. package/compiler/prefs.js +27 -0
  36. package/compiler/prototype.js +27 -42
  37. package/compiler/types.js +38 -0
  38. package/compiler/wasmSpec.js +28 -8
  39. package/compiler/wrap.js +51 -46
  40. package/package.json +9 -5
  41. package/porf +2 -0
  42. package/rhemyn/compile.js +46 -27
  43. package/rhemyn/parse.js +322 -320
  44. package/rhemyn/test/parse.js +58 -58
  45. package/runner/compare.js +34 -34
  46. package/runner/debug.js +122 -0
  47. package/runner/index.js +78 -11
  48. package/runner/profiler.js +102 -0
  49. package/runner/repl.js +42 -9
  50. package/runner/sizes.js +37 -37
  51. package/compiler/builtins/base64.js +0 -92
  52. package/filesize.cmd +0 -2
  53. package/runner/info.js +0 -89
  54. package/runner/profile.js +0 -46
  55. package/runner/results.json +0 -1
  56. package/runner/transform.js +0 -15
  57. package/tmp.c +0 -661
  58. package/util/enum.js +0 -20
@@ -2,33 +2,19 @@ import { Opcodes, Blocktype, Valtype, ValtypeSize, PageSize } from "./wasmSpec.j
2
2
  import { number } from "./embedding.js";
3
3
  import { unsignedLEB128 } from "./encoding.js";
4
4
  import { UNDEFINED } from "./builtins.js";
5
-
6
- // todo: do not duplicate this
7
- const TYPES = {
8
- number: 0x00,
9
- boolean: 0x01,
10
- string: 0x02,
11
- undefined: 0x03,
12
- object: 0x04,
13
- function: 0x05,
14
- symbol: 0x06,
15
- bigint: 0x07,
16
-
17
- // these are not "typeof" types but tracked internally
18
- _array: 0x10,
19
- _regexp: 0x11,
20
- _bytestring: 0x12
21
- };
5
+ import Prefs from './prefs.js';
6
+ import { TYPES } from './types.js';
22
7
 
23
8
  // todo: turn these into built-ins once arrays and these become less hacky
24
9
 
25
10
  export const PrototypeFuncs = function() {
26
- const noUnlikelyChecks = process.argv.includes('-funsafe-no-unlikely-proto-checks');
27
- let zeroChecks = process.argv.find(x => x.startsWith('-funsafe-zero-proto-checks='));
28
- if (zeroChecks) zeroChecks = zeroChecks.split('=')[1].split(',').reduce((acc, x) => { acc[x.toLowerCase()] = true; return acc; }, {});
11
+ const noUnlikelyChecks = Prefs.funsafeNoUnlikelyProtoChecks;
12
+
13
+ let zeroChecks;
14
+ if (Prefs.zeroChecks) zeroChecks = Prefs.zeroChecks.split('=')[1].split(',').reduce((acc, x) => { acc[x.toLowerCase()] = true; return acc; }, {});
29
15
  else zeroChecks = {};
30
16
 
31
- this[TYPES._array] = {
17
+ this[TYPES.array] = {
32
18
  // lX = local accessor of X ({ get, set }), iX = local index of X, wX = wasm ops of X
33
19
  at: (pointer, length, wIndex, iTmp) => [
34
20
  ...wIndex,
@@ -148,7 +134,7 @@ export const PrototypeFuncs = function() {
148
134
  shift: (pointer, length) => [
149
135
  // if length == 0, noop
150
136
  ...length.getCachedI32(),
151
- Opcodes.i32_eqz,
137
+ [ Opcodes.i32_eqz ],
152
138
  [ Opcodes.if, Blocktype.void ],
153
139
  ...number(UNDEFINED),
154
140
  [ Opcodes.br, 1 ],
@@ -267,10 +253,10 @@ export const PrototypeFuncs = function() {
267
253
  ]
268
254
  };
269
255
 
270
- this[TYPES._array].at.local = Valtype.i32;
271
- this[TYPES._array].push.noArgRetLength = true;
272
- this[TYPES._array].fill.local = valtypeBinary;
273
- this[TYPES._array].fill.returnType = TYPES._array;
256
+ this[TYPES.array].at.local = Valtype.i32;
257
+ this[TYPES.array].push.noArgRetLength = true;
258
+ this[TYPES.array].fill.local = valtypeBinary;
259
+ this[TYPES.array].fill.returnType = TYPES.array;
274
260
 
275
261
  this[TYPES.string] = {
276
262
  at: (pointer, length, wIndex, iTmp, _, arrayShell) => {
@@ -342,8 +328,8 @@ export const PrototypeFuncs = function() {
342
328
  ...number(0, Valtype.i32), // base 0 for store later
343
329
 
344
330
  ...wIndex,
345
-
346
331
  Opcodes.i32_to,
332
+
347
333
  ...number(ValtypeSize.i16, Valtype.i32),
348
334
  [ Opcodes.i32_mul ],
349
335
 
@@ -383,7 +369,7 @@ export const PrototypeFuncs = function() {
383
369
 
384
370
  ...(noUnlikelyChecks ? [] : [ [ Opcodes.i32_or ] ]),
385
371
  [ Opcodes.if, Blocktype.void ],
386
- ...number(NaN),
372
+ ...number(valtype === 'i32' ? -1 : NaN),
387
373
  [ Opcodes.br, 1 ],
388
374
  [ Opcodes.end ],
389
375
 
@@ -489,10 +475,10 @@ export const PrototypeFuncs = function() {
489
475
  this[TYPES.string].isWellFormed.local2 = Valtype.i32;
490
476
  this[TYPES.string].isWellFormed.returnType = TYPES.boolean;
491
477
 
492
- if (process.argv.includes('-bytestring')) {
493
- this[TYPES._bytestring] = {
478
+ if (Prefs.bytestring) {
479
+ this[TYPES.bytestring] = {
494
480
  at: (pointer, length, wIndex, iTmp, _, arrayShell) => {
495
- const [ newOut, newPointer ] = arrayShell(1, 'i16');
481
+ const [ newOut, newPointer ] = arrayShell(1, 'i8');
496
482
 
497
483
  return [
498
484
  // setup new/out array
@@ -548,7 +534,7 @@ export const PrototypeFuncs = function() {
548
534
 
549
535
  // todo: out of bounds properly
550
536
  charAt: (pointer, length, wIndex, _1, _2, arrayShell) => {
551
- const [ newOut, newPointer ] = arrayShell(1, 'i16');
537
+ const [ newOut, newPointer ] = arrayShell(1, 'i8');
552
538
 
553
539
  return [
554
540
  // setup new/out array
@@ -558,7 +544,6 @@ export const PrototypeFuncs = function() {
558
544
  ...number(0, Valtype.i32), // base 0 for store later
559
545
 
560
546
  ...wIndex,
561
-
562
547
  Opcodes.i32_to,
563
548
 
564
549
  ...pointer,
@@ -597,7 +582,7 @@ export const PrototypeFuncs = function() {
597
582
 
598
583
  ...(noUnlikelyChecks ? [] : [ [ Opcodes.i32_or ] ]),
599
584
  [ Opcodes.if, Blocktype.void ],
600
- ...number(NaN),
585
+ ...number(valtype === 'i32' ? -1 : NaN),
601
586
  [ Opcodes.br, 1 ],
602
587
  [ Opcodes.end ],
603
588
 
@@ -621,14 +606,14 @@ export const PrototypeFuncs = function() {
621
606
  }
622
607
  };
623
608
 
624
- this[TYPES._bytestring].at.local = Valtype.i32;
625
- this[TYPES._bytestring].at.returnType = TYPES._bytestring;
626
- this[TYPES._bytestring].charAt.returnType = TYPES._bytestring;
627
- this[TYPES._bytestring].charCodeAt.local = Valtype.i32;
628
- this[TYPES._bytestring].charCodeAt.noPointerCache = zeroChecks.charcodeat;
609
+ this[TYPES.bytestring].at.local = Valtype.i32;
610
+ this[TYPES.bytestring].at.returnType = TYPES.bytestring;
611
+ this[TYPES.bytestring].charAt.returnType = TYPES.bytestring;
612
+ this[TYPES.bytestring].charCodeAt.local = Valtype.i32;
613
+ this[TYPES.bytestring].charCodeAt.noPointerCache = zeroChecks.charcodeat;
629
614
 
630
- this[TYPES._bytestring].isWellFormed.local = Valtype.i32;
631
- this[TYPES._bytestring].isWellFormed.local2 = Valtype.i32;
632
- this[TYPES._bytestring].isWellFormed.returnType = TYPES.boolean;
615
+ this[TYPES.bytestring].isWellFormed.local = Valtype.i32;
616
+ this[TYPES.bytestring].isWellFormed.local2 = Valtype.i32;
617
+ this[TYPES.bytestring].isWellFormed.returnType = TYPES.boolean;
633
618
  }
634
619
  };
@@ -0,0 +1,38 @@
1
+ export const TYPES = {
2
+ number: 0x00,
3
+ boolean: 0x01,
4
+ string: 0x02,
5
+ undefined: 0x03,
6
+ object: 0x04,
7
+ function: 0x05,
8
+ symbol: 0x06,
9
+ bigint: 0x07
10
+ };
11
+
12
+ export const TYPE_NAMES = {
13
+ [TYPES.number]: 'Number',
14
+ [TYPES.boolean]: 'Boolean',
15
+ [TYPES.string]: 'String',
16
+ [TYPES.undefined]: 'undefined',
17
+ [TYPES.object]: 'Object',
18
+ [TYPES.function]: 'Function',
19
+ [TYPES.symbol]: 'Symbol',
20
+ [TYPES.bigint]: 'BigInt'
21
+ };
22
+
23
+ export const INTERNAL_TYPE_BASE = 0x10;
24
+ let internalTypeIndex = INTERNAL_TYPE_BASE;
25
+ const registerInternalType = name => {
26
+ const n = internalTypeIndex++;
27
+ TYPES[name.toLowerCase()] = n;
28
+ TYPE_NAMES[n] = name;
29
+ };
30
+
31
+ // note: when adding a new internal type, please also add a deserializer to wrap.js
32
+ // (it is okay to add a throw todo deserializer for wips)
33
+
34
+ registerInternalType('Array');
35
+ registerInternalType('RegExp');
36
+ registerInternalType('ByteString');
37
+ registerInternalType('Date');
38
+ registerInternalType('Set');
@@ -1,4 +1,13 @@
1
- import { enumify } from "../util/enum.js";
1
+ const enumify = (...args) => {
2
+ const obj = {};
3
+
4
+ for (let i = 0; i < args.length; i++) {
5
+ obj[i] = args[i];
6
+ obj[args[i]] = i;
7
+ }
8
+
9
+ return obj;
10
+ };
2
11
 
3
12
  export const Section = enumify('custom', 'type', 'import', 'func', 'table', 'memory', 'global', 'export', 'start', 'element', 'code', 'data', 'data_count', 'tag');
4
13
  export const ExportDesc = enumify('func', 'table', 'mem', 'global', 'tag');
@@ -32,17 +41,16 @@ export const Opcodes = {
32
41
  throw: 0x08,
33
42
  rethrow: 0x09,
34
43
 
35
- call: 0x10,
36
- call_indirect: 0x11,
37
- return_call: 0x12,
38
- return_call_indirect: 0x13,
39
-
40
44
  end: 0x0b,
41
45
  br: 0x0c,
42
46
  br_if: 0x0d,
43
47
  br_table: 0x0e,
44
48
  return: 0x0f,
49
+
45
50
  call: 0x10,
51
+ call_indirect: 0x11,
52
+ return_call: 0x12,
53
+ return_call_indirect: 0x13,
46
54
 
47
55
  drop: 0x1a,
48
56
 
@@ -62,13 +70,22 @@ export const Opcodes = {
62
70
  i32_load16_s: 0x2e,
63
71
  i32_load16_u: 0x2f,
64
72
 
65
- i32_store8: 0x3a,
66
- i32_store16: 0x3b,
73
+ i64_load8_s: 0x30,
74
+ i64_load8_u: 0x31,
75
+ i64_load16_s: 0x32,
76
+ i64_load16_u: 0x33,
67
77
 
68
78
  i32_store: 0x36,
69
79
  i64_store: 0x37,
70
80
  f64_store: 0x39,
71
81
 
82
+ i32_store8: 0x3a,
83
+ i32_store16: 0x3b,
84
+
85
+ i64_store8: 0x3c,
86
+ i64_store16: 0x3d,
87
+
88
+ memory_size: 0x3f,
72
89
  memory_grow: 0x40,
73
90
 
74
91
  i32_const: 0x41,
@@ -100,6 +117,8 @@ export const Opcodes = {
100
117
  i32_shl: 0x74,
101
118
  i32_shr_s: 0x75,
102
119
  i32_shr_u: 0x76,
120
+ i32_rotl: 0x77,
121
+ i32_rotr: 0x78,
103
122
 
104
123
  i64_eqz: 0x50,
105
124
  i64_eq: 0x51,
@@ -123,6 +142,7 @@ export const Opcodes = {
123
142
  i64_shr_s: 0x87,
124
143
  i64_shr_u: 0x88,
125
144
  i64_rotl: 0x89,
145
+ i64_rotr: 0x8a,
126
146
 
127
147
  f64_eq: 0x61,
128
148
  f64_ne: 0x62,
package/compiler/wrap.js CHANGED
@@ -1,50 +1,45 @@
1
1
  import compile from './index.js';
2
2
  import decompile from './decompile.js';
3
3
  import { encodeVector, encodeLocal } from './encoding.js';
4
- // import fs from 'node:fs';
4
+ import Prefs from './prefs.js';
5
+ import { log } from './log.js';
6
+ import { TYPES } from './types.js';
5
7
 
6
8
  const bold = x => `\u001b[1m${x}\u001b[0m`;
7
9
 
8
- const typeBase = 0x00;
9
- const internalTypeBase = 0x10;
10
- const TYPES = {
11
- [typeBase]: 'number',
12
- [typeBase + 1]: 'boolean',
13
- [typeBase + 2]: 'string',
14
- [typeBase + 3]: 'undefined',
15
- [typeBase + 4]: 'object',
16
- [typeBase + 5]: 'function',
17
- [typeBase + 6]: 'symbol',
18
- [typeBase + 7]: 'bigint',
19
-
20
- // internal
21
- [internalTypeBase]: '_array',
22
- [internalTypeBase + 1]: '_regexp',
23
- [internalTypeBase + 2]: '_bytestring'
24
- };
25
-
26
10
  export default async (source, flags = [ 'module' ], customImports = {}, print = str => process.stdout.write(str)) => {
27
11
  const times = [];
28
12
 
29
13
  const t1 = performance.now();
30
14
  const { wasm, funcs, globals, tags, exceptions, pages, c } = compile(source, flags);
31
15
 
16
+ globalThis.porfDebugInfo = { funcs, globals };
17
+
32
18
  if (source.includes('export function')) flags.push('module');
33
19
 
34
- // fs.writeFileSync('out.wasm', Buffer.from(wasm));
20
+ // (await import('node:fs')).writeFileSync('out.wasm', Buffer.from(wasm));
35
21
 
36
22
  times.push(performance.now() - t1);
37
- if (flags.includes('info')) console.log(bold(`compiled in ${times[0].toFixed(2)}ms`));
23
+ if (Prefs.profileCompiler) console.log(bold(`compiled in ${times[0].toFixed(2)}ms`));
38
24
 
39
25
  const t2 = performance.now();
40
26
 
41
27
  let instance;
42
28
  try {
43
- 0, { instance } = await WebAssembly.instantiate(wasm, {
29
+ let wasmEngine = WebAssembly;
30
+ if (Prefs.asur) {
31
+ log.warning('wrap', 'using our !experimental! asur wasm engine instead of host to run');
32
+ wasmEngine = await import('../asur/index.js');
33
+ }
34
+
35
+ 0, { instance } = await wasmEngine.instantiate(wasm, {
44
36
  '': {
45
37
  p: valtype === 'i64' ? i => print(Number(i).toString()) : i => print(i.toString()),
46
38
  c: valtype === 'i64' ? i => print(String.fromCharCode(Number(i))) : i => print(String.fromCharCode(i)),
47
- t: _ => performance.now(),
39
+ t: () => performance.now(),
40
+ u: () => performance.timeOrigin,
41
+ y: () => {},
42
+ z: () => {},
48
43
  ...customImports
49
44
  }
50
45
  });
@@ -52,8 +47,10 @@ export default async (source, flags = [ 'module' ], customImports = {}, print =
52
47
  // only backtrace for runner, not test262/etc
53
48
  if (!process.argv[1].includes('/runner')) throw e;
54
49
 
55
- const funcInd = parseInt(e.message.match(/function #([0-9]+) /)[1]);
56
- const blobOffset = parseInt(e.message.split('@')[1]);
50
+ const funcInd = parseInt(e.message.match(/function #([0-9]+) /)?.[1]);
51
+ const blobOffset = parseInt(e.message.split('@')?.[1]);
52
+
53
+ if (!funcInd) throw e;
57
54
 
58
55
  // convert blob offset -> function wasm offset.
59
56
  // this is not good code and is somewhat duplicated
@@ -131,7 +128,7 @@ export default async (source, flags = [ 'module' ], customImports = {}, print =
131
128
  }
132
129
 
133
130
  times.push(performance.now() - t2);
134
- if (flags.includes('info')) console.log(`instantiated in ${times[1].toFixed(2)}ms`);
131
+ if (Prefs.profileCompiler) console.log(`instantiated in ${times[1].toFixed(2)}ms`);
135
132
 
136
133
  const exports = {};
137
134
 
@@ -159,42 +156,50 @@ export default async (source, flags = [ 'module' ], customImports = {}, print =
159
156
 
160
157
  // if (ret >= typeBase && ret <= typeBase + 8) return ret > (typeBase + 7) ? 'object' : TYPES[ret];
161
158
 
162
- switch (TYPES[type]) {
163
- case 'boolean': return Boolean(ret);
164
- case 'undefined': return undefined;
165
- case 'object': return ret === 0 ? null : {};
159
+ switch (type) {
160
+ case TYPES.boolean: return Boolean(ret);
161
+ case TYPES.undefined: return undefined;
162
+ case TYPES.object: return ret === 0 ? null : {};
166
163
 
167
- case '_array': {
164
+ case TYPES.string: {
168
165
  const pointer = ret;
169
- const length = new Int32Array(memory.buffer, pointer, 1);
166
+ const length = (new Int32Array(memory.buffer, pointer, 1))[0];
170
167
 
171
- // have to slice because of memory alignment
172
- const buf = memory.buffer.slice(pointer + 4, pointer + 4 + 8 * length);
168
+ return Array.from(new Uint16Array(memory.buffer, pointer + 4, length)).map(x => String.fromCharCode(x)).join('');
169
+ }
173
170
 
174
- return Array.from(new Float64Array(buf));
171
+ case TYPES.function: {
172
+ // wasm func index, including all imports
173
+ const func = funcs.find(x => (x.originalIndex ?? x.index) === ret);
174
+ // if (!func) return ret;
175
+ if (!func) return function () {};
176
+
177
+ // make fake empty func for repl/etc
178
+ return {[func.name]() {}}[func.name];
175
179
  }
176
180
 
177
- case 'string': {
181
+ case TYPES.array: {
178
182
  const pointer = ret;
179
- const length = new Int32Array(memory.buffer, pointer, 1);
183
+ const length = (new Int32Array(memory.buffer, pointer, 1))[0];
180
184
 
181
- return Array.from(new Uint16Array(memory.buffer, pointer + 4, length)).map(x => String.fromCharCode(x)).join('');
185
+ // have to slice because of memory alignment
186
+ const buf = memory.buffer.slice(pointer + 4, pointer + 4 + 8 * length);
187
+
188
+ return Array.from(new Float64Array(buf));
182
189
  }
183
190
 
184
- case '_bytestring': {
191
+ case TYPES.bytestring: {
185
192
  const pointer = ret;
186
- const length = new Int32Array(memory.buffer, pointer, 1);
193
+ const length = (new Int32Array(memory.buffer, pointer, 1))[0];
187
194
 
188
195
  return Array.from(new Uint8Array(memory.buffer, pointer + 4, length)).map(x => String.fromCharCode(x)).join('');
189
196
  }
190
197
 
191
- case 'function': {
192
- // wasm func index, including all imports
193
- const func = funcs.find(x => (x.originalIndex ?? x.index) === ret);
194
- if (!func) return ret;
198
+ case TYPES.date: {
199
+ const pointer = ret;
200
+ const value = (new Float64Array(memory.buffer, pointer, 1))[0];
195
201
 
196
- // make fake empty func for repl/etc
197
- return {[func.name]() {}}[func.name];
202
+ return new Date(value);
198
203
  }
199
204
 
200
205
  default: return ret;
package/package.json CHANGED
@@ -1,21 +1,25 @@
1
1
  {
2
2
  "name": "porffor",
3
3
  "description": "a basic experimental wip aot optimizing js -> wasm engine/compiler/runtime in js",
4
- "version": "0.2.0-fde989a",
4
+ "version": "0.2.0-fdf0fc5",
5
5
  "author": "CanadaHonk",
6
6
  "license": "MIT",
7
+ "scripts": {
8
+ "precompile": "node ./compiler/precompile.js"
9
+ },
7
10
  "dependencies": {
8
- "acorn": "^8.9.0"
11
+ "acorn": "^8.11.3",
12
+ "node-repl-polyfill": "^0.1.1"
9
13
  },
10
14
  "optionalDependencies": {
11
- "@babel/parser": "^7.23.6",
15
+ "@babel/parser": "^7.24.4",
12
16
  "hermes-parser": "^0.18.2",
13
17
  "meriyah": "^4.3.9"
14
18
  },
15
19
  "bin": {
16
20
  "porf": "./runner/index.js"
17
21
  },
18
- "main": "./runner/index.js",
22
+ "main": "./compiler/wrap.js",
19
23
  "type": "module",
20
24
  "repository": {
21
25
  "type": "git",
@@ -25,4 +29,4 @@
25
29
  "url": "https://github.com/CanadaHonk/porffor/issues"
26
30
  },
27
31
  "homepage": "https://porffor.goose.icu"
28
- }
32
+ }
package/porf CHANGED
@@ -1,2 +1,4 @@
1
1
  #!/bin/sh
2
2
  node runner/index.js "$@"
3
+ # deno run -A runner/index.js "$@"
4
+ # bun runner/index.js "$@"
package/rhemyn/compile.js CHANGED
@@ -1,7 +1,8 @@
1
- import { Blocktype, Opcodes, Valtype, PageSize, ValtypeSize } from '../compiler/wasmSpec.js';
1
+ import { Blocktype, Opcodes, Valtype, ValtypeSize } from '../compiler/wasmSpec.js';
2
2
  import { number } from '../compiler/embedding.js';
3
- import { signedLEB128, unsignedLEB128 } from '../compiler/encoding.js';
4
3
  import parse from './parse.js';
4
+ import Prefs from '../compiler/prefs.js';
5
+ import { TYPES } from '../compiler/types.js';
5
6
 
6
7
  // local indexes
7
8
  const BasePointer = 0; // base string pointer
@@ -13,7 +14,7 @@ const Length = 5;
13
14
  const Tmp = 6;
14
15
 
15
16
  let exprLastGet = false;
16
- const generate = (node, negated = false, get = true, func = 'test') => {
17
+ const generate = (node, negated = false, get = true, stringSize = 2, func = 'test') => {
17
18
  let out = [];
18
19
  switch (node.type) {
19
20
  case 'Expression':
@@ -41,7 +42,7 @@ const generate = (node, negated = false, get = true, func = 'test') => {
41
42
  // generate checks
42
43
  ...node.body.flatMap((x, i) => {
43
44
  exprLastGet = x.type !== 'Group' && i === (node.body.length - 1);
44
- return generate(x, negated);
45
+ return generate(x, negated, true, stringSize, func);
45
46
  }),
46
47
 
47
48
  // reached end without branching out, successful match
@@ -55,9 +56,9 @@ const generate = (node, negated = false, get = true, func = 'test') => {
55
56
 
56
57
  [ Opcodes.end ],
57
58
 
58
- // increment iter pointer by sizeof i16
59
+ // increment iter pointer by string size
59
60
  [ Opcodes.local_get, IterPointer ],
60
- ...number(ValtypeSize.i16, Valtype.i32),
61
+ ...number(stringSize, Valtype.i32),
61
62
  [ Opcodes.i32_add ],
62
63
  [ Opcodes.local_set, IterPointer ],
63
64
 
@@ -80,7 +81,7 @@ const generate = (node, negated = false, get = true, func = 'test') => {
80
81
  })[func], Valtype.i32)
81
82
  ];
82
83
 
83
- if (globalThis.regexLog) {
84
+ if (Prefs.regexLog) {
84
85
  const underline = x => `\u001b[4m\u001b[1m${x}\u001b[0m`;
85
86
  console.log(`\n${underline('ast')}`);
86
87
  console.log(node);
@@ -90,34 +91,34 @@ const generate = (node, negated = false, get = true, func = 'test') => {
90
91
  break;
91
92
 
92
93
  case 'Character':
93
- out = generateChar(node, node.negated ^ negated, get);
94
+ out = generateChar(node, node.negated ^ negated, get, stringSize);
94
95
  break;
95
96
 
96
97
  case 'Set':
97
- out = generateSet(node, node.negated, get);
98
+ out = generateSet(node, node.negated, get, stringSize);
98
99
  break;
99
100
 
100
101
  case 'Group':
101
- out = generateGroup(node, negated, get);
102
+ out = generateGroup(node, negated, get, stringSize);
102
103
  break;
103
104
 
104
105
  case 'Range':
105
- out = generateRange(node, negated, get);
106
+ out = generateRange(node, negated, get, stringSize);
106
107
  break;
107
108
  }
108
109
 
109
110
  return out;
110
111
  };
111
112
 
112
- const getNextChar = () => [
113
+ const getNextChar = (stringSize) => [
113
114
  // get char from pointer
114
115
  [ Opcodes.local_get, Pointer ],
115
- [ Opcodes.i32_load16_u, Math.log2(ValtypeSize.i16) - 1, ...unsignedLEB128(0) ],
116
+ [ stringSize == 2 ? Opcodes.i32_load16_u : Opcodes.i32_load8_u, 0, 0 ],
116
117
 
117
118
  ...(exprLastGet ? [] : [
118
- // pointer += sizeof i16
119
+ // pointer += string size
119
120
  [ Opcodes.local_get, Pointer ],
120
- ...number(ValtypeSize.i16, Valtype.i32),
121
+ ...number(stringSize, Valtype.i32),
121
122
  [ Opcodes.i32_add ],
122
123
  [ Opcodes.local_set, Pointer ]
123
124
  ])
@@ -133,21 +134,21 @@ const checkFailure = () => [
133
134
  [ Opcodes.br_if, 0 ]
134
135
  ];
135
136
 
136
- const generateChar = (node, negated, get) => {
137
+ const generateChar = (node, negated, get, stringSize) => {
137
138
  return [
138
- ...(get ? getNextChar() : []),
139
+ ...(get ? getNextChar(stringSize) : []),
139
140
  ...number(node.char.charCodeAt(0), Valtype.i32),
140
141
  negated ? [ Opcodes.i32_eq ] : [ Opcodes.i32_ne ],
141
142
  ...(get ? checkFailure(): [])
142
143
  ];
143
144
  };
144
145
 
145
- const generateSet = (node, negated, get) => {
146
+ const generateSet = (node, negated, get, stringSize) => {
146
147
  // for a single char we do not need a tmp, it is like just
147
148
  const singleChar = node.body.length === 1 && node.body[0].type === 'Character';
148
149
 
149
150
  let out = [
150
- ...(get ? getNextChar() : []),
151
+ ...(get ? getNextChar(stringSize) : []),
151
152
  ...(singleChar ? [] : [ [ Opcodes.local_set, Tmp ] ]),
152
153
  ];
153
154
 
@@ -155,11 +156,11 @@ const generateSet = (node, negated, get) => {
155
156
  out = [
156
157
  ...out,
157
158
  ...(singleChar ? [] : [ [ Opcodes.local_get, Tmp ] ]),
158
- ...generate(x, negated, false)
159
+ ...generate(x, negated, false, stringSize)
159
160
  ];
160
161
  }
161
162
 
162
- out = out.concat(new Array(node.body.length - 1).fill(negated ? [ Opcodes.i32_or ] : [ Opcodes.i32_and ]));
163
+ if (node.body.length > 0) out = out.concat(new Array(node.body.length - 1).fill(negated ? [ Opcodes.i32_or ] : [ Opcodes.i32_and ]));
163
164
 
164
165
  return [
165
166
  ...out,
@@ -167,9 +168,9 @@ const generateSet = (node, negated, get) => {
167
168
  ];
168
169
  };
169
170
 
170
- const generateRange = (node, negated, get) => {
171
+ const generateRange = (node, negated, get, stringSize) => {
171
172
  return [
172
- ...(get ? getNextChar() : []),
173
+ ...(get ? getNextChar(stringSize) : []),
173
174
  ...(get ? [ [ Opcodes.local_tee, Tmp ] ] : []),
174
175
 
175
176
  ...number(node.from.charCodeAt(0), Valtype.i32),
@@ -187,11 +188,29 @@ const generateRange = (node, negated, get) => {
187
188
  };
188
189
 
189
190
  const generateGroup = (node, negated, get) => {
191
+ // todo
192
+ return [];
193
+ };
190
194
 
195
+ const wrapFunc = (regex, func, name, index) => {
196
+ const parsed = parse(regex);
197
+
198
+ return outputFunc([
199
+ [ Opcodes.local_get, 1 ],
200
+ ...number(TYPES.string, Valtype.i32),
201
+ [ Opcodes.i32_eq ],
202
+ [ Opcodes.if, Valtype.i32 ],
203
+ // string
204
+ ...generate(parsed, false, true, 2, func),
205
+ [ Opcodes.else ],
206
+ // bytestring
207
+ ...generate(parsed, false, true, 1, func),
208
+ [ Opcodes.end ]
209
+ ], name, index);
191
210
  };
192
211
 
193
- export const test = (regex, index = 0, name = 'regex_test_' + regex) => outputFunc(generate(parse(regex), false, true, 'test'), name, index);
194
- export const search = (regex, index = 0, name = 'regex_search_' + regex) => outputFunc(generate(parse(regex), false, true, 'search'), name, index);
212
+ export const test = (regex, index = 0, name = 'regex_test_' + regex) => wrapFunc(regex, 'test', name, index);
213
+ export const search = (regex, index = 0, name = 'regex_search_' + regex) => wrapFunc(regex, 'search', name, index);
195
214
 
196
215
  const outputFunc = (wasm, name, index) => ({
197
216
  name,
@@ -199,9 +218,9 @@ const outputFunc = (wasm, name, index) => ({
199
218
  wasm,
200
219
 
201
220
  export: true,
202
- params: [ Valtype.i32 ],
221
+ params: [ Valtype.i32, Valtype.i32 ],
203
222
  returns: [ Valtype.i32 ],
204
- returnType: 0xffffffffffff1, // boolean - todo: do not hardcode this
223
+ returnType: TYPES.boolean,
205
224
  locals: {
206
225
  basePointer: { idx: 0, type: Valtype.i32 },
207
226
  iterPointer: { idx: 1, type: Valtype.i32 },