porffor 0.2.0-4b72c49 → 0.2.0-4d189b5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/CONTRIBUTING.md +248 -0
  2. package/LICENSE +20 -20
  3. package/README.md +129 -84
  4. package/asur/README.md +2 -0
  5. package/asur/index.js +1262 -0
  6. package/byg/index.js +237 -0
  7. package/compiler/2c.js +1 -1
  8. package/compiler/{sections.js → assemble.js} +59 -12
  9. package/compiler/builtins/annexb_string.js +72 -0
  10. package/compiler/builtins/annexb_string.ts +18 -0
  11. package/compiler/builtins/array.ts +145 -0
  12. package/compiler/builtins/base64.ts +7 -84
  13. package/compiler/builtins/crypto.ts +120 -0
  14. package/compiler/builtins/date.ts +2071 -0
  15. package/compiler/builtins/escape.ts +141 -0
  16. package/compiler/builtins/int.ts +147 -0
  17. package/compiler/builtins/number.ts +527 -0
  18. package/compiler/builtins/porffor.d.ts +43 -7
  19. package/compiler/builtins/string.ts +1055 -0
  20. package/compiler/builtins/tostring.ts +45 -0
  21. package/compiler/builtins.js +403 -110
  22. package/compiler/{codeGen.js → codegen.js} +820 -309
  23. package/compiler/decompile.js +0 -1
  24. package/compiler/embedding.js +22 -22
  25. package/compiler/encoding.js +108 -10
  26. package/compiler/generated_builtins.js +1470 -4
  27. package/compiler/index.js +16 -14
  28. package/compiler/log.js +6 -3
  29. package/compiler/opt.js +23 -22
  30. package/compiler/parse.js +30 -22
  31. package/compiler/precompile.js +25 -26
  32. package/compiler/prefs.js +7 -6
  33. package/compiler/prototype.js +2 -18
  34. package/compiler/types.js +37 -0
  35. package/compiler/wasmSpec.js +18 -6
  36. package/compiler/wrap.js +51 -47
  37. package/package.json +9 -5
  38. package/porf +2 -0
  39. package/rhemyn/compile.js +44 -26
  40. package/rhemyn/parse.js +322 -320
  41. package/rhemyn/test/parse.js +58 -58
  42. package/runner/compare.js +34 -34
  43. package/runner/debug.js +122 -0
  44. package/runner/index.js +74 -11
  45. package/runner/profiler.js +102 -0
  46. package/runner/repl.js +42 -9
  47. package/runner/sizes.js +37 -37
  48. package/demo.js +0 -15
  49. package/runner/info.js +0 -89
  50. package/runner/profile.js +0 -46
  51. package/runner/results.json +0 -1
  52. package/runner/transform.js +0 -15
  53. package/util/enum.js +0 -20
package/compiler/wrap.js CHANGED
@@ -1,50 +1,45 @@
1
1
  import compile from './index.js';
2
2
  import decompile from './decompile.js';
3
3
  import { encodeVector, encodeLocal } from './encoding.js';
4
- // import fs from 'node:fs';
4
+ import Prefs from './prefs.js';
5
+ import { log } from './log.js';
6
+ import { TYPES } from './types.js';
5
7
 
6
8
  const bold = x => `\u001b[1m${x}\u001b[0m`;
7
9
 
8
- const typeBase = 0x00;
9
- const internalTypeBase = 0x10;
10
- const TYPES = {
11
- [typeBase]: 'number',
12
- [typeBase + 1]: 'boolean',
13
- [typeBase + 2]: 'string',
14
- [typeBase + 3]: 'undefined',
15
- [typeBase + 4]: 'object',
16
- [typeBase + 5]: 'function',
17
- [typeBase + 6]: 'symbol',
18
- [typeBase + 7]: 'bigint',
19
-
20
- // internal
21
- [internalTypeBase]: '_array',
22
- [internalTypeBase + 1]: '_regexp',
23
- [internalTypeBase + 2]: '_bytestring'
24
- };
25
-
26
10
  export default async (source, flags = [ 'module' ], customImports = {}, print = str => process.stdout.write(str)) => {
27
11
  const times = [];
28
12
 
29
13
  const t1 = performance.now();
30
14
  const { wasm, funcs, globals, tags, exceptions, pages, c } = compile(source, flags);
31
15
 
16
+ globalThis.porfDebugInfo = { funcs, globals };
17
+
32
18
  if (source.includes('export function')) flags.push('module');
33
19
 
34
- // fs.writeFileSync('out.wasm', Buffer.from(wasm));
20
+ // (await import('node:fs')).writeFileSync('out.wasm', Buffer.from(wasm));
35
21
 
36
22
  times.push(performance.now() - t1);
37
- if (flags.includes('info')) console.log(bold(`compiled in ${times[0].toFixed(2)}ms`));
23
+ if (Prefs.profileCompiler) console.log(bold(`compiled in ${times[0].toFixed(2)}ms`));
38
24
 
39
25
  const t2 = performance.now();
40
26
 
41
27
  let instance;
42
28
  try {
43
- 0, { instance } = await WebAssembly.instantiate(wasm, {
29
+ let wasmEngine = WebAssembly;
30
+ if (Prefs.asur) {
31
+ log.warning('wrap', 'using our !experimental! asur wasm engine instead of host to run');
32
+ wasmEngine = await import('../asur/index.js');
33
+ }
34
+
35
+ 0, { instance } = await wasmEngine.instantiate(wasm, {
44
36
  '': {
45
37
  p: valtype === 'i64' ? i => print(Number(i).toString()) : i => print(i.toString()),
46
38
  c: valtype === 'i64' ? i => print(String.fromCharCode(Number(i))) : i => print(String.fromCharCode(i)),
47
- t: _ => performance.now(),
39
+ t: () => performance.now(),
40
+ u: () => performance.timeOrigin,
41
+ y: () => {},
42
+ z: () => {},
48
43
  ...customImports
49
44
  }
50
45
  });
@@ -52,8 +47,10 @@ export default async (source, flags = [ 'module' ], customImports = {}, print =
52
47
  // only backtrace for runner, not test262/etc
53
48
  if (!process.argv[1].includes('/runner')) throw e;
54
49
 
55
- const funcInd = parseInt(e.message.match(/function #([0-9]+) /)[1]);
56
- const blobOffset = parseInt(e.message.split('@')[1]);
50
+ const funcInd = parseInt(e.message.match(/function #([0-9]+) /)?.[1]);
51
+ const blobOffset = parseInt(e.message.split('@')?.[1]);
52
+
53
+ if (!funcInd) throw e;
57
54
 
58
55
  // convert blob offset -> function wasm offset.
59
56
  // this is not good code and is somewhat duplicated
@@ -131,7 +128,7 @@ export default async (source, flags = [ 'module' ], customImports = {}, print =
131
128
  }
132
129
 
133
130
  times.push(performance.now() - t2);
134
- if (flags.includes('info')) console.log(`instantiated in ${times[1].toFixed(2)}ms`);
131
+ if (Prefs.profileCompiler) console.log(`instantiated in ${times[1].toFixed(2)}ms`);
135
132
 
136
133
  const exports = {};
137
134
 
@@ -159,43 +156,50 @@ export default async (source, flags = [ 'module' ], customImports = {}, print =
159
156
 
160
157
  // if (ret >= typeBase && ret <= typeBase + 8) return ret > (typeBase + 7) ? 'object' : TYPES[ret];
161
158
 
162
- switch (TYPES[type]) {
163
- case 'boolean': return Boolean(ret);
164
- case 'undefined': return undefined;
165
- case 'object': return ret === 0 ? null : {};
159
+ switch (type) {
160
+ case TYPES.boolean: return Boolean(ret);
161
+ case TYPES.undefined: return undefined;
162
+ case TYPES.object: return ret === 0 ? null : {};
166
163
 
167
- case '_array': {
164
+ case TYPES.string: {
168
165
  const pointer = ret;
169
- const length = new Int32Array(memory.buffer, pointer, 1);
166
+ const length = (new Int32Array(memory.buffer, pointer, 1))[0];
170
167
 
171
- // have to slice because of memory alignment
172
- const buf = memory.buffer.slice(pointer + 4, pointer + 4 + 8 * length);
168
+ return Array.from(new Uint16Array(memory.buffer, pointer + 4, length)).map(x => String.fromCharCode(x)).join('');
169
+ }
173
170
 
174
- return Array.from(new Float64Array(buf));
171
+ case TYPES.function: {
172
+ // wasm func index, including all imports
173
+ const func = funcs.find(x => (x.originalIndex ?? x.index) === ret);
174
+ // if (!func) return ret;
175
+ if (!func) return function () {};
176
+
177
+ // make fake empty func for repl/etc
178
+ return {[func.name]() {}}[func.name];
175
179
  }
176
180
 
177
- case 'string': {
181
+ case TYPES._array: {
178
182
  const pointer = ret;
179
- const length = new Int32Array(memory.buffer, pointer, 1);
183
+ const length = (new Int32Array(memory.buffer, pointer, 1))[0];
180
184
 
181
- return Array.from(new Uint16Array(memory.buffer, pointer + 4, length)).map(x => String.fromCharCode(x)).join('');
185
+ // have to slice because of memory alignment
186
+ const buf = memory.buffer.slice(pointer + 4, pointer + 4 + 8 * length);
187
+
188
+ return Array.from(new Float64Array(buf));
182
189
  }
183
190
 
184
- case '_bytestring': {
191
+ case TYPES._bytestring: {
185
192
  const pointer = ret;
186
- const length = new Int32Array(memory.buffer, pointer, 1);
193
+ const length = (new Int32Array(memory.buffer, pointer, 1))[0];
187
194
 
188
195
  return Array.from(new Uint8Array(memory.buffer, pointer + 4, length)).map(x => String.fromCharCode(x)).join('');
189
196
  }
190
197
 
191
- case 'function': {
192
- // wasm func index, including all imports
193
- const func = funcs.find(x => (x.originalIndex ?? x.index) === ret);
194
- // if (!func) return ret;
195
- if (!func) return function () {};
198
+ case TYPES._date: {
199
+ const pointer = ret;
200
+ const value = (new Float64Array(memory.buffer, pointer, 1))[0];
196
201
 
197
- // make fake empty func for repl/etc
198
- return {[func.name]() {}}[func.name];
202
+ return new Date(value);
199
203
  }
200
204
 
201
205
  default: return ret;
package/package.json CHANGED
@@ -1,21 +1,25 @@
1
1
  {
2
2
  "name": "porffor",
3
3
  "description": "a basic experimental wip aot optimizing js -> wasm engine/compiler/runtime in js",
4
- "version": "0.2.0-4b72c49",
4
+ "version": "0.2.0-4d189b5",
5
5
  "author": "CanadaHonk",
6
6
  "license": "MIT",
7
+ "scripts": {
8
+ "precompile": "node ./compiler/precompile.js"
9
+ },
7
10
  "dependencies": {
8
- "acorn": "^8.9.0"
11
+ "acorn": "^8.11.3",
12
+ "node-repl-polyfill": "^0.1.1"
9
13
  },
10
14
  "optionalDependencies": {
11
- "@babel/parser": "^7.23.6",
15
+ "@babel/parser": "^7.24.4",
12
16
  "hermes-parser": "^0.18.2",
13
17
  "meriyah": "^4.3.9"
14
18
  },
15
19
  "bin": {
16
20
  "porf": "./runner/index.js"
17
21
  },
18
- "main": "./runner/index.js",
22
+ "main": "./compiler/wrap.js",
19
23
  "type": "module",
20
24
  "repository": {
21
25
  "type": "git",
@@ -25,4 +29,4 @@
25
29
  "url": "https://github.com/CanadaHonk/porffor/issues"
26
30
  },
27
31
  "homepage": "https://porffor.goose.icu"
28
- }
32
+ }
package/porf CHANGED
@@ -1,2 +1,4 @@
1
1
  #!/bin/sh
2
2
  node runner/index.js "$@"
3
+ # deno run -A runner/index.js "$@"
4
+ # bun runner/index.js "$@"
package/rhemyn/compile.js CHANGED
@@ -1,8 +1,8 @@
1
- import { Blocktype, Opcodes, Valtype, PageSize, ValtypeSize } from '../compiler/wasmSpec.js';
1
+ import { Blocktype, Opcodes, Valtype, ValtypeSize } from '../compiler/wasmSpec.js';
2
2
  import { number } from '../compiler/embedding.js';
3
- import { signedLEB128, unsignedLEB128 } from '../compiler/encoding.js';
4
3
  import parse from './parse.js';
5
4
  import Prefs from '../compiler/prefs.js';
5
+ import { TYPES } from '../compiler/types.js';
6
6
 
7
7
  // local indexes
8
8
  const BasePointer = 0; // base string pointer
@@ -14,7 +14,7 @@ const Length = 5;
14
14
  const Tmp = 6;
15
15
 
16
16
  let exprLastGet = false;
17
- const generate = (node, negated = false, get = true, func = 'test') => {
17
+ const generate = (node, negated = false, get = true, stringSize = 2, func = 'test') => {
18
18
  let out = [];
19
19
  switch (node.type) {
20
20
  case 'Expression':
@@ -42,7 +42,7 @@ const generate = (node, negated = false, get = true, func = 'test') => {
42
42
  // generate checks
43
43
  ...node.body.flatMap((x, i) => {
44
44
  exprLastGet = x.type !== 'Group' && i === (node.body.length - 1);
45
- return generate(x, negated);
45
+ return generate(x, negated, true, stringSize, func);
46
46
  }),
47
47
 
48
48
  // reached end without branching out, successful match
@@ -56,9 +56,9 @@ const generate = (node, negated = false, get = true, func = 'test') => {
56
56
 
57
57
  [ Opcodes.end ],
58
58
 
59
- // increment iter pointer by sizeof i16
59
+ // increment iter pointer by string size
60
60
  [ Opcodes.local_get, IterPointer ],
61
- ...number(ValtypeSize.i16, Valtype.i32),
61
+ ...number(stringSize, Valtype.i32),
62
62
  [ Opcodes.i32_add ],
63
63
  [ Opcodes.local_set, IterPointer ],
64
64
 
@@ -91,34 +91,34 @@ const generate = (node, negated = false, get = true, func = 'test') => {
91
91
  break;
92
92
 
93
93
  case 'Character':
94
- out = generateChar(node, node.negated ^ negated, get);
94
+ out = generateChar(node, node.negated ^ negated, get, stringSize);
95
95
  break;
96
96
 
97
97
  case 'Set':
98
- out = generateSet(node, node.negated, get);
98
+ out = generateSet(node, node.negated, get, stringSize);
99
99
  break;
100
100
 
101
101
  case 'Group':
102
- out = generateGroup(node, negated, get);
102
+ out = generateGroup(node, negated, get, stringSize);
103
103
  break;
104
104
 
105
105
  case 'Range':
106
- out = generateRange(node, negated, get);
106
+ out = generateRange(node, negated, get, stringSize);
107
107
  break;
108
108
  }
109
109
 
110
110
  return out;
111
111
  };
112
112
 
113
- const getNextChar = () => [
113
+ const getNextChar = (stringSize) => [
114
114
  // get char from pointer
115
115
  [ Opcodes.local_get, Pointer ],
116
- [ Opcodes.i32_load16_u, Math.log2(ValtypeSize.i16) - 1, ...unsignedLEB128(0) ],
116
+ [ stringSize == 2 ? Opcodes.i32_load16_u : Opcodes.i32_load8_u, 0, 0 ],
117
117
 
118
118
  ...(exprLastGet ? [] : [
119
- // pointer += sizeof i16
119
+ // pointer += string size
120
120
  [ Opcodes.local_get, Pointer ],
121
- ...number(ValtypeSize.i16, Valtype.i32),
121
+ ...number(stringSize, Valtype.i32),
122
122
  [ Opcodes.i32_add ],
123
123
  [ Opcodes.local_set, Pointer ]
124
124
  ])
@@ -134,21 +134,21 @@ const checkFailure = () => [
134
134
  [ Opcodes.br_if, 0 ]
135
135
  ];
136
136
 
137
- const generateChar = (node, negated, get) => {
137
+ const generateChar = (node, negated, get, stringSize) => {
138
138
  return [
139
- ...(get ? getNextChar() : []),
139
+ ...(get ? getNextChar(stringSize) : []),
140
140
  ...number(node.char.charCodeAt(0), Valtype.i32),
141
141
  negated ? [ Opcodes.i32_eq ] : [ Opcodes.i32_ne ],
142
142
  ...(get ? checkFailure(): [])
143
143
  ];
144
144
  };
145
145
 
146
- const generateSet = (node, negated, get) => {
146
+ const generateSet = (node, negated, get, stringSize) => {
147
147
  // for a single char we do not need a tmp, it is like just
148
148
  const singleChar = node.body.length === 1 && node.body[0].type === 'Character';
149
149
 
150
150
  let out = [
151
- ...(get ? getNextChar() : []),
151
+ ...(get ? getNextChar(stringSize) : []),
152
152
  ...(singleChar ? [] : [ [ Opcodes.local_set, Tmp ] ]),
153
153
  ];
154
154
 
@@ -156,11 +156,11 @@ const generateSet = (node, negated, get) => {
156
156
  out = [
157
157
  ...out,
158
158
  ...(singleChar ? [] : [ [ Opcodes.local_get, Tmp ] ]),
159
- ...generate(x, negated, false)
159
+ ...generate(x, negated, false, stringSize)
160
160
  ];
161
161
  }
162
162
 
163
- out = out.concat(new Array(node.body.length - 1).fill(negated ? [ Opcodes.i32_or ] : [ Opcodes.i32_and ]));
163
+ if (node.body.length > 0) out = out.concat(new Array(node.body.length - 1).fill(negated ? [ Opcodes.i32_or ] : [ Opcodes.i32_and ]));
164
164
 
165
165
  return [
166
166
  ...out,
@@ -168,9 +168,9 @@ const generateSet = (node, negated, get) => {
168
168
  ];
169
169
  };
170
170
 
171
- const generateRange = (node, negated, get) => {
171
+ const generateRange = (node, negated, get, stringSize) => {
172
172
  return [
173
- ...(get ? getNextChar() : []),
173
+ ...(get ? getNextChar(stringSize) : []),
174
174
  ...(get ? [ [ Opcodes.local_tee, Tmp ] ] : []),
175
175
 
176
176
  ...number(node.from.charCodeAt(0), Valtype.i32),
@@ -188,11 +188,29 @@ const generateRange = (node, negated, get) => {
188
188
  };
189
189
 
190
190
  const generateGroup = (node, negated, get) => {
191
+ // todo
192
+ return [];
193
+ };
191
194
 
195
+ const wrapFunc = (regex, func, name, index) => {
196
+ const parsed = parse(regex);
197
+
198
+ return outputFunc([
199
+ [ Opcodes.local_get, 1 ],
200
+ ...number(TYPES.string, Valtype.i32),
201
+ [ Opcodes.i32_eq ],
202
+ [ Opcodes.if, Valtype.i32 ],
203
+ // string
204
+ ...generate(parsed, false, true, 2, func),
205
+ [ Opcodes.else ],
206
+ // bytestring
207
+ ...generate(parsed, false, true, 1, func),
208
+ [ Opcodes.end ]
209
+ ], name, index);
192
210
  };
193
211
 
194
- export const test = (regex, index = 0, name = 'regex_test_' + regex) => outputFunc(generate(parse(regex), false, true, 'test'), name, index);
195
- export const search = (regex, index = 0, name = 'regex_search_' + regex) => outputFunc(generate(parse(regex), false, true, 'search'), name, index);
212
+ export const test = (regex, index = 0, name = 'regex_test_' + regex) => wrapFunc(regex, 'test', name, index);
213
+ export const search = (regex, index = 0, name = 'regex_search_' + regex) => wrapFunc(regex, 'search', name, index);
196
214
 
197
215
  const outputFunc = (wasm, name, index) => ({
198
216
  name,
@@ -200,9 +218,9 @@ const outputFunc = (wasm, name, index) => ({
200
218
  wasm,
201
219
 
202
220
  export: true,
203
- params: [ Valtype.i32 ],
221
+ params: [ Valtype.i32, Valtype.i32 ],
204
222
  returns: [ Valtype.i32 ],
205
- returnType: 0xffffffffffff1, // boolean - todo: do not hardcode this
223
+ returnType: TYPES.boolean,
206
224
  locals: {
207
225
  basePointer: { idx: 0, type: Valtype.i32 },
208
226
  iterPointer: { idx: 1, type: Valtype.i32 },