porffor 0.2.0-6aff0fa → 0.2.0-75bc012

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/CONTRIBUTING.md +256 -0
  2. package/LICENSE +20 -20
  3. package/README.md +115 -82
  4. package/asur/index.js +624 -340
  5. package/byg/index.js +237 -0
  6. package/compiler/2c.js +1 -1
  7. package/compiler/{sections.js → assemble.js} +59 -12
  8. package/compiler/builtins/annexb_string.js +72 -0
  9. package/compiler/builtins/annexb_string.ts +18 -0
  10. package/compiler/builtins/array.ts +145 -0
  11. package/compiler/builtins/base64.ts +7 -84
  12. package/compiler/builtins/boolean.ts +20 -0
  13. package/compiler/builtins/crypto.ts +120 -0
  14. package/compiler/builtins/date.ts +2070 -0
  15. package/compiler/builtins/escape.ts +141 -0
  16. package/compiler/builtins/int.ts +147 -0
  17. package/compiler/builtins/number.ts +534 -0
  18. package/compiler/builtins/porffor.d.ts +43 -7
  19. package/compiler/builtins/string.ts +1080 -0
  20. package/compiler/builtins/tostring.ts +25 -0
  21. package/compiler/builtins.js +398 -115
  22. package/compiler/{codeGen.js → codegen.js} +856 -323
  23. package/compiler/decompile.js +0 -1
  24. package/compiler/embedding.js +22 -22
  25. package/compiler/encoding.js +108 -10
  26. package/compiler/generated_builtins.js +1504 -2
  27. package/compiler/index.js +16 -14
  28. package/compiler/log.js +2 -2
  29. package/compiler/opt.js +23 -22
  30. package/compiler/parse.js +30 -22
  31. package/compiler/precompile.js +26 -27
  32. package/compiler/prefs.js +7 -6
  33. package/compiler/prototype.js +16 -32
  34. package/compiler/types.js +37 -0
  35. package/compiler/wasmSpec.js +14 -1
  36. package/compiler/wrap.js +41 -44
  37. package/package.json +9 -5
  38. package/porf +2 -0
  39. package/rhemyn/compile.js +44 -26
  40. package/rhemyn/parse.js +322 -320
  41. package/rhemyn/test/parse.js +58 -58
  42. package/runner/compare.js +34 -34
  43. package/runner/debug.js +122 -0
  44. package/runner/index.js +69 -12
  45. package/runner/profiler.js +45 -26
  46. package/runner/repl.js +42 -9
  47. package/runner/sizes.js +37 -37
  48. package/runner/info.js +0 -89
  49. package/runner/transform.js +0 -15
  50. package/util/enum.js +0 -20
package/compiler/wrap.js CHANGED
@@ -3,39 +3,24 @@ import decompile from './decompile.js';
3
3
  import { encodeVector, encodeLocal } from './encoding.js';
4
4
  import Prefs from './prefs.js';
5
5
  import { log } from './log.js';
6
+ import { TYPES } from './types.js';
6
7
 
7
8
  const bold = x => `\u001b[1m${x}\u001b[0m`;
8
9
 
9
- const typeBase = 0x00;
10
- const internalTypeBase = 0x10;
11
- const TYPES = {
12
- [typeBase]: 'number',
13
- [typeBase + 1]: 'boolean',
14
- [typeBase + 2]: 'string',
15
- [typeBase + 3]: 'undefined',
16
- [typeBase + 4]: 'object',
17
- [typeBase + 5]: 'function',
18
- [typeBase + 6]: 'symbol',
19
- [typeBase + 7]: 'bigint',
20
-
21
- // internal
22
- [internalTypeBase]: '_array',
23
- [internalTypeBase + 1]: '_regexp',
24
- [internalTypeBase + 2]: '_bytestring'
25
- };
26
-
27
10
  export default async (source, flags = [ 'module' ], customImports = {}, print = str => process.stdout.write(str)) => {
28
11
  const times = [];
29
12
 
30
13
  const t1 = performance.now();
31
14
  const { wasm, funcs, globals, tags, exceptions, pages, c } = compile(source, flags);
32
15
 
16
+ globalThis.porfDebugInfo = { funcs, globals };
17
+
33
18
  if (source.includes('export function')) flags.push('module');
34
19
 
35
20
  // (await import('node:fs')).writeFileSync('out.wasm', Buffer.from(wasm));
36
21
 
37
22
  times.push(performance.now() - t1);
38
- if (flags.includes('info')) console.log(bold(`compiled in ${times[0].toFixed(2)}ms`));
23
+ if (Prefs.profileCompiler) console.log(bold(`compiled in ${times[0].toFixed(2)}ms`));
39
24
 
40
25
  const t2 = performance.now();
41
26
 
@@ -51,7 +36,10 @@ export default async (source, flags = [ 'module' ], customImports = {}, print =
51
36
  '': {
52
37
  p: valtype === 'i64' ? i => print(Number(i).toString()) : i => print(i.toString()),
53
38
  c: valtype === 'i64' ? i => print(String.fromCharCode(Number(i))) : i => print(String.fromCharCode(i)),
54
- t: _ => performance.now(),
39
+ t: () => performance.now(),
40
+ u: () => performance.timeOrigin,
41
+ y: () => {},
42
+ z: () => {},
55
43
  ...customImports
56
44
  }
57
45
  });
@@ -59,8 +47,10 @@ export default async (source, flags = [ 'module' ], customImports = {}, print =
59
47
  // only backtrace for runner, not test262/etc
60
48
  if (!process.argv[1].includes('/runner')) throw e;
61
49
 
62
- const funcInd = parseInt(e.message.match(/function #([0-9]+) /)[1]);
63
- const blobOffset = parseInt(e.message.split('@')[1]);
50
+ const funcInd = parseInt(e.message.match(/function #([0-9]+) /)?.[1]);
51
+ const blobOffset = parseInt(e.message.split('@')?.[1]);
52
+
53
+ if (!funcInd) throw e;
64
54
 
65
55
  // convert blob offset -> function wasm offset.
66
56
  // this is not good code and is somewhat duplicated
@@ -138,7 +128,7 @@ export default async (source, flags = [ 'module' ], customImports = {}, print =
138
128
  }
139
129
 
140
130
  times.push(performance.now() - t2);
141
- if (flags.includes('info')) console.log(`instantiated in ${times[1].toFixed(2)}ms`);
131
+ if (Prefs.profileCompiler) console.log(`instantiated in ${times[1].toFixed(2)}ms`);
142
132
 
143
133
  const exports = {};
144
134
 
@@ -166,43 +156,50 @@ export default async (source, flags = [ 'module' ], customImports = {}, print =
166
156
 
167
157
  // if (ret >= typeBase && ret <= typeBase + 8) return ret > (typeBase + 7) ? 'object' : TYPES[ret];
168
158
 
169
- switch (TYPES[type]) {
170
- case 'boolean': return Boolean(ret);
171
- case 'undefined': return undefined;
172
- case 'object': return ret === 0 ? null : {};
159
+ switch (type) {
160
+ case TYPES.boolean: return Boolean(ret);
161
+ case TYPES.undefined: return undefined;
162
+ case TYPES.object: return ret === 0 ? null : {};
173
163
 
174
- case '_array': {
164
+ case TYPES.string: {
175
165
  const pointer = ret;
176
- const length = new Int32Array(memory.buffer, pointer, 1);
166
+ const length = (new Int32Array(memory.buffer, pointer, 1))[0];
177
167
 
178
- // have to slice because of memory alignment
179
- const buf = memory.buffer.slice(pointer + 4, pointer + 4 + 8 * length);
168
+ return Array.from(new Uint16Array(memory.buffer, pointer + 4, length)).map(x => String.fromCharCode(x)).join('');
169
+ }
180
170
 
181
- return Array.from(new Float64Array(buf));
171
+ case TYPES.function: {
172
+ // wasm func index, including all imports
173
+ const func = funcs.find(x => (x.originalIndex ?? x.index) === ret);
174
+ // if (!func) return ret;
175
+ if (!func) return function () {};
176
+
177
+ // make fake empty func for repl/etc
178
+ return {[func.name]() {}}[func.name];
182
179
  }
183
180
 
184
- case 'string': {
181
+ case TYPES.array: {
185
182
  const pointer = ret;
186
- const length = new Int32Array(memory.buffer, pointer, 1);
183
+ const length = (new Int32Array(memory.buffer, pointer, 1))[0];
187
184
 
188
- return Array.from(new Uint16Array(memory.buffer, pointer + 4, length)).map(x => String.fromCharCode(x)).join('');
185
+ // have to slice because of memory alignment
186
+ const buf = memory.buffer.slice(pointer + 4, pointer + 4 + 8 * length);
187
+
188
+ return Array.from(new Float64Array(buf));
189
189
  }
190
190
 
191
- case '_bytestring': {
191
+ case TYPES.bytestring: {
192
192
  const pointer = ret;
193
- const length = new Int32Array(memory.buffer, pointer, 1);
193
+ const length = (new Int32Array(memory.buffer, pointer, 1))[0];
194
194
 
195
195
  return Array.from(new Uint8Array(memory.buffer, pointer + 4, length)).map(x => String.fromCharCode(x)).join('');
196
196
  }
197
197
 
198
- case 'function': {
199
- // wasm func index, including all imports
200
- const func = funcs.find(x => (x.originalIndex ?? x.index) === ret);
201
- // if (!func) return ret;
202
- if (!func) return function () {};
198
+ case TYPES.date: {
199
+ const pointer = ret;
200
+ const value = (new Float64Array(memory.buffer, pointer, 1))[0];
203
201
 
204
- // make fake empty func for repl/etc
205
- return {[func.name]() {}}[func.name];
202
+ return new Date(value);
206
203
  }
207
204
 
208
205
  default: return ret;
package/package.json CHANGED
@@ -1,21 +1,25 @@
1
1
  {
2
2
  "name": "porffor",
3
3
  "description": "a basic experimental wip aot optimizing js -> wasm engine/compiler/runtime in js",
4
- "version": "0.2.0-6aff0fa",
4
+ "version": "0.2.0-75bc012",
5
5
  "author": "CanadaHonk",
6
6
  "license": "MIT",
7
+ "scripts": {
8
+ "precompile": "node ./compiler/precompile.js"
9
+ },
7
10
  "dependencies": {
8
- "acorn": "^8.9.0"
11
+ "acorn": "^8.11.3",
12
+ "node-repl-polyfill": "^0.1.1"
9
13
  },
10
14
  "optionalDependencies": {
11
- "@babel/parser": "^7.23.6",
15
+ "@babel/parser": "^7.24.4",
12
16
  "hermes-parser": "^0.18.2",
13
17
  "meriyah": "^4.3.9"
14
18
  },
15
19
  "bin": {
16
20
  "porf": "./runner/index.js"
17
21
  },
18
- "main": "./runner/index.js",
22
+ "main": "./compiler/wrap.js",
19
23
  "type": "module",
20
24
  "repository": {
21
25
  "type": "git",
@@ -25,4 +29,4 @@
25
29
  "url": "https://github.com/CanadaHonk/porffor/issues"
26
30
  },
27
31
  "homepage": "https://porffor.goose.icu"
28
- }
32
+ }
package/porf CHANGED
@@ -1,2 +1,4 @@
1
1
  #!/bin/sh
2
2
  node runner/index.js "$@"
3
+ # deno run -A runner/index.js "$@"
4
+ # bun runner/index.js "$@"
package/rhemyn/compile.js CHANGED
@@ -1,8 +1,8 @@
1
- import { Blocktype, Opcodes, Valtype, PageSize, ValtypeSize } from '../compiler/wasmSpec.js';
1
+ import { Blocktype, Opcodes, Valtype, ValtypeSize } from '../compiler/wasmSpec.js';
2
2
  import { number } from '../compiler/embedding.js';
3
- import { signedLEB128, unsignedLEB128 } from '../compiler/encoding.js';
4
3
  import parse from './parse.js';
5
4
  import Prefs from '../compiler/prefs.js';
5
+ import { TYPES } from '../compiler/types.js';
6
6
 
7
7
  // local indexes
8
8
  const BasePointer = 0; // base string pointer
@@ -14,7 +14,7 @@ const Length = 5;
14
14
  const Tmp = 6;
15
15
 
16
16
  let exprLastGet = false;
17
- const generate = (node, negated = false, get = true, func = 'test') => {
17
+ const generate = (node, negated = false, get = true, stringSize = 2, func = 'test') => {
18
18
  let out = [];
19
19
  switch (node.type) {
20
20
  case 'Expression':
@@ -42,7 +42,7 @@ const generate = (node, negated = false, get = true, func = 'test') => {
42
42
  // generate checks
43
43
  ...node.body.flatMap((x, i) => {
44
44
  exprLastGet = x.type !== 'Group' && i === (node.body.length - 1);
45
- return generate(x, negated);
45
+ return generate(x, negated, true, stringSize, func);
46
46
  }),
47
47
 
48
48
  // reached end without branching out, successful match
@@ -56,9 +56,9 @@ const generate = (node, negated = false, get = true, func = 'test') => {
56
56
 
57
57
  [ Opcodes.end ],
58
58
 
59
- // increment iter pointer by sizeof i16
59
+ // increment iter pointer by string size
60
60
  [ Opcodes.local_get, IterPointer ],
61
- ...number(ValtypeSize.i16, Valtype.i32),
61
+ ...number(stringSize, Valtype.i32),
62
62
  [ Opcodes.i32_add ],
63
63
  [ Opcodes.local_set, IterPointer ],
64
64
 
@@ -91,34 +91,34 @@ const generate = (node, negated = false, get = true, func = 'test') => {
91
91
  break;
92
92
 
93
93
  case 'Character':
94
- out = generateChar(node, node.negated ^ negated, get);
94
+ out = generateChar(node, node.negated ^ negated, get, stringSize);
95
95
  break;
96
96
 
97
97
  case 'Set':
98
- out = generateSet(node, node.negated, get);
98
+ out = generateSet(node, node.negated, get, stringSize);
99
99
  break;
100
100
 
101
101
  case 'Group':
102
- out = generateGroup(node, negated, get);
102
+ out = generateGroup(node, negated, get, stringSize);
103
103
  break;
104
104
 
105
105
  case 'Range':
106
- out = generateRange(node, negated, get);
106
+ out = generateRange(node, negated, get, stringSize);
107
107
  break;
108
108
  }
109
109
 
110
110
  return out;
111
111
  };
112
112
 
113
- const getNextChar = () => [
113
+ const getNextChar = (stringSize) => [
114
114
  // get char from pointer
115
115
  [ Opcodes.local_get, Pointer ],
116
- [ Opcodes.i32_load16_u, Math.log2(ValtypeSize.i16) - 1, ...unsignedLEB128(0) ],
116
+ [ stringSize == 2 ? Opcodes.i32_load16_u : Opcodes.i32_load8_u, 0, 0 ],
117
117
 
118
118
  ...(exprLastGet ? [] : [
119
- // pointer += sizeof i16
119
+ // pointer += string size
120
120
  [ Opcodes.local_get, Pointer ],
121
- ...number(ValtypeSize.i16, Valtype.i32),
121
+ ...number(stringSize, Valtype.i32),
122
122
  [ Opcodes.i32_add ],
123
123
  [ Opcodes.local_set, Pointer ]
124
124
  ])
@@ -134,21 +134,21 @@ const checkFailure = () => [
134
134
  [ Opcodes.br_if, 0 ]
135
135
  ];
136
136
 
137
- const generateChar = (node, negated, get) => {
137
+ const generateChar = (node, negated, get, stringSize) => {
138
138
  return [
139
- ...(get ? getNextChar() : []),
139
+ ...(get ? getNextChar(stringSize) : []),
140
140
  ...number(node.char.charCodeAt(0), Valtype.i32),
141
141
  negated ? [ Opcodes.i32_eq ] : [ Opcodes.i32_ne ],
142
142
  ...(get ? checkFailure(): [])
143
143
  ];
144
144
  };
145
145
 
146
- const generateSet = (node, negated, get) => {
146
+ const generateSet = (node, negated, get, stringSize) => {
147
147
  // for a single char we do not need a tmp, it is like just
148
148
  const singleChar = node.body.length === 1 && node.body[0].type === 'Character';
149
149
 
150
150
  let out = [
151
- ...(get ? getNextChar() : []),
151
+ ...(get ? getNextChar(stringSize) : []),
152
152
  ...(singleChar ? [] : [ [ Opcodes.local_set, Tmp ] ]),
153
153
  ];
154
154
 
@@ -156,11 +156,11 @@ const generateSet = (node, negated, get) => {
156
156
  out = [
157
157
  ...out,
158
158
  ...(singleChar ? [] : [ [ Opcodes.local_get, Tmp ] ]),
159
- ...generate(x, negated, false)
159
+ ...generate(x, negated, false, stringSize)
160
160
  ];
161
161
  }
162
162
 
163
- out = out.concat(new Array(node.body.length - 1).fill(negated ? [ Opcodes.i32_or ] : [ Opcodes.i32_and ]));
163
+ if (node.body.length > 0) out = out.concat(new Array(node.body.length - 1).fill(negated ? [ Opcodes.i32_or ] : [ Opcodes.i32_and ]));
164
164
 
165
165
  return [
166
166
  ...out,
@@ -168,9 +168,9 @@ const generateSet = (node, negated, get) => {
168
168
  ];
169
169
  };
170
170
 
171
- const generateRange = (node, negated, get) => {
171
+ const generateRange = (node, negated, get, stringSize) => {
172
172
  return [
173
- ...(get ? getNextChar() : []),
173
+ ...(get ? getNextChar(stringSize) : []),
174
174
  ...(get ? [ [ Opcodes.local_tee, Tmp ] ] : []),
175
175
 
176
176
  ...number(node.from.charCodeAt(0), Valtype.i32),
@@ -188,11 +188,29 @@ const generateRange = (node, negated, get) => {
188
188
  };
189
189
 
190
190
  const generateGroup = (node, negated, get) => {
191
+ // todo
192
+ return [];
193
+ };
191
194
 
195
+ const wrapFunc = (regex, func, name, index) => {
196
+ const parsed = parse(regex);
197
+
198
+ return outputFunc([
199
+ [ Opcodes.local_get, 1 ],
200
+ ...number(TYPES.string, Valtype.i32),
201
+ [ Opcodes.i32_eq ],
202
+ [ Opcodes.if, Valtype.i32 ],
203
+ // string
204
+ ...generate(parsed, false, true, 2, func),
205
+ [ Opcodes.else ],
206
+ // bytestring
207
+ ...generate(parsed, false, true, 1, func),
208
+ [ Opcodes.end ]
209
+ ], name, index);
192
210
  };
193
211
 
194
- export const test = (regex, index = 0, name = 'regex_test_' + regex) => outputFunc(generate(parse(regex), false, true, 'test'), name, index);
195
- export const search = (regex, index = 0, name = 'regex_search_' + regex) => outputFunc(generate(parse(regex), false, true, 'search'), name, index);
212
+ export const test = (regex, index = 0, name = 'regex_test_' + regex) => wrapFunc(regex, 'test', name, index);
213
+ export const search = (regex, index = 0, name = 'regex_search_' + regex) => wrapFunc(regex, 'search', name, index);
196
214
 
197
215
  const outputFunc = (wasm, name, index) => ({
198
216
  name,
@@ -200,9 +218,9 @@ const outputFunc = (wasm, name, index) => ({
200
218
  wasm,
201
219
 
202
220
  export: true,
203
- params: [ Valtype.i32 ],
221
+ params: [ Valtype.i32, Valtype.i32 ],
204
222
  returns: [ Valtype.i32 ],
205
- returnType: 0xffffffffffff1, // boolean - todo: do not hardcode this
223
+ returnType: TYPES.boolean,
206
224
  locals: {
207
225
  basePointer: { idx: 0, type: Valtype.i32 },
208
226
  iterPointer: { idx: 1, type: Valtype.i32 },