porffor 0.2.0-eeb45f8 → 0.2.0-ef043de

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/LICENSE +20 -20
  2. package/README.md +159 -88
  3. package/asur/README.md +2 -0
  4. package/asur/index.js +1262 -0
  5. package/byg/index.js +237 -0
  6. package/compiler/2c.js +317 -72
  7. package/compiler/{sections.js → assemble.js} +63 -15
  8. package/compiler/builtins/annexb_string.js +72 -0
  9. package/compiler/builtins/annexb_string.ts +19 -0
  10. package/compiler/builtins/array.ts +145 -0
  11. package/compiler/builtins/base64.ts +151 -0
  12. package/compiler/builtins/crypto.ts +120 -0
  13. package/compiler/builtins/date.ts +1370 -0
  14. package/compiler/builtins/escape.ts +141 -0
  15. package/compiler/builtins/int.ts +147 -0
  16. package/compiler/builtins/number.ts +527 -0
  17. package/compiler/builtins/porffor.d.ts +42 -0
  18. package/compiler/builtins/string.ts +1055 -0
  19. package/compiler/builtins/tostring.ts +45 -0
  20. package/compiler/builtins.js +470 -269
  21. package/compiler/{codeGen.js → codegen.js} +958 -370
  22. package/compiler/embedding.js +22 -22
  23. package/compiler/encoding.js +108 -10
  24. package/compiler/generated_builtins.js +1262 -0
  25. package/compiler/index.js +36 -34
  26. package/compiler/log.js +6 -3
  27. package/compiler/opt.js +50 -36
  28. package/compiler/parse.js +35 -27
  29. package/compiler/precompile.js +123 -0
  30. package/compiler/prefs.js +26 -0
  31. package/compiler/prototype.js +13 -28
  32. package/compiler/types.js +37 -0
  33. package/compiler/wasmSpec.js +28 -8
  34. package/compiler/wrap.js +51 -46
  35. package/fib.js +7 -0
  36. package/package.json +9 -5
  37. package/porf +4 -0
  38. package/rhemyn/compile.js +5 -3
  39. package/rhemyn/parse.js +323 -320
  40. package/rhemyn/test/parse.js +58 -58
  41. package/runner/compare.js +34 -34
  42. package/runner/debug.js +122 -0
  43. package/runner/index.js +62 -10
  44. package/runner/profiler.js +102 -0
  45. package/runner/repl.js +40 -7
  46. package/runner/sizes.js +37 -37
  47. package/compiler/builtins/base64.js +0 -92
  48. package/runner/info.js +0 -89
  49. package/runner/profile.js +0 -46
  50. package/runner/results.json +0 -1
  51. package/runner/transform.js +0 -15
  52. package/util/enum.js +0 -20
package/rhemyn/parse.js CHANGED
@@ -1,321 +1,324 @@
1
- const State = {
2
- none: 0,
3
- insideSet: 1
4
- };
5
-
6
- const Quantifiers = {
7
- '*': [ 0 ], // 0 -
8
- '+': [ 1 ], // 1 -
9
- '?': [ 0, 1 ], // 0 - 1
10
- };
11
- const QuantifierKeys = Object.keys(Quantifiers);
12
-
13
- const getArg = (name, def) => {
14
- const arg = (typeof process !== 'undefined' ? process.argv : Deno.args).find(x => x.startsWith(`-${name}=`));
15
- if (arg) return arg.split('=')[0];
16
-
17
- return def;
18
- };
19
-
20
- // full is spec-compliant but slower. not needed most of the time. (evil)
21
- const DotChars = () => ({
22
- full: [ '\n', '\r', '\u2028', '\u2029' ],
23
- simple: [ '\n', '\r' ],
24
- fast: [ '\n' ]
25
- })[getArg('regex-dot', 'fast')];
26
-
27
- const WordChars = () => ({
28
- full: [ [ 'a', 'z' ], [ 'A', 'Z' ], [ '0', '9' ], '_' ],
29
- fast: [ [ '_', 'z' ], [ 'A', 'Z' ], [ '0', '9' ] ] // skip individual _ with _-z BUT it also matches '`'
30
- })[getArg('regex-word', 'full')];
31
-
32
- const WhitespaceChars = () => ({
33
- full: [ ' ', '\t', '\n', '\r', '\u2028', '\u2029' ],
34
- simple: [ ' ', '\t', '\n', '\r' ]
35
- })[getArg('regex-ws', 'simple')];
36
-
37
- const _Metachars = () => ({
38
- unescaped: {
39
- '.': [ DotChars(), true ], // dot
40
- },
41
- escaped: {
42
- d: [ [ [ '0', '9' ] ], false ], // digit
43
- D: [ [ [ '0', '9' ] ], true ], // not digit
44
- w: [ WordChars(), false ], // word
45
- W: [ WordChars(), true ], // not word
46
- s: [ WhitespaceChars(), false ], // whitespace
47
- S: [ WhitespaceChars(), true ], // not whitespace
48
- }
49
- });
50
-
51
- const EscapeSequences = {
52
- f: '\f',
53
- n: '\n',
54
- r: '\r',
55
- t: '\t',
56
- v: '\v',
57
- '0': '\0'
58
- };
59
-
60
- const HexDigit = /[0-9a-fA-F]/;
61
-
62
- export default str => {
63
- const Metachars = _Metachars();
64
-
65
- const out = {
66
- type: 'Expression',
67
- body: []
68
- };
69
- let node = out, parents = [];
70
-
71
- let state = State.none, setIndex = 0, escape = false;
72
- for (let i = 0; i < str.length; i++) {
73
- const c = str[i];
74
-
75
- const charNode = char => ({
76
- type: 'Character',
77
- char
78
- });
79
-
80
- const rangeNode = (from, to) => ({
81
- type: 'Range',
82
- from,
83
- to
84
- });
85
-
86
- const addChar = (char = c) => {
87
- node.body.push(charNode(char));
88
- };
89
-
90
- const addSet = (matches, negated = false) => {
91
- let body = matches.map(x => x[1] ? rangeNode(x[0], x[1]) : charNode(x));
92
- if (state === State.insideSet) {
93
- // if negated, mark each node as negated for merge
94
- if (negated) body = body.map(x => {
95
- x.negated = true;
96
- return x;
97
- });
98
-
99
- // already in set, merge bodies
100
- node.body.push(...body);
101
- return;
102
- }
103
-
104
- node.body.push({
105
- type: 'Set',
106
- body,
107
- negated
108
- });
109
- };
110
-
111
- const addMetachar = meta => {
112
- const [ matches, negated = false ] = meta;
113
- return addSet(matches, negated);
114
- };
115
-
116
- // get next char and consume it
117
- const seek = (allowEscaped = true) => {
118
- const cNext = str[++i];
119
-
120
- if (cNext === '\\') return !allowEscaped ? undefined : [ str[++i], true ];
121
- return !allowEscaped ? cNext : [ cNext, false ];
122
- };
123
-
124
- // get next char without consuming
125
- const peek = (allowEscaped = true, offset = 0) => {
126
- const cNext = str[i + 1 + offset];
127
-
128
- if (cNext === '\\') return !allowEscaped ? undefined : [ str[i + 2 + offset], true ];
129
- return !allowEscaped ? cNext : [ cNext, false ];
130
- };
131
-
132
- if (escape) {
133
- escape = false;
134
- if (EscapeSequences[c]) {
135
- addChar(EscapeSequences[c]);
136
- continue;
137
- }
138
-
139
- if (Metachars.escaped[c]) {
140
- addMetachar(Metachars.escaped[c]);
141
- continue;
142
- }
143
-
144
- if (c === 'c') {
145
- // \c (not [A-Za-z] ...) = literal \c... (WHY)
146
- const next = peek(false);
147
- if (next == null || /[^a-zA-Z]/.test(next)) {
148
- addChar('\\');
149
- addChar('c');
150
- continue;
151
- }
152
-
153
- // \c[A-Za-z]
154
- const code = seek(false).charCodeAt(0);
155
- addChar(String.fromCharCode(code % 32));
156
- continue;
157
- }
158
-
159
- if (c === 'x') {
160
- // \x = x
161
- // \xH = xH
162
- // \x[0-9a-zA-Z][0-9a-zA-Z] = \xAB
163
- const next1 = peek(false);
164
- const next2 = peek(false, 1);
165
-
166
- // missing a char or invalid hex digit
167
- if (next1 == null || next2 == null || !HexDigit.test(next1) || !HexDigit.test(next2)) {
168
- addChar('x');
169
- continue;
170
- }
171
-
172
- const code = parseInt(seek(false) + seek(false), 16);
173
- addChar(String.fromCodePoint(code));
174
- continue;
175
- }
176
-
177
- if (c === 'u') {
178
- // '\u' = u
179
- // '\uHHH' = uHHH
180
- // '\uABCD' = \uABCD
181
- const next1 = peek(false);
182
- const next2 = peek(false, 1);
183
- const next3 = peek(false, 2);
184
- const next4 = peek(false, 3);
185
-
186
- // missing a char or invalid hex digit
187
- if (next1 == null || next2 == null || next3 == null || next4 == null || !HexDigit.test(next1) || !HexDigit.test(next2) || !HexDigit.test(next3) || !HexDigit.test(next4)) {
188
- addChar('u');
189
- continue;
190
- }
191
-
192
- const code = parseInt(seek(false) + seek(false) + seek(false) + seek(false), 16);
193
- addChar(String.fromCodePoint(code));
194
- continue;
195
- }
196
-
197
- addChar();
198
- continue;
199
- }
200
-
201
- if (c === '\\') {
202
- escape = true;
203
- continue;
204
- }
205
-
206
- switch (state) {
207
- case State.none:
208
- if (c === '[') {
209
- parents.push(node);
210
- node = {
211
- type: 'Set',
212
- body: [],
213
- negated: false
214
- };
215
-
216
- parents.at(-1).body.push(node);
217
-
218
- state = State.insideSet;
219
- setIndex = 0;
220
- continue;
221
- }
222
-
223
- if (c === '(') {
224
- parents.push(node);
225
- node = {
226
- type: 'Group',
227
- body: []
228
- };
229
-
230
- parents.at(-1).body.push(node);
231
- continue;
232
- }
233
-
234
- if (c === ')') {
235
- if (node.type !== 'Group') throw new SyntaxError('Unmatched closing parenthesis');
236
-
237
- node = parents.pop();
238
- continue;
239
- }
240
-
241
- if (QuantifierKeys.includes(c)) {
242
- node.body.at(-1).quantifier = Quantifiers[c];
243
-
244
- // lazy modifier
245
- if (peek(false) === '?') node.body.at(-1).lazy = true;
246
-
247
- continue;
248
- }
249
-
250
- if (Metachars.unescaped[c]) {
251
- addMetachar(Metachars.unescaped[c]);
252
- continue;
253
- }
254
-
255
- addChar();
256
- break;
257
-
258
- case State.insideSet:
259
- setIndex++;
260
- if (setIndex === 1) {
261
- // first char in set
262
- if (c === '^') {
263
- node.negated = true;
264
- continue;
265
- }
266
- }
267
-
268
- if (c === ']') {
269
- state = State.none;
270
- node = parents.pop();
271
-
272
- continue;
273
- }
274
-
275
- // range
276
- if (c === '-') {
277
- // start of set (or not char), just literal -
278
- if (node.body.at(-1)?.char == null) {
279
- addChar(); // add -
280
- continue;
281
- }
282
-
283
- const from = node.body.pop().char;
284
- const [ to, escaped ] = seek();
285
-
286
- // end of set, just literal -
287
- if (to == null || (!escaped && to === ']')) {
288
- addChar(from); // add from char back
289
- i--; // rollback seek
290
-
291
- addChar(); // add -
292
- continue;
293
- }
294
-
295
- // next char was escaped and a metachar, just literal -
296
- if (escaped && Metachars.escaped[to] != null) {
297
- i -= 2; // rollback seek
298
-
299
- addChar(); // add -
300
- continue;
301
- }
302
-
303
- if (to < from) throw new SyntaxError('Range out of order');
304
-
305
- node.body.push(rangeNode(from, to));
306
- continue;
307
- }
308
-
309
- addChar();
310
- break;
311
- }
312
- }
313
-
314
- // still in a group by the end
315
- if (node.type !== 'Expression') throw new SyntaxError('Unmatched opening parenthesis');
316
-
317
- // still in a set by the end
318
- if (state === State.insideSet) throw new SyntaxError('Unmatched opening square bracket');
319
-
320
- return out;
1
+ const State = {
2
+ none: 0,
3
+ insideSet: 1
4
+ };
5
+
6
+ const Quantifiers = {
7
+ '*': [ 0 ], // 0 -
8
+ '+': [ 1 ], // 1 -
9
+ '?': [ 0, 1 ], // 0 - 1
10
+ };
11
+ const QuantifierKeys = Object.keys(Quantifiers);
12
+
13
+ const getArg = (name, def) => {
14
+ const arg = (typeof process !== 'undefined' ? process.argv : Deno.args).find(x => x.startsWith(`-${name}=`));
15
+ if (arg) return arg.split('=')[0];
16
+
17
+ return def;
18
+ };
19
+
20
+ // full is spec-compliant but slower. not needed most of the time. (evil)
21
+ const DotChars = () => ({
22
+ full: [ '\n', '\r', '\u2028', '\u2029' ],
23
+ simple: [ '\n', '\r' ],
24
+ fast: [ '\n' ]
25
+ })[getArg('regex-dot', 'fast')];
26
+
27
+ const WordChars = () => ({
28
+ full: [ [ 'a', 'z' ], [ 'A', 'Z' ], [ '0', '9' ], '_' ],
29
+ fast: [ [ '_', 'z' ], [ 'A', 'Z' ], [ '0', '9' ] ] // skip individual _ with _-z BUT it also matches '`'
30
+ })[getArg('regex-word', 'full')];
31
+
32
+ const WhitespaceChars = () => ({
33
+ full: [ ' ', '\t', '\n', '\r', '\u2028', '\u2029' ],
34
+ simple: [ ' ', '\t', '\n', '\r' ]
35
+ })[getArg('regex-ws', 'simple')];
36
+
37
+ const _Metachars = () => ({
38
+ unescaped: {
39
+ '.': [ DotChars(), true ], // dot
40
+ },
41
+ escaped: {
42
+ d: [ [ [ '0', '9' ] ], false ], // digit
43
+ D: [ [ [ '0', '9' ] ], true ], // not digit
44
+ w: [ WordChars(), false ], // word
45
+ W: [ WordChars(), true ], // not word
46
+ s: [ WhitespaceChars(), false ], // whitespace
47
+ S: [ WhitespaceChars(), true ], // not whitespace
48
+ }
49
+ });
50
+
51
+ const EscapeSequences = {
52
+ f: '\f',
53
+ n: '\n',
54
+ r: '\r',
55
+ t: '\t',
56
+ v: '\v',
57
+ '0': '\0'
58
+ };
59
+
60
+ const HexDigit = /[0-9a-fA-F]/;
61
+
62
+ export default str => {
63
+ const Metachars = _Metachars();
64
+
65
+ const out = {
66
+ type: 'Expression',
67
+ body: []
68
+ };
69
+ let node = out, parents = [];
70
+
71
+ let state = State.none, setIndex = 0, escape = false;
72
+ for (let i = 0; i < str.length; i++) {
73
+ const c = str[i];
74
+
75
+ const charNode = char => ({
76
+ type: 'Character',
77
+ char
78
+ });
79
+
80
+ const rangeNode = (from, to) => ({
81
+ type: 'Range',
82
+ from,
83
+ to
84
+ });
85
+
86
+ const addChar = (char = c) => {
87
+ node.body.push(charNode(char));
88
+ };
89
+
90
+ const addSet = (matches, negated = false) => {
91
+ let body = matches.map(x => x[1] ? rangeNode(x[0], x[1]) : charNode(x));
92
+ if (state === State.insideSet) {
93
+ // if negated, mark each node as negated for merge
94
+ if (negated) body = body.map(x => {
95
+ x.negated = true;
96
+ return x;
97
+ });
98
+
99
+ // already in set, merge bodies
100
+ node.body.push(...body);
101
+ return;
102
+ }
103
+
104
+ node.body.push({
105
+ type: 'Set',
106
+ body,
107
+ negated
108
+ });
109
+ };
110
+
111
+ const addMetachar = meta => {
112
+ const [ matches, negated = false ] = meta;
113
+ return addSet(matches, negated);
114
+ };
115
+
116
+ // get next char and consume it
117
+ const seek = (allowEscaped = true) => {
118
+ const cNext = str[++i];
119
+
120
+ if (cNext === '\\') return !allowEscaped ? undefined : [ str[++i], true ];
121
+ return !allowEscaped ? cNext : [ cNext, false ];
122
+ };
123
+
124
+ // get next char without consuming
125
+ const peek = (allowEscaped = true, offset = 0) => {
126
+ const cNext = str[i + 1 + offset];
127
+
128
+ if (cNext === '\\') return !allowEscaped ? undefined : [ str[i + 2 + offset], true ];
129
+ return !allowEscaped ? cNext : [ cNext, false ];
130
+ };
131
+
132
+ if (escape) {
133
+ escape = false;
134
+ if (EscapeSequences[c]) {
135
+ addChar(EscapeSequences[c]);
136
+ continue;
137
+ }
138
+
139
+ if (Metachars.escaped[c]) {
140
+ addMetachar(Metachars.escaped[c]);
141
+ continue;
142
+ }
143
+
144
+ if (c === 'c') {
145
+ // \c (not [A-Za-z] ...) = literal \c... (WHY)
146
+ const next = peek(false);
147
+ if (next == null || /[^a-zA-Z]/.test(next)) {
148
+ addChar('\\');
149
+ addChar('c');
150
+ continue;
151
+ }
152
+
153
+ // \c[A-Za-z]
154
+ const code = seek(false).charCodeAt(0);
155
+ addChar(String.fromCharCode(code % 32));
156
+ continue;
157
+ }
158
+
159
+ if (c === 'x') {
160
+ // \x = x
161
+ // \xH = xH
162
+ // \x[0-9a-zA-Z][0-9a-zA-Z] = \xAB
163
+ const next1 = peek(false);
164
+ const next2 = peek(false, 1);
165
+
166
+ // missing a char or invalid hex digit
167
+ if (next1 == null || next2 == null || !HexDigit.test(next1) || !HexDigit.test(next2)) {
168
+ addChar('x');
169
+ continue;
170
+ }
171
+
172
+ const code = parseInt(seek(false) + seek(false), 16);
173
+ addChar(String.fromCodePoint(code));
174
+ continue;
175
+ }
176
+
177
+ if (c === 'u') {
178
+ // '\u' = u
179
+ // '\uHHH' = uHHH
180
+ // '\uABCD' = \uABCD
181
+ const next1 = peek(false);
182
+ const next2 = peek(false, 1);
183
+ const next3 = peek(false, 2);
184
+ const next4 = peek(false, 3);
185
+
186
+ // missing a char or invalid hex digit
187
+ if (next1 == null || next2 == null || next3 == null || next4 == null || !HexDigit.test(next1) || !HexDigit.test(next2) || !HexDigit.test(next3) || !HexDigit.test(next4)) {
188
+ addChar('u');
189
+ continue;
190
+ }
191
+
192
+ const code = parseInt(seek(false) + seek(false) + seek(false) + seek(false), 16);
193
+ addChar(String.fromCodePoint(code));
194
+ continue;
195
+ }
196
+
197
+ addChar();
198
+ continue;
199
+ }
200
+
201
+ if (c === '\\') {
202
+ escape = true;
203
+ continue;
204
+ }
205
+
206
+ switch (state) {
207
+ case State.none:
208
+ if (c === '[') {
209
+ parents.push(node);
210
+ node = {
211
+ type: 'Set',
212
+ body: [],
213
+ negated: false
214
+ };
215
+
216
+ parents.at(-1).body.push(node);
217
+
218
+ state = State.insideSet;
219
+ setIndex = 0;
220
+ continue;
221
+ }
222
+
223
+ if (c === '(') {
224
+ parents.push(node);
225
+ node = {
226
+ type: 'Group',
227
+ body: []
228
+ };
229
+
230
+ parents.at(-1).body.push(node);
231
+ continue;
232
+ }
233
+
234
+ if (c === ')') {
235
+ if (node.type !== 'Group') throw new SyntaxError('Unmatched closing parenthesis');
236
+
237
+ node = parents.pop();
238
+ continue;
239
+ }
240
+
241
+ if (QuantifierKeys.includes(c)) {
242
+ const last = node.body.at(-1);
243
+ if (!last) continue; // ignore, maybe lookahead
244
+
245
+ last.quantifier = Quantifiers[c];
246
+
247
+ // lazy modifier
248
+ if (peek(false) === '?') last.lazy = true;
249
+
250
+ continue;
251
+ }
252
+
253
+ if (Metachars.unescaped[c]) {
254
+ addMetachar(Metachars.unescaped[c]);
255
+ continue;
256
+ }
257
+
258
+ addChar();
259
+ break;
260
+
261
+ case State.insideSet:
262
+ setIndex++;
263
+ if (setIndex === 1) {
264
+ // first char in set
265
+ if (c === '^') {
266
+ node.negated = true;
267
+ continue;
268
+ }
269
+ }
270
+
271
+ if (c === ']') {
272
+ state = State.none;
273
+ node = parents.pop();
274
+
275
+ continue;
276
+ }
277
+
278
+ // range
279
+ if (c === '-') {
280
+ // start of set (or not char), just literal -
281
+ if (node.body.at(-1)?.char == null) {
282
+ addChar(); // add -
283
+ continue;
284
+ }
285
+
286
+ const from = node.body.pop().char;
287
+ const [ to, escaped ] = seek();
288
+
289
+ // end of set, just literal -
290
+ if (to == null || (!escaped && to === ']')) {
291
+ addChar(from); // add from char back
292
+ i--; // rollback seek
293
+
294
+ addChar(); // add -
295
+ continue;
296
+ }
297
+
298
+ // next char was escaped and a metachar, just literal -
299
+ if (escaped && Metachars.escaped[to] != null) {
300
+ i -= 2; // rollback seek
301
+
302
+ addChar(); // add -
303
+ continue;
304
+ }
305
+
306
+ if (to < from) throw new SyntaxError('Range out of order');
307
+
308
+ node.body.push(rangeNode(from, to));
309
+ continue;
310
+ }
311
+
312
+ addChar();
313
+ break;
314
+ }
315
+ }
316
+
317
+ // still in a group by the end
318
+ if (node.type !== 'Expression') throw new SyntaxError('Unmatched opening parenthesis');
319
+
320
+ // still in a set by the end
321
+ if (state === State.insideSet) throw new SyntaxError('Unmatched opening square bracket');
322
+
323
+ return out;
321
324
  };