porffor 0.17.0-b103c8894 → 0.17.0-b598eb7bb
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/compiler/2c.js +28 -11
- package/compiler/builtins/array.ts +1 -1
- package/compiler/builtins.js +11 -0
- package/compiler/codegen.js +161 -25
- package/compiler/generated_builtins.js +30 -30
- package/compiler/pgo.js +9 -1
- package/compiler/wrap.js +6 -4
- package/package.json +1 -1
- package/rhemyn/README.md +7 -4
- package/rhemyn/compile.js +139 -73
- package/runner/debug.js +1 -1
- package/runner/index.js +3 -3
- package/runner/profile.js +1 -1
- package/runner/repl.js +5 -5
package/rhemyn/README.md
CHANGED
@@ -24,13 +24,16 @@ Made for use with Porffor but could possibly be adapted, implementation/library
|
|
24
24
|
- 🟢 digit, not digit (eg `\d\D`)
|
25
25
|
- 🟢 word, not word (eg `\w\W`)
|
26
26
|
- 🟢 whitespace, not whitespace (eg `\s\S`)
|
27
|
-
-
|
28
|
-
-
|
29
|
-
-
|
30
|
-
-
|
27
|
+
- 🟡 quantifiers
|
28
|
+
- 🟡 star (eg `a*`)
|
29
|
+
- 🟡 plus (eg `a+`)
|
30
|
+
- 🟡 optional (eg `a?`)
|
31
31
|
- 🟠 lazy modifier (eg `a*?`)
|
32
32
|
- 🔴 n repetitions (eg `a{4}`)
|
33
33
|
- 🔴 n-m repetitions (eg `a{2,4}`)
|
34
|
+
- 🟠 groups
|
35
|
+
- 🟠 capturing groups (`(a)`)
|
36
|
+
- 🔴 non-capturing groups (`(?:a)`)
|
34
37
|
- 🔴 assertions
|
35
38
|
- 🔴 beginning (eg `^a`)
|
36
39
|
- 🔴 end (eg `a$`)
|
package/rhemyn/compile.js
CHANGED
@@ -6,75 +6,56 @@ import { TYPES } from '../compiler/types.js';
|
|
6
6
|
|
7
7
|
// local indexes
|
8
8
|
const BasePointer = 0; // base string pointer
|
9
|
-
const
|
10
|
-
const
|
11
|
-
const
|
12
|
-
const
|
13
|
-
const
|
14
|
-
|
15
|
-
|
16
|
-
let exprLastGet = false;
|
9
|
+
const Counter = 2; // what char we are running on
|
10
|
+
const Pointer = 3; // next char pointer
|
11
|
+
const Length = 4;
|
12
|
+
const Tmp = 5;
|
13
|
+
const QuantifierTmp = 6; // the temporary variable used for quantifiers
|
14
|
+
|
17
15
|
const generate = (node, negated = false, get = true, stringSize = 2, func = 'test') => {
|
18
16
|
let out = [];
|
19
17
|
switch (node.type) {
|
20
18
|
case 'Expression':
|
21
|
-
exprLastGet = false;
|
22
19
|
out = [
|
23
20
|
// set length local
|
24
21
|
[ Opcodes.local_get, BasePointer ],
|
25
22
|
[ Opcodes.i32_load, Math.log2(ValtypeSize.i32) - 1, 0 ],
|
26
23
|
[ Opcodes.local_set, Length ],
|
27
24
|
|
28
|
-
//
|
25
|
+
// pointer = base + sizeof i32
|
29
26
|
[ Opcodes.local_get, BasePointer ],
|
30
27
|
...number(ValtypeSize.i32, Valtype.i32),
|
31
28
|
[ Opcodes.i32_add ],
|
32
|
-
[ Opcodes.local_set, IterPointer ],
|
33
|
-
|
34
|
-
[ Opcodes.loop, Blocktype.void ],
|
35
|
-
|
36
|
-
// reset pointer as iter pointer
|
37
|
-
[ Opcodes.local_get, IterPointer ],
|
38
29
|
[ Opcodes.local_set, Pointer ],
|
39
30
|
|
40
|
-
[ Opcodes.
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
[ Opcodes.
|
53
|
-
]
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
// increment counter by 1, check if eq length, if not loop
|
66
|
-
[ Opcodes.local_get, Counter ],
|
67
|
-
...number(1, Valtype.i32),
|
68
|
-
[ Opcodes.i32_add ],
|
69
|
-
[ Opcodes.local_tee, Counter ],
|
70
|
-
|
71
|
-
[ Opcodes.local_get, Length ],
|
72
|
-
[ Opcodes.i32_ne ],
|
73
|
-
|
74
|
-
[ Opcodes.br_if, 0 ],
|
31
|
+
[ Opcodes.loop, Blocktype.void ],
|
32
|
+
[ Opcodes.block, Blocktype.void ],
|
33
|
+
// generate checks
|
34
|
+
...node.body.flatMap(x => generate(x, negated, true, stringSize, func)),
|
35
|
+
|
36
|
+
// reached end without branching out, successful match
|
37
|
+
...({
|
38
|
+
test: number(1, Valtype.i32),
|
39
|
+
search: [
|
40
|
+
[ Opcodes.local_get, Counter ]
|
41
|
+
]
|
42
|
+
})[func],
|
43
|
+
[ Opcodes.return ],
|
44
|
+
[ Opcodes.end ],
|
45
|
+
|
46
|
+
// increment counter by 1, check if eq length, if not loop
|
47
|
+
[ Opcodes.local_get, Counter ],
|
48
|
+
...number(1, Valtype.i32),
|
49
|
+
[ Opcodes.i32_add ],
|
50
|
+
[ Opcodes.local_tee, Counter ],
|
51
|
+
|
52
|
+
[ Opcodes.local_get, Length ],
|
53
|
+
[ Opcodes.i32_ne ],
|
54
|
+
|
55
|
+
[ Opcodes.br_if, 0 ],
|
75
56
|
[ Opcodes.end ],
|
76
57
|
|
77
|
-
// no match
|
58
|
+
// no match
|
78
59
|
...number(({
|
79
60
|
test: 0,
|
80
61
|
search: -1
|
@@ -110,12 +91,12 @@ const generate = (node, negated = false, get = true, stringSize = 2, func = 'tes
|
|
110
91
|
return out;
|
111
92
|
};
|
112
93
|
|
113
|
-
const getNextChar = (stringSize) => [
|
94
|
+
const getNextChar = (stringSize, peek = false) => [
|
114
95
|
// get char from pointer
|
115
96
|
[ Opcodes.local_get, Pointer ],
|
116
97
|
[ stringSize == 2 ? Opcodes.i32_load16_u : Opcodes.i32_load8_u, 0, 0 ],
|
117
98
|
|
118
|
-
...(
|
99
|
+
...(peek ? [] : [
|
119
100
|
// pointer += string size
|
120
101
|
[ Opcodes.local_get, Pointer ],
|
121
102
|
...number(stringSize, Valtype.i32),
|
@@ -134,11 +115,81 @@ const checkFailure = () => [
|
|
134
115
|
[ Opcodes.br_if, 0 ]
|
135
116
|
];
|
136
117
|
|
137
|
-
const
|
118
|
+
const wrapQuantifier = (node, method, get, stringSize) => {
|
119
|
+
const [ min, max ] = node.quantifier;
|
138
120
|
return [
|
139
|
-
|
121
|
+
// initialize our temp value (number of matched characters)
|
122
|
+
...number(0, Valtype.i32),
|
123
|
+
[Opcodes.local_set, QuantifierTmp],
|
124
|
+
|
125
|
+
// start loop
|
126
|
+
[Opcodes.loop, Blocktype.void],
|
127
|
+
[ Opcodes.block, Blocktype.void ],
|
128
|
+
// if counter + tmp == length, break
|
129
|
+
[ Opcodes.local_get, Counter ],
|
130
|
+
[ Opcodes.local_get, QuantifierTmp ],
|
131
|
+
[ Opcodes.i32_add ],
|
132
|
+
[ Opcodes.local_get, Length ],
|
133
|
+
[ Opcodes.i32_eq ],
|
134
|
+
[ Opcodes.br_if, 0 ],
|
135
|
+
|
136
|
+
// if doesn't match, break
|
137
|
+
...method,
|
138
|
+
[Opcodes.br_if, 0 ],
|
139
|
+
...(get ? [
|
140
|
+
// pointer += stringSize
|
141
|
+
[ Opcodes.local_get, Pointer ],
|
142
|
+
...number(stringSize, Valtype.i32),
|
143
|
+
[ Opcodes.i32_add ],
|
144
|
+
[ Opcodes.local_set, Pointer ]
|
145
|
+
] : []),
|
146
|
+
|
147
|
+
// if maximum was reached, break
|
148
|
+
...(max ? [
|
149
|
+
[ Opcodes.local_get, QuantifierTmp ],
|
150
|
+
...number(max, Valtype.i32),
|
151
|
+
[ Opcodes.i32_eq ],
|
152
|
+
[ Opcodes.br_if, 0 ]
|
153
|
+
] : []),
|
154
|
+
|
155
|
+
[ Opcodes.local_get, QuantifierTmp ],
|
156
|
+
...number(1, Valtype.i32),
|
157
|
+
[ Opcodes.i32_add ],
|
158
|
+
[ Opcodes.local_set, QuantifierTmp ],
|
159
|
+
[ Opcodes.br, 1 ],
|
160
|
+
[ Opcodes.end ],
|
161
|
+
[ Opcodes.end ],
|
162
|
+
|
163
|
+
// if less than minimum, fail
|
164
|
+
[Opcodes.local_get, QuantifierTmp],
|
165
|
+
...number(min, Valtype.i32),
|
166
|
+
[Opcodes.i32_lt_s],
|
167
|
+
...(get ? checkFailure(): []),
|
168
|
+
|
169
|
+
// counter += tmp - 1
|
170
|
+
[ Opcodes.local_get, QuantifierTmp ],
|
171
|
+
...number(1, Valtype.i32),
|
172
|
+
[ Opcodes.i32_sub ],
|
173
|
+
[ Opcodes.local_get, Counter ],
|
174
|
+
[ Opcodes.i32_add ],
|
175
|
+
[ Opcodes.local_set, Counter ]
|
176
|
+
];
|
177
|
+
}
|
178
|
+
|
179
|
+
const generateChar = (node, negated, get, stringSize) => {
|
180
|
+
const hasQuantifier = !!node.quantifier;
|
181
|
+
const out = [
|
182
|
+
...(get ? getNextChar(stringSize, hasQuantifier) : []),
|
140
183
|
...number(node.char.charCodeAt(0), Valtype.i32),
|
141
184
|
negated ? [ Opcodes.i32_eq ] : [ Opcodes.i32_ne ],
|
185
|
+
];
|
186
|
+
|
187
|
+
if (node.quantifier) {
|
188
|
+
return wrapQuantifier(node, out, get, stringSize);
|
189
|
+
}
|
190
|
+
|
191
|
+
return [
|
192
|
+
...out,
|
142
193
|
...(get ? checkFailure(): [])
|
143
194
|
];
|
144
195
|
};
|
@@ -146,21 +197,31 @@ const generateChar = (node, negated, get, stringSize) => {
|
|
146
197
|
const generateSet = (node, negated, get, stringSize) => {
|
147
198
|
// for a single char we do not need a tmp, it is just like matching a plain character
|
148
199
|
const singleChar = node.body.length === 1 && node.body[0].type === 'Character';
|
200
|
+
if (singleChar) return generateChar(node.body[0], negated, get, stringSize)
|
149
201
|
|
150
|
-
|
151
|
-
|
152
|
-
|
202
|
+
const hasQuantifier = !!node.quantifier;
|
203
|
+
|
204
|
+
const out = [
|
205
|
+
...(get ? getNextChar(stringSize, hasQuantifier) : []),
|
206
|
+
[ Opcodes.local_set, Tmp ],
|
153
207
|
];
|
154
208
|
|
155
209
|
for (const x of node.body) {
|
156
|
-
out
|
157
|
-
|
158
|
-
...(singleChar ? [] : [ [ Opcodes.local_get, Tmp ] ]),
|
210
|
+
out.push(
|
211
|
+
[ Opcodes.local_get, Tmp ],
|
159
212
|
...generate(x, negated, false, stringSize)
|
160
|
-
|
213
|
+
);
|
161
214
|
}
|
162
215
|
|
163
|
-
if (node.body.length > 0)
|
216
|
+
if (node.body.length > 0) {
|
217
|
+
for (let i = 0; i < node.body.length - 1; i++) {
|
218
|
+
out.push(negated ? [ Opcodes.i32_or ] : [ Opcodes.i32_and ])
|
219
|
+
}
|
220
|
+
};
|
221
|
+
|
222
|
+
if (hasQuantifier) {
|
223
|
+
return wrapQuantifier(node, out, get, stringSize);
|
224
|
+
}
|
164
225
|
|
165
226
|
return [
|
166
227
|
...out,
|
@@ -206,28 +267,33 @@ const wrapFunc = (regex, func, name, index) => {
|
|
206
267
|
// bytestring
|
207
268
|
...generate(parsed, false, true, 1, func),
|
208
269
|
[ Opcodes.end ]
|
209
|
-
], name, index);
|
270
|
+
], name, index, types[func]);
|
210
271
|
};
|
211
272
|
|
212
273
|
export const test = (regex, index = 0, name = 'regex_test_' + regex) => wrapFunc(regex, 'test', name, index);
|
213
274
|
export const search = (regex, index = 0, name = 'regex_search_' + regex) => wrapFunc(regex, 'search', name, index);
|
214
275
|
|
215
|
-
const
|
276
|
+
export const types = {
|
277
|
+
test: TYPES.boolean,
|
278
|
+
search: TYPES.number
|
279
|
+
};
|
280
|
+
|
281
|
+
const outputFunc = (wasm, name, index, returnType) => ({
|
216
282
|
name,
|
217
283
|
index,
|
218
284
|
wasm,
|
285
|
+
returnType,
|
219
286
|
|
220
287
|
export: true,
|
221
288
|
params: [ Valtype.i32, Valtype.i32 ],
|
222
289
|
returns: [ Valtype.i32 ],
|
223
|
-
returnType: TYPES.boolean,
|
224
290
|
locals: {
|
225
291
|
basePointer: { idx: 0, type: Valtype.i32 },
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
292
|
+
inputType: { idx: 1, type: Valtype.i32 },
|
293
|
+
counter: { idx: 2, type: Valtype.i32 },
|
294
|
+
pointer: { idx: 3, type: Valtype.i32 },
|
295
|
+
length: { idx: 4, type: Valtype.i32 },
|
296
|
+
tmp: { idx: 5, type: Valtype.i32 },
|
297
|
+
quantifierTmp: { idx: 6, type: Valtype.i32 },
|
232
298
|
}
|
233
299
|
});
|
package/runner/debug.js
CHANGED
@@ -43,7 +43,7 @@ let lastLine;
|
|
43
43
|
let output = '';
|
44
44
|
|
45
45
|
try {
|
46
|
-
const { exports } =
|
46
|
+
const { exports } = compile(source, process.argv.includes('--module') ? [ 'module' ] : [], {
|
47
47
|
y: n => {
|
48
48
|
if (callStarts[callStarts.length - 1] === n - 1) {
|
49
49
|
// end of call
|
package/runner/index.js
CHANGED
@@ -130,14 +130,14 @@ const print = str => {
|
|
130
130
|
let runStart;
|
131
131
|
try {
|
132
132
|
if (process.argv.includes('-b')) {
|
133
|
-
const { wasm, exports } =
|
133
|
+
const { wasm, exports } = compile(source, process.argv.includes('--module') ? [ 'module' ] : [], {}, print);
|
134
134
|
|
135
135
|
runStart = performance.now();
|
136
136
|
if (!process.argv.includes('--no-run')) exports.main();
|
137
137
|
|
138
138
|
console.log(`\n\nwasm size: ${wasm.byteLength} bytes`);
|
139
139
|
} else {
|
140
|
-
const { exports } =
|
140
|
+
const { exports } = compile(source, process.argv.includes('--module') ? [ 'module' ] : [], {}, print);
|
141
141
|
|
142
142
|
runStart = performance.now();
|
143
143
|
if (!process.argv.includes('--no-run')) exports.main();
|
@@ -146,7 +146,7 @@ try {
|
|
146
146
|
} catch (e) {
|
147
147
|
// if (cache) process.stdout.write(cache);
|
148
148
|
let out = e;
|
149
|
-
if (!process.argv.includes('-i') && e.
|
149
|
+
if (!process.argv.includes('-i') && Object.getPrototypeOf(e).message != null) out = `${e.constructor.name}${e.message != null ? `: ${e.message}` : ''}`;
|
150
150
|
console.error(out);
|
151
151
|
}
|
152
152
|
|
package/runner/profile.js
CHANGED
@@ -20,7 +20,7 @@ let spin = 0;
|
|
20
20
|
let last = 0;
|
21
21
|
|
22
22
|
try {
|
23
|
-
const { exports } =
|
23
|
+
const { exports } = compile(source, process.argv.includes('--module') ? [ 'module' ] : [], {
|
24
24
|
y: n => {
|
25
25
|
tmp[n] = performance.now();
|
26
26
|
},
|
package/runner/repl.js
CHANGED
@@ -80,13 +80,13 @@ const memoryToString = mem => {
|
|
80
80
|
};
|
81
81
|
|
82
82
|
let prev = '';
|
83
|
-
const run =
|
83
|
+
const run = (source, _context, _filename, callback, run = true) => {
|
84
84
|
// hack: print "secret" before latest code ran to only enable printing for new code
|
85
85
|
|
86
86
|
let toRun = (prev ? (prev + `;\nprint(-0x1337);\n`) : '') + source.trim();
|
87
87
|
|
88
88
|
let shouldPrint = !prev;
|
89
|
-
const { exports, pages } =
|
89
|
+
const { exports, pages } = compile(toRun, [], {}, str => {
|
90
90
|
if (shouldPrint) process.stdout.write(str);
|
91
91
|
if (str === '-4919') shouldPrint = true;
|
92
92
|
});
|
@@ -127,12 +127,12 @@ replServer.defineCommand('memory', {
|
|
127
127
|
});
|
128
128
|
replServer.defineCommand('asm', {
|
129
129
|
help: 'Log Wasm decompiled bytecode',
|
130
|
-
|
130
|
+
action() {
|
131
131
|
this.clearBufferedCommand();
|
132
132
|
|
133
133
|
try {
|
134
134
|
process.argv.push('--opt-funcs');
|
135
|
-
|
135
|
+
run('', null, null, () => {}, false);
|
136
136
|
process.argv.pop();
|
137
137
|
} catch { }
|
138
138
|
|
@@ -141,7 +141,7 @@ replServer.defineCommand('asm', {
|
|
141
141
|
});
|
142
142
|
replServer.defineCommand('js', {
|
143
143
|
help: 'Log JS being actually ran',
|
144
|
-
|
144
|
+
action() {
|
145
145
|
this.clearBufferedCommand();
|
146
146
|
console.log(prev);
|
147
147
|
this.displayPrompt();
|