style-script 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +22 -0
- data/bin/style +5 -0
- data/examples/blocks.style +57 -0
- data/examples/code.style +173 -0
- data/examples/hello.style +1 -0
- data/examples/poignant.style +186 -0
- data/examples/potion.style +205 -0
- data/examples/underscore.style +603 -0
- data/lib/style-script.rb +21 -0
- data/lib/style_script/command_line.rb +235 -0
- data/lib/style_script/grammar.y +491 -0
- data/lib/style_script/lexer.js +363 -0
- data/lib/style_script/lexer.rb +272 -0
- data/lib/style_script/nodes.js +756 -0
- data/lib/style_script/nodes.rb +1079 -0
- data/lib/style_script/parse_error.rb +29 -0
- data/lib/style_script/parser.js +544 -0
- data/lib/style_script/parser.rb +2716 -0
- data/lib/style_script/repl.js +33 -0
- data/lib/style_script/rewriter.js +377 -0
- data/lib/style_script/rewriter.rb +289 -0
- data/lib/style_script/runner.js +11 -0
- data/lib/style_script/scope.js +129 -0
- data/lib/style_script/scope.rb +95 -0
- data/lib/style_script/std/style-script.js +96 -0
- data/lib/style_script/style-script.js +50 -0
- data/lib/style_script/value.rb +64 -0
- data/package.json +8 -0
- data/style-script.gemspec +21 -0
- metadata +93 -0
@@ -0,0 +1,33 @@
|
|
1
|
+
(function(){
  // A StyleScript port/version of the Node.js REPL: each line read from
  // standard input is compiled, the resulting JavaScript is evaluated, and
  // the prompt is printed again.
  //
  // NOTE: `run` evaluates code with a direct `eval`, so every local declared
  // here (and inside `run`) is visible to the evaluated code. The names are
  // therefore part of the REPL's observable behavior and must not change.
  var style, prompt, quit, readline, run;

  // Required modules.
  style = require('./style-script');
  // Presumably what brings `p`, `puts` and `print` into scope -- confirm
  // against the Node.js version this targets.
  process.mixin(require('sys'));

  // Shortcut variables.
  prompt = 'style> ';

  // Close standard I/O. Not called in this file; it is reachable from
  // evaluated code through the enclosing scope.
  quit = function quit() {
    return process.stdio.close();
  };

  // Attempt to evaluate the compiled JavaScript. A defined result is printed
  // with `p`; an exception is reported (stack trace when available) rather
  // than propagated. The prompt is re-printed in either case.
  run = function run(js) {
    var val;
    try {
      val = eval(js);
      if (val !== undefined) p(val);
    } catch (err) {
      puts(err.stack || err.toString());
    }
    return print(prompt);
  };

  // The main REPL function. Called every time a line of code is entered;
  // hands the source to the compiler with `run` as the callback.
  readline = function readline(code) {
    return style.compile(code, run);
  };

  // Start up the REPL.
  process.stdio.open();
  process.stdio.addListener('data', readline);
  print(prompt);
})();
|
@@ -0,0 +1,377 @@
|
|
1
|
+
(function(){
|
2
|
+
var BALANCED_PAIRS, EXPRESSION_CLOSE, EXPRESSION_START, EXPRESSION_TAIL, IMPLICIT_CALL, IMPLICIT_END, IMPLICIT_FUNC, INVERSES, SINGLE_CLOSERS, SINGLE_LINERS, n, pair, re;
var __hasProp = Object.prototype.hasOwnProperty;

// In order to keep the grammar simple, the stream of tokens that the Lexer
// emits is rewritten by the Rewriter, smoothing out ambiguities, mis-nested
// indentation, and single-line flavors of expressions.
exports.Rewriter = (re = function re() { });

// Tokens that must be balanced, as [opener, closer] pairs.
BALANCED_PAIRS = [['(', ')'], ['[', ']'], ['{', '}'], ['INDENT', 'OUTDENT'], ['PARAM_START', 'PARAM_END'], ['CALL_START', 'CALL_END'], ['INDEX_START', 'INDEX_END']];

// One walk over the pairs derives three lookups:
//   EXPRESSION_START -- tokens that signal the start of a balanced pair,
//   EXPRESSION_TAIL  -- tokens that signal the end of a balanced pair,
//   INVERSES         -- opener -> closer and closer -> opener mappings for
//                       the token pairs we're trying to fix up.
EXPRESSION_START = [];
EXPRESSION_TAIL = [];
INVERSES = {};
for (n = 0; n < BALANCED_PAIRS.length; n++) {
  pair = BALANCED_PAIRS[n];
  EXPRESSION_START.push(pair[0]);
  EXPRESSION_TAIL.push(pair[1]);
  INVERSES[pair[0]] = pair[1];
  INVERSES[pair[1]] = pair[0];
}

// Tokens that indicate the close of a clause of an expression.
EXPRESSION_CLOSE = ['CATCH', 'WHEN', 'ELSE', 'FINALLY'].concat(EXPRESSION_TAIL);

// Token pairs that, in immediate succession, indicate an implicit call:
// an IMPLICIT_FUNC token directly followed by an IMPLICIT_CALL token opens
// the call, and an IMPLICIT_END token closes it.
IMPLICIT_FUNC = ['IDENTIFIER', 'SUPER', ')', 'CALL_END', ']', 'INDEX_END'];
IMPLICIT_END = ['IF', 'UNLESS', 'FOR', 'WHILE', 'TERMINATOR', 'OUTDENT'];
IMPLICIT_CALL = ['IDENTIFIER', 'NUMBER', 'STRING', 'JS', 'REGEX', 'NEW', 'PARAM_START', 'TRY', 'DELETE', 'TYPEOF', 'SWITCH', 'ARGUMENTS', 'TRUE', 'FALSE', 'YES', 'NO', 'ON', 'OFF', '!', '!!', 'NOT', '->', '=>', '[', '(', '{'];

// Single-line flavors of block expressions that have unclosed endings.
// The grammar can't disambiguate them, so we insert the implicit indentation.
SINGLE_LINERS = ['ELSE', "->", "=>", 'TRY', 'FINALLY', 'THEN'];
SINGLE_CLOSERS = ['TERMINATOR', 'CATCH', 'FINALLY', 'ELSE', 'OUTDENT', 'LEADING_WHEN', 'PARAM_START'];
|
47
|
+
// Rewrite the token stream in multiple passes, one logical filter at
|
48
|
+
// a time. This could certainly be changed into a single pass through the
|
49
|
+
// stream, with a big ol' efficient switch, but it's much nicer like this.
|
50
|
+
re.prototype.rewrite = function rewrite(tokens) {
  // Stores `tokens` on the instance, runs every rewriting pass over it in a
  // fixed order, and returns the (mutated) token array.
  var passes, n;
  this.tokens = tokens;
  // Argument-free passes, listed in the order they must run.
  passes = [
    'adjust_comments',
    'remove_leading_newlines',
    'remove_mid_expression_newlines',
    'move_commas_outside_outdents',
    'close_open_calls_and_indexes',
    'add_implicit_parentheses',
    'add_implicit_indentation'
  ];
  for (n = 0; n < passes.length; n++) {
    this[passes[n]]();
  }
  // Balance is verified before the closing-paren rewrite runs.
  this.ensure_balance(BALANCED_PAIRS);
  this.rewrite_closing_parens();
  return this.tokens;
};
|
63
|
+
// Rewrite the token stream, looking one token ahead and behind.
|
64
|
+
// Allow the return value of the block to tell us how many tokens to move
|
65
|
+
// forwards (or backwards) in the stream, to make sure we don't miss anything
|
66
|
+
// as the stream changes length under our feet.
|
67
|
+
re.prototype.scan_tokens = function scan_tokens(block) {
  // Walks `this.tokens`, calling `block(prev, token, post, index)` for the
  // token at the cursor. The number returned by `block` is added to the
  // cursor, so a callback that inserts or removes tokens decides where the
  // scan resumes. Stops at the first falsy slot; always returns true.
  var cursor = 0;
  while (this.tokens[cursor]) {
    cursor += block(this.tokens[cursor - 1], this.tokens[cursor], this.tokens[cursor + 1], cursor);
  }
  return true;
};
|
79
|
+
// Massage newlines and indentations so that comments don't have to be
|
80
|
+
// correctly indented, or appear on their own line.
|
81
|
+
re.prototype.adjust_comments = function adjust_comments() {
  // Normalizes the tokens around each COMMENT token. `before` and `after`
  // are the tokens two positions away from the comment; `prev` is its
  // immediate predecessor. Returns whatever `scan_tokens` returns.
  var self = this;
  return this.scan_tokens(function(prev, token, post, i) {
    var before, after, opposed;
    if (token[0] !== 'COMMENT') {
      return 1;
    }
    before = self.tokens[i - 2];
    after = self.tokens[i + 2];
    // An INDENT/OUTDENT (or OUTDENT/INDENT) pair of equal size surrounding
    // the comment cancels out.
    opposed = before && after &&
      ((before[0] === 'INDENT' && after[0] === 'OUTDENT') ||
       (before[0] === 'OUTDENT' && after[0] === 'INDENT'));
    if (opposed && before[1] === after[1]) {
      // Remove the later token first so the earlier index stays valid.
      self.tokens.splice(i + 2, 1);
      self.tokens.splice(i - 2, 1);
      return 0;
    }
    // `after` is undefined when the comment sits within two tokens of the
    // end of the stream; it must be checked before it is indexed.
    if (prev && prev[0] === 'TERMINATOR' && after && after[0] === 'INDENT') {
      // Replace the TERMINATOR before the comment with the INDENT that
      // followed it.
      self.tokens.splice(i + 2, 1);
      self.tokens[i - 1] = after;
      return 1;
    }
    if (prev && prev[0] !== 'TERMINATOR' && prev[0] !== 'INDENT' && prev[0] !== 'OUTDENT') {
      // Insert a TERMINATOR in front of the comment, carrying over the third
      // element of `prev`.
      self.tokens.splice(i, 0, ['TERMINATOR', "\n", prev[2]]);
      return 2;
    }
    return 1;
  });
};
|
110
|
+
// Leading newlines would introduce an ambiguity in the grammar, so we
|
111
|
+
// dispatch them here.
|
112
|
+
re.prototype.remove_leading_newlines = function remove_leading_newlines() {
  // Drops a single TERMINATOR from the head of the stream, returning the
  // removed token (undefined when nothing was removed). An empty stream is
  // left untouched instead of throwing on `tokens[0][0]`.
  var first = this.tokens[0];
  if (first && first[0] === 'TERMINATOR') {
    return this.tokens.shift();
  }
};
|
117
|
+
// Some blocks occur in the middle of expressions -- when we're expecting
|
118
|
+
// this, remove their trailing newlines.
|
119
|
+
re.prototype.remove_mid_expression_newlines = function remove_mid_expression_newlines() {
  // Deletes every TERMINATOR that is immediately followed by a token listed
  // in EXPRESSION_CLOSE.
  var self = this;
  return this.scan_tokens(function(prev, token, post, i) {
    var closes_next = post && EXPRESSION_CLOSE.indexOf(post[0]) >= 0;
    if (closes_next && token[0] === 'TERMINATOR') {
      self.tokens.splice(i, 1);
      // Stay on the same index: it now holds the token that followed.
      return 0;
    }
    return 1;
  });
};
|
133
|
+
// Make sure that we don't accidentally break trailing commas, which need
|
134
|
+
// to go on the outside of expression closers.
|
135
|
+
re.prototype.move_commas_outside_outdents = function move_commas_outside_outdents() {
  // When an OUTDENT directly follows a ',', swap the two so the comma ends
  // up after (outside) the OUTDENT.
  //
  // The previous body called `splice(i, 1, token)`, which replaces the
  // OUTDENT with itself and so never moved anything. Swapping the pair
  // matches the Ruby rewriter, which deletes the OUTDENT and re-inserts it
  // at `i - 1`.
  var self = this;
  return this.scan_tokens(function(prev, token, post, i) {
    // `prev` is undefined for the first token in the stream.
    if (token[0] === 'OUTDENT' && prev && prev[0] === ',') {
      self.tokens.splice(i - 1, 2, token, prev);
    }
    return 1;
  });
};
|
148
|
+
// We've tagged the opening parenthesis of a method call, and the opening
|
149
|
+
// bracket of an indexing operation. Match them with their close.
|
150
|
+
re.prototype.close_open_calls_and_indexes = function close_open_calls_and_indexes() {
  // Retags the ')' that closes a CALL_START as CALL_END and the ']' that
  // closes an INDEX_START as INDEX_END. Each stack entry counts the plain
  // '(' / '[' tokens still open inside the innermost call / index; a closer
  // seen while that count is zero belongs to the call / index itself.
  var parens = [0], brackets = [0];
  return this.scan_tokens(function(prev, token, post, i) {
    switch (token[0]) {
      case 'CALL_START':
        parens.push(0);
        break;
      case 'INDEX_START':
        brackets.push(0);
        break;
      case '(':
        parens[parens.length - 1] += 1;
        break;
      case '[':
        brackets[brackets.length - 1] += 1;
        break;
      case ')':
        if (parens[parens.length - 1] === 0) {
          parens.pop();
          token[0] = 'CALL_END';
        } else {
          parens[parens.length - 1] -= 1;
        }
        break;
      case ']':
        if (brackets[brackets.length - 1] === 0) {
          brackets.pop();
          token[0] = 'INDEX_END';
        } else {
          brackets[brackets.length - 1] -= 1;
        }
        break;
    }
    return 1;
  });
};
|
186
|
+
// Methods may be optionally called without parentheses, for simple cases.
|
187
|
+
// Insert the implicit parentheses here, so that the parser doesn't have to
|
188
|
+
// deal with them.
|
189
|
+
re.prototype.add_implicit_parentheses = function add_implicit_parentheses() {
  // Inserts CALL_START / CALL_END tokens around calls written without
  // parentheses. `stack` holds, per indentation level, the number of
  // implicit calls opened and not yet closed.
  var self = this, stack = [0];
  return this.scan_tokens(function(prev, token, post, i) {
    var tag = token[0], carried, top, pending, at, n;
    if (tag === 'INDENT') {
      stack.push(0);
    }
    if (tag === 'OUTDENT') {
      // Calls still open inside the block carry over to the enclosing level.
      carried = stack.pop();
      stack[stack.length - 1] += carried;
    }
    top = stack.length - 1;
    pending = stack[top];
    // Close every pending call at an IMPLICIT_END token or at the last token.
    if (pending > 0 && (IMPLICIT_END.indexOf(tag) >= 0 || post == null)) {
      // After an OUTDENT the closers go behind it, otherwise before the token.
      at = tag === 'OUTDENT' ? i + 1 : i;
      for (n = 0; n < pending; n++) {
        self.tokens.splice(at, 0, ['CALL_END', ')']);
      }
      stack[top] = 0;
      // Skip the inserted closers plus the current token.
      return pending + 1;
    }
    // An IMPLICIT_FUNC token directly followed by an IMPLICIT_CALL token
    // starts an implicit call.
    if (prev && IMPLICIT_FUNC.indexOf(prev[0]) >= 0 && IMPLICIT_CALL.indexOf(tag) >= 0) {
      self.tokens.splice(i, 0, ['CALL_START', '(']);
      stack[top] += 1;
      return 2;
    }
    return 1;
  });
};
|
224
|
+
// Because our grammar is LALR(1), it can't handle some single-line
|
225
|
+
// expressions that lack ending delimiters. Use the lexer to add the implicit
|
226
|
+
// blocks, so it doesn't need to.
|
227
|
+
// ')' can close a single-line block, but we need to make sure it's balanced.
|
228
|
+
re.prototype.add_implicit_indentation = function add_implicit_indentation() {
  // Wraps the body of a single-line block (a SINGLE_LINERS token that is not
  // followed by an INDENT) in an ['INDENT', 2] ... ['OUTDENT', 2] pair. THEN
  // tokens are removed once their block has been wrapped.
  //
  // `post` and the look-ahead token `tok` are undefined at the end of the
  // stream; their tags are read through guards so a trailing single-liner
  // or an ELSE block that runs to the end no longer throws.
  var self = this;
  return this.scan_tokens(function(prev, token, post, i) {
    var starter, post_tag, idx, parens, tok, tag, insertion;
    starter = token[0];
    post_tag = post ? post[0] : undefined;
    if (SINGLE_LINERS.indexOf(starter) < 0 || post_tag === 'INDENT') {
      return 1;
    }
    // "else if" chains shouldn't get blocks.
    if (starter === 'ELSE' && post_tag === 'IF') {
      return 1;
    }
    self.tokens.splice(i + 1, 0, ['INDENT', 2]);
    idx = i + 1;
    parens = 0;
    while (true) {
      idx += 1;
      tok = self.tokens[idx];
      tag = tok ? tok[0] : undefined;
      // The block ends at the end of the stream, at a SINGLE_CLOSERS token,
      // or at a ')' that is not matched by a '(' inside the block. An ELSE
      // encountered while wrapping an ELSE does not end the block.
      if ((!tok || SINGLE_CLOSERS.indexOf(tag) >= 0 || (tag === ')' && parens === 0)) &&
          !(starter === 'ELSE' && tag === 'ELSE')) {
        // Keep a trailing comma outside of the block.
        insertion = self.tokens[idx - 1][0] === "," ? idx - 1 : idx;
        self.tokens.splice(insertion, 0, ['OUTDENT', 2]);
        break;
      }
      if (tag === '(') {
        parens += 1;
      }
      if (tag === ')') {
        parens -= 1;
      }
    }
    if (starter !== 'THEN') {
      return 1;
    }
    // Drop the THEN itself and re-scan the token that takes its place.
    self.tokens.splice(i, 1);
    return 0;
  });
};
|
265
|
+
// Ensure that all listed pairs of tokens are correctly balanced throughout
|
266
|
+
// the course of the token stream.
|
267
|
+
re.prototype.ensure_balance = function ensure_balance(pairs) {
  // Verifies that each [open, close] pair in `pairs` is balanced across the
  // token stream. Throws the string "too many <value>" as soon as a closer
  // outnumbers its opener, and "unclosed <open>" if an opener is left open
  // once the whole stream has been scanned. Returns undefined otherwise.
  var depth = {}, name;
  this.scan_tokens(function(prev, token, post, i) {
    var n, open, close;
    for (n = 0; n < pairs.length; n++) {
      open = pairs[n][0];
      close = pairs[n][1];
      depth[open] = depth[open] || 0;
      if (token[0] === open) {
        depth[open] += 1;
      }
      if (token[0] === close) {
        depth[open] -= 1;
      }
      if (depth[open] < 0) {
        throw "too many " + token[1];
      }
    }
    return 1;
  });
  // Report the first opener (in insertion order) that is still open.
  for (name in depth) {
    if (__hasProp.call(depth, name) && depth[name] > 0) {
      throw "unclosed " + name;
    }
  }
};
|
313
|
+
// We'd like to support syntax like this:
|
314
|
+
// el.click((event) ->
|
315
|
+
// el.hide())
|
316
|
+
// In order to accomplish this, move outdents that follow closing parens
|
317
|
+
// inwards, safely. The steps to accomplish this are:
|
318
|
+
//
|
319
|
+
// 1. Check that all paired tokens are balanced and in order.
|
320
|
+
// 2. Rewrite the stream with a stack: if you see an '(' or INDENT, add it
|
321
|
+
// to the stack. If you see an ')' or OUTDENT, pop the stack and replace
|
322
|
+
// it with the inverse of what we've just popped.
|
323
|
+
// 3. Keep track of "debt" for tokens that we fake, to make sure we end
|
324
|
+
// up balanced in the end.
|
325
|
+
re.prototype.rewrite_closing_parens = function rewrite_closing_parens() {
  // Re-nests closing tokens so that every closer matches the most recently
  // opened delimiter. `stack` holds the currently open opener tokens; `debt`
  // counts, per opener tag, closers that were inserted early and whose
  // original must be swallowed when it is eventually reached.
  var self = this, stack = [], debt = {}, key;
  for (key in INVERSES) {
    if (__hasProp.call(INVERSES, key)) {
      debt[key] = 0;
    }
  }
  return this.scan_tokens(function(prev, token, post, i) {
    var tag = token[0], inv = INVERSES[tag], match, mtag, val;
    // Push openers onto the stack.
    if (EXPRESSION_START.indexOf(tag) >= 0) {
      stack.push(token);
      return 1;
    }
    // Tokens that are neither openers nor closers are passed over.
    if (EXPRESSION_TAIL.indexOf(tag) < 0) {
      return 1;
    }
    // A closer that was already inserted earlier: swallow this one.
    if (debt[inv] > 0) {
      debt[inv] -= 1;
      self.tokens.splice(i, 1);
      return 0;
    }
    // Pop the stack of open delimiters.
    match = stack.pop();
    mtag = match[0];
    // Continue onwards if it's the expected closer.
    if (tag === INVERSES[mtag]) {
      return 1;
    }
    // Unexpected closer: insert the correct one first and record the debt.
    // An inserted OUTDENT reuses the value of the INDENT it closes.
    debt[mtag] += 1;
    val = mtag === 'INDENT' ? match[1] : INVERSES[mtag];
    self.tokens.splice(i, 0, [INVERSES[mtag], val]);
    return 1;
  });
};
|
377
|
+
})();
|
@@ -0,0 +1,289 @@
|
|
1
|
+
module StyleScript

  # In order to keep the grammar simple, the stream of tokens that the Lexer
  # emits is rewritten by the Rewriter, smoothing out ambiguities, mis-nested
  # indentation, and single-line flavors of expressions.
  #
  # Tokens are two-element arrays: +token[0]+ is the tag (a Symbol or a
  # String) and +token[1]+ is a value object that responds to +line+.
  class Rewriter

    # Tokens that must be balanced.
    BALANCED_PAIRS = [['(', ')'], ['[', ']'], ['{', '}'], [:INDENT, :OUTDENT],
      [:PARAM_START, :PARAM_END], [:CALL_START, :CALL_END], [:INDEX_START, :INDEX_END]]

    # Tokens that signal the start of a balanced pair.
    EXPRESSION_START = BALANCED_PAIRS.map {|pair| pair.first }

    # Tokens that signal the end of a balanced pair.
    EXPRESSION_TAIL  = BALANCED_PAIRS.map {|pair| pair.last }

    # Tokens that indicate the close of a clause of an expression.
    EXPRESSION_CLOSE = [:CATCH, :WHEN, :ELSE, :FINALLY] + EXPRESSION_TAIL

    # Token pairs that, in immediate succession, indicate an implicit call:
    # an IMPLICIT_FUNC token directly followed by an IMPLICIT_CALL token
    # opens the call, and an IMPLICIT_END token closes it.
    IMPLICIT_FUNC = [:IDENTIFIER, :SUPER, ')', :CALL_END, ']', :INDEX_END]
    IMPLICIT_END  = [:IF, :UNLESS, :FOR, :WHILE, "\n", :OUTDENT]
    IMPLICIT_CALL = [:IDENTIFIER, :NUMBER, :STRING, :JS, :REGEX, :NEW, :PARAM_START,
                     :TRY, :DELETE, :TYPEOF, :SWITCH,
                     :TRUE, :FALSE, :YES, :NO, :ON, :OFF, '!', '!!', :NOT,
                     '@', '->', '=>', '[', '(', '{']

    # The inverse mappings of token pairs we're trying to fix up.
    INVERSES = BALANCED_PAIRS.inject({}) do |memo, pair|
      memo[pair.first] = pair.last
      memo[pair.last]  = pair.first
      memo
    end

    # Single-line flavors of block expressions that have unclosed endings.
    # The grammar can't disambiguate them, so we insert the implicit indentation.
    SINGLE_LINERS  = [:ELSE, "->", "=>", :TRY, :FINALLY, :THEN]
    SINGLE_CLOSERS = ["\n", :CATCH, :FINALLY, :ELSE, :OUTDENT, :LEADING_WHEN, :PARAM_START]

    # Rewrite the token stream in multiple passes, one logical filter at
    # a time. This could certainly be changed into a single pass through the
    # stream, with a big ol' efficient switch, but it's much nicer like this.
    #
    # The +tokens+ array is modified in place and returned. An empty array is
    # accepted and returned unchanged. Raises ParseError when the paired
    # tokens are not balanced.
    def rewrite(tokens)
      @tokens = tokens
      adjust_comments
      remove_leading_newlines
      remove_mid_expression_newlines
      move_commas_outside_outdents
      close_open_calls_and_indexes
      add_implicit_parentheses
      add_implicit_indentation
      ensure_balance(*BALANCED_PAIRS)
      rewrite_closing_parens
      @tokens
    end

    # Rewrite the token stream, looking one token ahead and behind.
    # Allow the return value of the block to tell us how many tokens to move
    # forwards (or backwards) in the stream, to make sure we don't miss anything
    # as the stream changes length under our feet.
    #
    # Yields +prev+, +token+, +post+ and the index. +prev+ is nil for the
    # first token: Ruby's negative indices wrap around, so <tt>@tokens[-1]</tt>
    # would hand the *last* token to the block as the predecessor of the first.
    def scan_tokens
      i = 0
      while @tokens[i]
        prev = i > 0 ? @tokens[i - 1] : nil
        i += yield(prev, @tokens[i], @tokens[i + 1], i)
      end
    end

    # Massage newlines and indentations so that comments don't have to be
    # correctly indented, or appear on their own line.
    def adjust_comments
      scan_tokens do |prev, token, post, i|
        next 1 unless token[0] == :COMMENT
        # Tokens two positions away on either side; nil when out of range
        # (the index check avoids negative-index wrap-around).
        before   = i >= 2 ? @tokens[i - 2] : nil
        after    = @tokens[i + 2]
        prev_tag = prev && prev[0]
        if before && after &&
           ((before[0] == :INDENT  && after[0] == :OUTDENT) ||
            (before[0] == :OUTDENT && after[0] == :INDENT)) &&
           before[1] == after[1]
          # An equal-sized indent/outdent pair around the comment cancels
          # out. Delete the later token first so the earlier index stays valid.
          @tokens.delete_at(i + 2)
          @tokens.delete_at(i - 2)
          next 0
        elsif prev_tag == "\n" && after && after[0] == :INDENT
          # Replace the newline before the comment with the INDENT that
          # followed it. +after+ is nil near the end of the stream.
          @tokens.delete_at(i + 2)
          @tokens[i - 1] = after
          next 1
        elsif prev && !["\n", :INDENT, :OUTDENT].include?(prev_tag)
          # Put a newline in front of the comment.
          @tokens.insert(i, ["\n", Value.new("\n", token[1].line)])
          next 2
        else
          next 1
        end
      end
    end

    # Leading newlines would introduce an ambiguity in the grammar, so we
    # dispatch them here. Does nothing for an empty token stream.
    def remove_leading_newlines
      first = @tokens.first
      @tokens.shift if first && first[0] == "\n"
    end

    # Some blocks occur in the middle of expressions -- when we're expecting
    # this, remove their trailing newlines.
    def remove_mid_expression_newlines
      scan_tokens do |prev, token, post, i|
        next 1 unless post && EXPRESSION_CLOSE.include?(post[0]) && token[0] == "\n"
        @tokens.delete_at(i)
        next 0
      end
    end

    # Make sure that we don't accidentally break trailing commas, which need
    # to go on the outside of expression closers.
    def move_commas_outside_outdents
      scan_tokens do |prev, token, post, i|
        if token[0] == :OUTDENT && prev && prev[0] == ','
          # Move the OUTDENT in front of the comma.
          @tokens.delete_at(i)
          @tokens.insert(i - 1, token)
        end
        next 1
      end
    end

    # We've tagged the opening parenthesis of a method call, and the opening
    # bracket of an indexing operation. Match them with their close.
    #
    # Each stack entry counts the plain '(' / '[' tokens still open inside
    # the innermost call / index; a closer seen while that count is zero
    # belongs to the call / index itself and is retagged.
    def close_open_calls_and_indexes
      parens, brackets = [0], [0]
      scan_tokens do |prev, token, post, i|
        case token[0]
        when :CALL_START  then parens.push(0)
        when :INDEX_START then brackets.push(0)
        when '('          then parens[-1] += 1
        when '['          then brackets[-1] += 1
        when ')'
          if parens.last == 0
            parens.pop
            token[0] = :CALL_END
          else
            parens[-1] -= 1
          end
        when ']'
          if brackets.last == 0
            brackets.pop
            token[0] = :INDEX_END
          else
            brackets[-1] -= 1
          end
        end
        next 1
      end
    end

    # Methods may be optionally called without parentheses, for simple cases.
    # Insert the implicit parentheses here, so that the parser doesn't have to
    # deal with them.
    #
    # +stack+ holds, per indentation level, the number of implicit calls
    # opened and not yet closed.
    def add_implicit_parentheses
      stack = [0]
      scan_tokens do |prev, token, post, i|
        stack.push(0) if token[0] == :INDENT
        if token[0] == :OUTDENT
          # Calls still open inside the block carry over to the outer level.
          last = stack.pop
          stack[-1] += last
        end
        if stack.last > 0 && (IMPLICIT_END.include?(token[0]) || post.nil?)
          # After an OUTDENT the closers go behind it, otherwise before the token.
          idx = token[0] == :OUTDENT ? i + 1 : i
          stack.last.times { @tokens.insert(idx, [:CALL_END, Value.new(')', token[1].line)]) }
          # Skip the inserted closers plus the current token.
          size, stack[-1] = stack[-1] + 1, 0
          next size
        end
        # +prev+ is nil for the first token, which can never be a call argument.
        next 1 unless prev && IMPLICIT_FUNC.include?(prev[0]) && IMPLICIT_CALL.include?(token[0])
        @tokens.insert(i, [:CALL_START, Value.new('(', token[1].line)])
        stack[-1] += 1
        next 2
      end
    end

    # Because our grammar is LALR(1), it can't handle some single-line
    # expressions that lack ending delimiters. Use the lexer to add the implicit
    # blocks, so it doesn't need to.
    # ')' can close a single-line block, but we need to make sure it's balanced.
    #
    # +post+ and the look-ahead token are nil at the end of the stream, so
    # their tags are read through guards rather than indexed directly.
    def add_implicit_indentation
      scan_tokens do |prev, token, post, i|
        post_tag = post && post[0]
        next 1 unless SINGLE_LINERS.include?(token[0]) && post_tag != :INDENT &&
          !(token[0] == :ELSE && post_tag == :IF) # Elsifs shouldn't get blocks.
        starter = token[0]
        line    = token[1].line
        @tokens.insert(i + 1, [:INDENT, Value.new(2, line)])
        idx     = i + 1
        parens  = 0
        loop do
          idx += 1
          tok     = @tokens[idx]
          tok_tag = tok && tok[0]
          # The block ends at the end of the stream, at a SINGLE_CLOSERS
          # token, or at an unmatched ')'. An ELSE met while wrapping an ELSE
          # does not end the block.
          if (!tok || SINGLE_CLOSERS.include?(tok_tag) ||
              (tok_tag == ')' && parens == 0)) &&
             !(starter == :ELSE && tok_tag == :ELSE)
            # Keep a trailing comma outside of the block.
            insertion = @tokens[idx - 1][0] == "," ? idx - 1 : idx
            @tokens.insert(insertion, [:OUTDENT, Value.new(2, line)])
            break
          end
          parens += 1 if tok_tag == '('
          parens -= 1 if tok_tag == ')'
        end
        next 1 unless token[0] == :THEN
        # Drop the THEN itself and re-scan the token that takes its place.
        @tokens.delete_at(i)
        next 0
      end
    end

    # Ensure that all listed pairs of tokens are correctly balanced throughout
    # the course of the token stream.
    #
    # Raises ParseError as soon as a closer outnumbers its opener, or, after
    # the scan, for the first opener that is still open. Set the VERBOSE
    # environment variable to print the stream before checking.
    def ensure_balance(*pairs)
      puts "\nbefore ensure_balance: #{@tokens.inspect}" if ENV['VERBOSE']
      levels, lines = Hash.new(0), Hash.new
      scan_tokens do |prev, token, post, i|
        # Remember the line of the latest occurrence of every tag, for the
        # "unclosed" error below.
        lines[token[0]] = token[1].line
        pairs.each do |pair|
          open, close = *pair
          levels[open] += 1 if token[0] == open
          levels[open] -= 1 if token[0] == close
          raise ParseError.new(token[0], token[1], nil) if levels[open] < 0
        end
        next 1
      end
      unclosed = levels.detect {|k, v| v > 0 }
      sym = unclosed && unclosed[0]
      raise ParseError.new(sym, Value.new(sym, lines[sym]), nil, "unclosed '#{sym}'") if unclosed
    end

    # We'd like to support syntax like this:
    #    el.click((event) ->
    #      el.hide())
    # In order to accomplish this, move outdents that follow closing parens
    # inwards, safely. The steps to accomplish this are:
    #
    # 1. Check that all paired tokens are balanced and in order.
    # 2. Rewrite the stream with a stack: if you see an '(' or INDENT, add it
    #    to the stack. If you see an ')' or OUTDENT, pop the stack and replace
    #    it with the inverse of what we've just popped.
    # 3. Keep track of "debt" for tokens that we fake, to make sure we end
    #    up balanced in the end.
    #
    def rewrite_closing_parens
      verbose = ENV['VERBOSE']
      stack, debt = [], Hash.new(0)
      stack_stats = lambda { "stack: #{stack.inspect} debt: #{debt.inspect}\n\n" }
      puts "rewrite_closing_original: #{@tokens.inspect}" if verbose
      scan_tokens do |prev, token, post, i|
        tag, inv = token[0], INVERSES[token[0]]
        # Push openers onto the stack.
        if EXPRESSION_START.include?(tag)
          stack.push(token)
          puts "pushing #{tag} #{stack_stats[]}" if verbose
          next 1
        # The end of an expression, check stack and debt for a pair.
        elsif EXPRESSION_TAIL.include?(tag)
          puts @tokens[i..-1].inspect if verbose
          # If the tag is already in our debt, swallow it.
          if debt[inv] > 0
            debt[inv] -= 1
            @tokens.delete_at(i)
            puts "tag in debt #{tag} #{stack_stats[]}" if verbose
            next 0
          else
            # Pop the stack of open delimiters.
            match = stack.pop
            mtag  = match[0]
            # Continue onwards if it's the expected tag.
            if tag == INVERSES[mtag]
              puts "expected tag #{tag} #{stack_stats[]}" if verbose
              next 1
            else
              # Unexpected close, insert correct close, adding to the debt.
              # An inserted OUTDENT reuses the value of the INDENT it closes.
              debt[mtag] += 1
              puts "unexpected #{tag}, replacing with #{INVERSES[mtag]} #{stack_stats[]}" if verbose
              val = mtag == :INDENT ? match[1] : INVERSES[mtag]
              @tokens.insert(i, [INVERSES[mtag], Value.new(val, token[1].line)])
              next 1
            end
          end
        else
          # Uninteresting token:
          next 1
        end
      end
    end

  end
end
|