coffee-script 0.3.2 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +2 -2
- data/README.md +15 -0
- data/lib/coffee-script.rb +1 -21
- data/lib/coffee_script.rb +31 -0
- metadata +30 -46
- data/README +0 -41
- data/bin/coffee +0 -5
- data/coffee-script.gemspec +0 -27
- data/examples/blocks.coffee +0 -57
- data/examples/code.coffee +0 -173
- data/examples/poignant.coffee +0 -186
- data/examples/potion.coffee +0 -205
- data/examples/underscore.coffee +0 -603
- data/extras/CoffeeScript.tmbundle/Preferences/CoffeeScript.tmPreferences +0 -24
- data/extras/CoffeeScript.tmbundle/Syntaxes/CoffeeScript.tmLanguage +0 -361
- data/extras/CoffeeScript.tmbundle/info.plist +0 -10
- data/extras/EXTRAS +0 -20
- data/extras/coffee.vim +0 -111
- data/lib/coffee_script/coffee-script.js +0 -50
- data/lib/coffee_script/command_line.rb +0 -235
- data/lib/coffee_script/grammar.y +0 -481
- data/lib/coffee_script/lexer.js +0 -363
- data/lib/coffee_script/lexer.rb +0 -272
- data/lib/coffee_script/narwhal/coffee-script.js +0 -96
- data/lib/coffee_script/nodes.js +0 -443
- data/lib/coffee_script/nodes.rb +0 -1050
- data/lib/coffee_script/parse_error.rb +0 -29
- data/lib/coffee_script/parser.js +0 -477
- data/lib/coffee_script/parser.rb +0 -2611
- data/lib/coffee_script/repl.js +0 -33
- data/lib/coffee_script/rewriter.js +0 -377
- data/lib/coffee_script/rewriter.rb +0 -289
- data/lib/coffee_script/runner.js +0 -11
- data/lib/coffee_script/scope.js +0 -73
- data/lib/coffee_script/scope.rb +0 -91
- data/lib/coffee_script/value.rb +0 -64
- data/package.json +0 -8
data/lib/coffee_script/lexer.js
DELETED
@@ -1,363 +0,0 @@
|
|
1
|
-
(function(){
|
2
|
-
var ASSIGNMENT, CALLABLE, CODE, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, JS, JS_CLEANER, KEYWORDS, LAST_DENT, LAST_DENTS, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, Rewriter, STRING, STRING_NEWLINES, WHITESPACE, lex;
|
3
|
-
Rewriter = require('./rewriter').Rewriter;
|
4
|
-
// The Lexer turns a stream of CoffeeScript source into tagged tokens. A
// little of the grammar's ambiguity is resolved here, before parsing, by
// giving the Lexer some extra knowledge of the language.
lex = function lex() { };
exports.Lexer = lex;
|
8
|
-
// Constants ============================================================

// Words handed to the parser verbatim: each is tagged with its own
// upper-cased name instead of 'IDENTIFIER'.
KEYWORDS = [
  "if", "else", "then", "unless",
  "true", "false", "yes", "no", "on", "off",
  "and", "or", "is", "isnt", "not",
  "new", "return", "arguments",
  "try", "catch", "finally", "throw",
  "break", "continue",
  "for", "in", "of", "by", "where", "while",
  "delete", "instanceof", "typeof",
  "switch", "when",
  "super", "extends"
];

// Token matching regexes. All of them are anchored to the start of the
// remaining source chunk.
IDENTIFIER = /^([a-zA-Z$_](\w|\$)*)/;
NUMBER = /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i;
STRING = /^(""|''|"([\s\S]*?)([^\\]|\\\\)"|'([\s\S]*?)([^\\]|\\\\)')/;
HEREDOC = /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/;
JS = /^(``|`([\s\S]*?)([^\\]|\\\\)`)/;
OPERATOR = /^([+\*&|\/\-%=<>:!?]+)/;
WHITESPACE = /^([ \t]+)/;
// One or more consecutive comment lines. Each line runs up to (not including)
// its newline; a `$` anchor is deliberately not used because without the `m`
// flag it only matches at the very end of the input, which would restrict
// comments to the last line of the source.
COMMENT = /^(((\n?[ \t]*)?#[^\n]*)+)/;
CODE = /^((-|=)>)/;
REGEX = /^(\/(.*?)([^\\]|\\\\)\/[imgy]{0,4})/;
MULTI_DENT = /^((\n([ \t]*))+)(\.)?/;
LAST_DENTS = /\n([ \t]*)/g;
LAST_DENT = /\n([ \t]*)/;
ASSIGNMENT = /^(:|=)$/;

// Token cleaning regexes.
JS_CLEANER = /(^`|`$)/g;
MULTILINER = /\n/g;
STRING_NEWLINES = /\n[ \t]*/g;
COMMENT_CLEANER = /(^[ \t]*#|\n[ \t]*$)/mg;
NO_NEWLINE = /^([+\*&|\/\-%=<>:!.\\][<>=&|]*|and|or|is|isnt|not|delete|typeof|instanceof)$/;
// Leading whitespace of every line (the `m` flag makes `^` match at each
// line start rather than only at the start of the heredoc body).
HEREDOC_INDENT = /^[ \t]+/mg;

// Tokens which a regular expression will never immediately follow, but which
// a division operator might.
// See: http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
NOT_REGEX = ['IDENTIFIER', 'NUMBER', 'REGEX', 'STRING', ')', '++', '--', ']', '}', 'FALSE', 'NULL', 'TRUE'];

// Tokens which could legitimately be invoked or indexed.
CALLABLE = ['IDENTIFIER', 'SUPER', ')', ']', '}', 'STRING'];
|
39
|
-
// Tokenize a string of CoffeeScript source. All lexer state is reset, the
// source is consumed one token at a time until the cursor reaches the end,
// any indentation still open is closed, and the token stream is returned
// after a pass through the Rewriter. Slow and steady.
// TODO: chomp the incoming code (extra line breaks are not removed yet).
lex.prototype.tokenize = function tokenize(code) {
  var rewriter;
  this.code = code;
  this.i = 0;          // Character position currently being parsed.
  this.line = 1;       // The current line.
  this.indent = 0;     // The current indent level.
  this.indents = [];   // Stack of all indent levels we are currently within.
  this.tokens = [];    // All parsed tokens, each in the form [tag, value].
  this.spaced = null;  // Value of the last token that had a space after it.
  for (; this.i < this.code.length; ) {
    this.chunk = this.code.slice(this.i);
    this.extract_next_token();
  }
  this.close_indentation();
  rewriter = new Rewriter();
  return rewriter.rewrite(this.tokens);
};
|
62
|
-
// At every position, try each tokenizer in priority order and stop at the
// first one that succeeds (returning null). When none of them matches, the
// chunk is handed to literal_token, whose result is returned.
lex.prototype.extract_next_token = function extract_next_token() {
  var tokenizers = [
    'identifier_token',
    'number_token',
    'heredoc_token',
    'string_token',
    'js_token',
    'regex_token',
    'indent_token',
    'comment_token',
    'whitespace_token'
  ];
  var n;
  for (n = 0; n < tokenizers.length; n += 1) {
    if (this[tokenizers[n]]()) {
      return null;
    }
  }
  return this.literal_token();
};
|
94
|
-
// Tokenizers ==========================================================
|
95
|
-
// Matches identifying literals: variables, keywords, method names, etc.
// Returns false when the chunk does not start with an identifier; otherwise
// pushes the token, advances the cursor and returns true.
//
// Look-behind uses the tag()/value() helpers, whose index counts back from
// the end of the token list (1 = last token, 2 = the one before it).
lex.prototype.identifier_token = function identifier_token() {
  var id, prev, tag;
  if (!((id = this.match(IDENTIFIER, 1)))) {
    return false;
  }
  // Keywords are special identifiers tagged with their own name,
  // 'if' will result in an ['IF', "if"] token.
  tag = KEYWORDS.indexOf(id) >= 0 ? id.toUpperCase() : 'IDENTIFIER';
  // A `when` that opens a line (after an indent, an outdent or a newline
  // token) is tagged separately, matching the Ruby lexer, which also counts
  // the newline token here.
  prev = this.tag();
  if (tag === 'WHEN' && (prev === 'OUTDENT' || prev === 'INDENT' || prev === 'TERMINATOR')) {
    tag = 'LEADING_WHEN';
  }
  if (tag === 'IDENTIFIER' && this.value() === '::') {
    this.tag(1, 'PROTOTYPE_ACCESS');
  }
  // A single '.' before an identifier is a property access; '..' is left
  // alone. A '?' right before the '.' turns it into a soak access and the
  // '?' token itself is dropped.
  if (tag === 'IDENTIFIER' && this.value() === '.' && this.value(2) !== '.') {
    if (this.tag(2) === '?') {
      this.tag(1, 'SOAK_ACCESS');
      this.tokens.splice(-2, 1);
    } else {
      this.tag(1, 'PROPERTY_ACCESS');
    }
  }
  this.token(tag, id);
  this.i += id.length;
  return true;
};
|
122
|
-
// Matches numbers, including decimals, hex, and exponential notation.
// Returns true (after pushing a NUMBER token and advancing the cursor) when
// the chunk starts with a number, false otherwise.
lex.prototype.number_token = function number_token() {
  var literal = this.match(NUMBER, 1);
  if (literal) {
    this.token('NUMBER', literal);
    this.i += literal.length;
    return true;
  }
  return false;
};
|
132
|
-
// Matches strings, including multi-line strings. Every newline inside the
// literal (together with the indentation that follows it) is rewritten to a
// space plus an escaped newline before the STRING token is pushed; the line
// counter is advanced by the newlines found in the raw literal.
lex.prototype.string_token = function string_token() {
  var raw = this.match(STRING, 1);
  if (raw) {
    this.token('STRING', raw.replace(STRING_NEWLINES, " \\\n"));
    this.line += this.count(raw, "\n");
    this.i += raw.length;
    return true;
  }
  return false;
};
|
144
|
-
// Matches heredocs, adjusting indentation to the correct level.
// The smallest leading indentation found on any indented line is removed
// from the start of every line, newlines are turned into the two characters
// `\n`, every double quote is escaped, and the result is pushed as a
// double-quoted STRING token. Returns false when the chunk does not start
// with a heredoc, true otherwise.
lex.prototype.heredoc_token = function heredoc_token() {
  var doc, indent, indents, match;
  if (!((match = this.chunk.match(HEREDOC)))) {
    return false;
  }
  // Both body groups are unset for an empty heredoc (six quotes in a row).
  doc = match[2] || match[4] || '';
  // Inspect the indentation of every line, not just the first one; the body
  // may also contain no indented line at all, in which case match() is null.
  indents = doc.match(new RegExp(HEREDOC_INDENT.source, 'mg'));
  indent = indents ? indents.sort()[0] : '';
  if (indent) {
    doc = doc.replace(new RegExp("^" + indent, 'mg'), '');
  }
  // A string pattern would only replace the first quote, so use a global regex.
  doc = doc.replace(MULTILINER, "\\n").replace(/"/g, '\\"');
  this.token('STRING', '"' + doc + '"');
  this.line += this.count(match[1], "\n");
  this.i += match[1].length;
  return true;
};
|
158
|
-
// Matches interpolated JavaScript (text wrapped in backticks). The
// surrounding backticks are stripped from the value of the JS token that is
// pushed. Returns true on a match, false otherwise.
lex.prototype.js_token = function js_token() {
  var script = this.match(JS, 1);
  if (script) {
    this.token('JS', script.replace(JS_CLEANER, ''));
    this.i += script.length;
    return true;
  }
  return false;
};
|
168
|
-
// Matches regular expression literals. A candidate is rejected when the
// previous token's tag is listed in NOT_REGEX, i.e. when the slash may be a
// division operator instead.
lex.prototype.regex_token = function regex_token() {
  var literal = this.match(REGEX, 1);
  if (!literal || NOT_REGEX.indexOf(this.tag()) >= 0) {
    return false;
  }
  this.token('REGEX', literal);
  this.i += literal.length;
  return true;
};
|
181
|
-
// Matches and consumes comments. The comment text, with the leading `#` of
// each line removed, is pushed as a COMMENT token whose value is an array of
// lines, followed by a TERMINATOR token. Returns false when the chunk does
// not start with a comment, true otherwise.
lex.prototype.comment_token = function comment_token() {
  var comment, newlines;
  if (!((comment = this.match(COMMENT, 1)))) {
    return false;
  }
  // match() yields null (not an empty array) for a comment without any
  // newline, so the count has to be guarded.
  newlines = comment.match(MULTILINER);
  this.line += newlines ? newlines.length : 0;
  this.token('COMMENT', comment.replace(COMMENT_CLEANER, '').split(MULTILINER));
  this.token('TERMINATOR', "\n");
  this.i += comment.length;
  return true;
};
|
193
|
-
// Record tokens for indentation differing from the previous line.
// Consumes one or more newlines plus the indentation of the last one, then:
//   * swallows the line break when the next line starts with `.` or the
//     previous token is a continuation operator/keyword (NO_NEWLINE) that is
//     not a function arrow and is not itself preceded by a `.` token;
//   * emits a newline token when the indentation is unchanged;
//   * emits INDENT for deeper indentation, or outdents for shallower.
// Returns false when the chunk does not start with a newline.
lex.prototype.indent_token = function indent_token() {
  var before, diff, indent, last, next_character, no_newlines, size;
  if (!((indent = this.match(MULTI_DENT, 1)))) {
    return false;
  }
  this.line += indent.match(MULTILINER).length;
  this.i += indent.length;
  next_character = this.chunk.match(MULTI_DENT)[4];
  // The previous value may be missing (no tokens yet) or may not be a string
  // (e.g. the numeric size carried by INDENT/OUTDENT), so normalise it to
  // text before testing it, as the Ruby lexer does with `to_s`.
  last = this.value();
  last = (last === null || last === undefined) ? '' : String(last);
  // The token before the previous one does not exist when fewer than two
  // tokens have been read; treat that the same as "not a dot".
  before = this.tokens[this.tokens.length - 2];
  no_newlines = next_character === '.' ||
    (NO_NEWLINE.test(last) && !(before && before[0] === '.') && !CODE.test(last));
  if (no_newlines) {
    return this.suppress_newlines(indent);
  }
  // Only the indentation after the final newline of the run counts.
  size = indent.match(LAST_DENTS).reverse()[0].match(LAST_DENT)[1].length;
  if (size === this.indent) {
    return this.newline_token(indent);
  }
  if (size > this.indent) {
    diff = size - this.indent;
    this.token('INDENT', diff);
    this.indents.push(diff);
  } else {
    this.outdent_token(this.indent - size);
  }
  this.indent = size;
  return true;
};
|
220
|
-
// Record an outdent token, or several of them when we are moving back
// inwards past multiple recorded indents. Recorded indent sizes are popped
// and emitted as OUTDENT tokens until `move_out` is used up (or no indents
// remain); a TERMINATOR token is always appended afterwards.
lex.prototype.outdent_token = function outdent_token(move_out) {
  var width;
  for (; move_out > 0 && this.indents.length; move_out -= width) {
    width = this.indents.pop();
    this.token('OUTDENT', width);
  }
  this.token('TERMINATOR', "\n");
  return true;
};
|
232
|
-
// Matches and consumes non-meaningful whitespace (spaces and tabs). No token
// is produced; the value of the most recent token is remembered in
// `this.spaced` so later tokenizers can tell that a space followed it.
lex.prototype.whitespace_token = function whitespace_token() {
  var gap = this.match(WHITESPACE, 1);
  if (gap) {
    this.spaced = this.value();
    this.i += gap.length;
    return true;
  }
  return false;
};
|
242
|
-
// Multiple newlines get merged together: a TERMINATOR token is only pushed
// when the previous token's value is not already a newline. The `newlines`
// argument is not used. Always returns true.
lex.prototype.newline_token = function newline_token(newlines) {
  var already_terminated = this.value() === "\n";
  if (!already_terminated) {
    this.token('TERMINATOR', "\n");
  }
  return true;
};
|
250
|
-
// Swallow a line break without emitting anything. A trailing `\` token that
// was used to escape the newline explicitly is removed once its job is done.
// The `newlines` argument is not used. Always returns true.
lex.prototype.suppress_newlines = function suppress_newlines(newlines) {
  var escaped_explicitly = this.value() === "\\";
  if (escaped_explicitly) {
    this.tokens.pop();
  }
  return true;
};
|
257
|
-
// Fallback tokenizer: everything else becomes a literal token. A run of
// operator characters is kept together as one token (so the parser can
// apply the proper order of operations); otherwise a single character is
// taken. Eg.: ( ) , . !
//   * `:` and `=` are tagged ASSIGN, `;` is tagged TERMINATOR;
//   * a function arrow triggers tag_parameters() on the preceding tokens;
//   * `(` / `[` are tagged CALL_START / INDEX_START when the previous token
//     is callable and its value differs from the last spaced value.
// NOTE(review): the spacing check compares token values, not token
// identity — confirm this is intended for repeated identical values.
lex.prototype.literal_token = function literal_token() {
  var operator = this.chunk.match(OPERATOR);
  var tag, value;
  if (operator) {
    value = operator[1];
    if (CODE.test(value)) {
      this.tag_parameters();
    }
  } else {
    value = this.chunk.substr(0, 1);
  }
  if (value === ';') {
    tag = 'TERMINATOR';
  } else if (ASSIGNMENT.test(value)) {
    tag = 'ASSIGN';
  } else {
    tag = value;
  }
  if ((value === '(' || value === '[') &&
      this.value() !== this.spaced && CALLABLE.indexOf(this.tag()) >= 0) {
    tag = value === '(' ? 'CALL_START' : 'INDEX_START';
  }
  this.token(tag, value);
  this.i += value.length;
  return true;
};
|
284
|
-
// Helpers =============================================================
|
285
|
-
// Add a token to the results as a [tag, value] pair and return the new
// length of the token list. (The line number is not recorded here.)
lex.prototype.token = function token(tag, value) {
  var entry = [tag, value];
  return this.tokens.push(entry);
};
|
290
|
-
// Look at, or overwrite, a tag in the current token stream. `index` counts
// back from the end of the stream (1, the default, is the last token).
// Returns null when there is no such token; otherwise returns the token's
// tag, after replacing it with `tag` when one is supplied.
lex.prototype.tag = function tag(index, tag) {
  var offset = index || 1;
  var tok = this.tokens[this.tokens.length - offset];
  if (!tok) {
    return null;
  }
  if (tag === undefined || tag === null) {
    return tok[0];
  }
  tok[0] = tag;
  return tag;
};
|
301
|
-
// Look at, or overwrite, a value in the current token stream. `index`
// counts back from the end of the stream (1, the default, is the last
// token). Returns null when there is no such token; otherwise returns the
// token's value, after replacing it with `val` when one is supplied.
lex.prototype.value = function value(index, val) {
  var offset = index || 1;
  var tok = this.tokens[this.tokens.length - offset];
  if (!tok) {
    return null;
  }
  if (val === undefined || val === null) {
    return tok[1];
  }
  tok[1] = val;
  return val;
};
|
312
|
-
// Count the occurrences of a character in a string.
// Returns the number of (possibly overlapping) positions at which `letter`
// occurs in `string`; 0 when `letter` is empty.
lex.prototype.count = function count(string, letter) {
  var num, pos;
  num = 0;
  // An empty needle is found at every position, so indexOf would never
  // return -1 and the loop below would not terminate.
  if (!letter) {
    return num;
  }
  pos = string.indexOf(letter);
  while (pos !== -1) {
    // Accumulate in `num`; `count` is this function's own name.
    num += 1;
    pos = string.indexOf(letter, pos + 1);
  }
  return num;
};
|
323
|
-
// Attempt to match a regex against the current chunk. Returns the capture
// group at `index` of the match, or false when the regex does not match.
lex.prototype.match = function match(regex, index) {
  var found = this.chunk.match(regex);
  return found ? found[index] : false;
};
|
332
|
-
// A source of ambiguity in our grammar was parameter lists in function
// definitions (as opposed to argument lists in function calls). To avoid
// it, when a function arrow directly follows a `)`, walk backwards through
// the tokens: identifiers become PARAM, `)` becomes PARAM_END, and the walk
// stops at the first `(`, which becomes PARAM_START (that tag is returned).
// Returns null when the previous token is not `)` or no `(` is found.
lex.prototype.tag_parameters = function tag_parameters() {
  var back, tok;
  if (this.tag() !== ')') {
    return null;
  }
  for (back = 1; (tok = this.tokens[this.tokens.length - back]); back += 1) {
    switch (tok[0]) {
      case 'IDENTIFIER':
        tok[0] = 'PARAM';
        break;
      case ')':
        tok[0] = 'PARAM_END';
        break;
      case '(':
        return (tok[0] = 'PARAM_START');
    }
  }
  return null;
};
|
358
|
-
// Close up all remaining open blocks by outdenting by the current indent
// level. Returns whatever outdent_token returns.
lex.prototype.close_indentation = function close_indentation() {
  var depth = this.indent;
  return this.outdent_token(depth);
};
|
363
|
-
})();
|
data/lib/coffee_script/lexer.rb
DELETED
@@ -1,272 +0,0 @@
|
|
1
|
-
module CoffeeScript

  # The lexer reads a stream of CoffeeScript and divvys it up into tagged
  # tokens. A minor bit of the ambiguity in the grammar has been avoided by
  # pushing some extra smarts into the Lexer.
  class Lexer

    # The list of keywords passed verbatim to the parser.
    KEYWORDS   = ["if", "else", "then", "unless",
                  "true", "false", "yes", "no", "on", "off",
                  "and", "or", "is", "isnt", "not",
                  "new", "return",
                  "try", "catch", "finally", "throw",
                  "break", "continue",
                  "for", "in", "of", "by", "where", "while",
                  "delete", "instanceof", "typeof",
                  "switch", "when",
                  "super", "extends"]

    # Token matching regexes.
    IDENTIFIER = /\A([a-zA-Z$_](\w|\$)*)/
    NUMBER     = /\A(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i
    STRING     = /\A(""|''|"(.*?)([^\\]|\\\\)"|'(.*?)([^\\]|\\\\)')/m
    HEREDOC    = /\A("{6}|'{6}|"{3}\n?(.*?)\n?([ \t]*)"{3}|'{3}\n?(.*?)\n?([ \t]*)'{3})/m
    JS         = /\A(``|`(.*?)([^\\]|\\\\)`)/m
    OPERATOR   = /\A([+\*&|\/\-%=<>:!?]+)/
    WHITESPACE = /\A([ \t]+)/
    COMMENT    = /\A(((\n?[ \t]*)?#.*$)+)/
    CODE       = /\A((-|=)>)/
    REGEX      = /\A(\/(.*?)([^\\]|\\\\)\/[imgy]{0,4})/
    MULTI_DENT = /\A((\n([ \t]*))+)(\.)?/
    LAST_DENT  = /\n([ \t]*)/
    ASSIGNMENT = /\A(:|=)\Z/

    # Token cleaning regexes.
    JS_CLEANER      = /(\A`|`\Z)/
    MULTILINER      = /\n/
    STRING_NEWLINES = /\n[ \t]*/
    COMMENT_CLEANER = /(^[ \t]*#|\n[ \t]*$)/
    NO_NEWLINE      = /\A([+\*&|\/\-%=<>:!.\\][<>=&|]*|and|or|is|isnt|not|delete|typeof|instanceof)\Z/
    HEREDOC_INDENT  = /^[ \t]+/

    # Tokens which a regular expression will never immediately follow, but which
    # a division operator might.
    # See: http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
    NOT_REGEX = [
      :IDENTIFIER, :NUMBER, :REGEX, :STRING,
      ')', '++', '--', ']', '}',
      :FALSE, :NULL, :TRUE
    ]

    # Tokens which could legitimately be invoked or indexed.
    CALLABLE = [:IDENTIFIER, :SUPER, ')', ']', '}', :STRING]

    # Scan by attempting to match tokens one character at a time. Slow and steady.
    # Returns the token stream after it has been passed through the Rewriter.
    def tokenize(code)
      @code    = code.chomp # Cleanup code by remove extra line breaks
      @i       = 0          # Current character position we're parsing
      @line    = 1          # The current line.
      @indent  = 0          # The current indent level.
      @indents = []         # The stack of all indent levels we are currently within.
      @tokens  = []         # Collection of all parsed tokens in the form [:TOKEN_TYPE, value]
      @spaced  = nil        # The last value that has a space following it.
      while @i < @code.length
        @chunk = @code[@i..-1]
        extract_next_token
      end
      puts "original stream: #{@tokens.inspect}" if ENV['VERBOSE']
      close_indentation
      Rewriter.new.rewrite(@tokens)
    end

    # At every position, run through this list of attempted matches,
    # short-circuiting if any of them succeed.
    def extract_next_token
      return if identifier_token
      return if number_token
      return if heredoc_token
      return if string_token
      return if js_token
      return if regex_token
      return if indent_token
      return if comment_token
      return if whitespace_token
      return    literal_token
    end

    # Tokenizers ==========================================================

    # Matches identifying literals: variables, keywords, method names, etc.
    def identifier_token
      return false unless identifier = @chunk[IDENTIFIER, 1]
      # Keywords are special identifiers tagged with their own name,
      # 'if' will result in an [:IF, "if"] token.
      tag = KEYWORDS.include?(identifier) ? identifier.upcase.to_sym : :IDENTIFIER
      tag = :LEADING_WHEN if tag == :WHEN && [:OUTDENT, :INDENT, "\n"].include?(last_tag)
      @tokens[-1][0] = :PROTOTYPE_ACCESS if tag == :IDENTIFIER && last_value == '::'
      if tag == :IDENTIFIER && last_value == '.' && !(@tokens[-2] && @tokens[-2][1] == '.')
        # The token before the dot does not exist when the dot is the very
        # first token, so it has to be checked before it is inspected.
        if @tokens[-2] && @tokens[-2][0] == "?"
          @tokens[-1][0] = :SOAK_ACCESS
          @tokens.delete_at(-2)
        else
          @tokens[-1][0] = :PROPERTY_ACCESS
        end
      end
      token(tag, identifier)
      @i += identifier.length
    end

    # Matches numbers, including decimals, hex, and exponential notation.
    def number_token
      return false unless number = @chunk[NUMBER, 1]
      token(:NUMBER, number)
      @i += number.length
    end

    # Matches strings, including multi-line strings.
    def string_token
      return false unless string = @chunk[STRING, 1]
      escaped = string.gsub(STRING_NEWLINES, " \\\n")
      token(:STRING, escaped)
      @line += string.count("\n")
      @i += string.length
    end

    # Matches heredocs, adjusting indentation to the correct level.
    def heredoc_token
      return false unless match = @chunk.match(HEREDOC)
      # Neither body group is set for an empty heredoc (six quotes in a row).
      doc = (match[2] || match[4] || '').dup
      indent = doc.scan(HEREDOC_INDENT).min
      doc.gsub!(/^#{indent}/, "") if indent
      doc.gsub!("\n", "\\n")
      doc.gsub!('"', '\\"')
      token(:STRING, "\"#{doc}\"")
      @line += match[1].count("\n")
      @i += match[1].length
    end

    # Matches interpolated JavaScript.
    def js_token
      return false unless script = @chunk[JS, 1]
      token(:JS, script.gsub(JS_CLEANER, ''))
      @i += script.length
    end

    # Matches regular expression literals.
    def regex_token
      return false unless regex = @chunk[REGEX, 1]
      return false if NOT_REGEX.include?(last_tag)
      token(:REGEX, regex)
      @i += regex.length
    end

    # Matches and consumes comments.
    def comment_token
      return false unless comment = @chunk[COMMENT, 1]
      @line += comment.scan(MULTILINER).length
      token(:COMMENT, comment.gsub(COMMENT_CLEANER, '').split(MULTILINER))
      token("\n", "\n")
      @i += comment.length
    end

    # Record tokens for indentation differing from the previous line.
    def indent_token
      return false unless indent = @chunk[MULTI_DENT, 1]
      @line += indent.scan(MULTILINER).size
      @i += indent.size
      next_character = @chunk[MULTI_DENT, 4]
      # With fewer than two tokens there is no token before the previous one;
      # treat that the same as "not a dot".
      prior_is_dot = @tokens[-2] && @tokens[-2][0] == '.'
      no_newlines = next_character == '.' || (last_value.to_s.match(NO_NEWLINE) && !prior_is_dot && !last_value.match(CODE))
      return suppress_newlines(indent) if no_newlines
      size = indent.scan(LAST_DENT).last.last.length
      return newline_token(indent) if size == @indent
      if size > @indent
        token(:INDENT, size - @indent)
        @indents << (size - @indent)
      else
        outdent_token(@indent - size)
      end
      @indent = size
    end

    # Record an outdent token or tokens, if we're moving back inwards past
    # multiple recorded indents.
    def outdent_token(move_out)
      while move_out > 0 && !@indents.empty?
        last_indent = @indents.pop
        token(:OUTDENT, last_indent)
        move_out -= last_indent
      end
      token("\n", "\n")
    end

    # Matches and consumes non-meaningful whitespace.
    def whitespace_token
      return false unless whitespace = @chunk[WHITESPACE, 1]
      @spaced = last_value
      @i += whitespace.length
    end

    # Multiple newlines get merged together.
    # Use a trailing \ to escape newlines.
    def newline_token(newlines)
      token("\n", "\n") unless last_value == "\n"
      true
    end

    # Tokens to explicitly escape newlines are removed once their job is done.
    def suppress_newlines(newlines)
      @tokens.pop if last_value == "\\"
      true
    end

    # We treat all other single characters as a token. Eg.: ( ) , . !
    # Multi-character operators are also literal tokens, so that Racc can assign
    # the proper order of operations.
    def literal_token
      value = @chunk[OPERATOR, 1]
      tag_parameters if value && value.match(CODE)
      value ||= @chunk[0,1]
      tag = value.match(ASSIGNMENT) ? :ASSIGN : value
      if !@spaced.equal?(last_value) && CALLABLE.include?(last_tag)
        tag = :CALL_START  if value == '('
        tag = :INDEX_START if value == '['
      end
      token(tag, value)
      @i += value.length
    end

    # Helpers ==========================================================

    # Add a token to the results, taking note of the line number.
    def token(tag, value)
      @tokens << [tag, Value.new(value, @line)]
    end

    # Peek at the previous token's value.
    def last_value
      @tokens.last && @tokens.last[1]
    end

    # Peek at the previous token's tag.
    def last_tag
      @tokens.last && @tokens.last[0]
    end

    # A source of ambiguity in our grammar was parameter lists in function
    # definitions (as opposed to argument lists in function calls). Tag
    # parameter identifiers in order to avoid this. Also, parameter lists can
    # make use of splats.
    def tag_parameters
      return if last_tag != ')'
      i = 0
      loop do
        i -= 1
        tok = @tokens[i]
        return if !tok
        case tok[0]
        when :IDENTIFIER  then tok[0] = :PARAM
        when ')'          then tok[0] = :PARAM_END
        when '('          then return tok[0] = :PARAM_START
        end
      end
    end

    # Close up all remaining open blocks by outdenting by the current
    # indent level.
    def close_indentation
      outdent_token(@indent)
    end

  end
end
|