coffee-script 0.3.2 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,363 +0,0 @@
1
- (function(){
2
- var ASSIGNMENT, CALLABLE, CODE, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, JS, JS_CLEANER, KEYWORDS, LAST_DENT, LAST_DENTS, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, Rewriter, STRING, STRING_NEWLINES, WHITESPACE, lex;
3
- Rewriter = require('./rewriter').Rewriter;
4
- // The lexer reads a stream of CoffeeScript and divvies it up into tagged
5
- // tokens. A minor bit of the ambiguity in the grammar has been avoided by
6
- // pushing some extra smarts into the Lexer.
7
- exports.Lexer = (lex = function lex() { });
8
- // Constants ============================================================
9
- // The list of keywords passed verbatim to the parser.
10
- KEYWORDS = ["if", "else", "then", "unless", "true", "false", "yes", "no", "on", "off", "and", "or", "is", "isnt", "not", "new", "return", "arguments", "try", "catch", "finally", "throw", "break", "continue", "for", "in", "of", "by", "where", "while", "delete", "instanceof", "typeof", "switch", "when", "super", "extends"];
11
- // Token matching regexes.
12
- IDENTIFIER = /^([a-zA-Z$_](\w|\$)*)/;
13
- NUMBER = /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i;
14
- STRING = /^(""|''|"([\s\S]*?)([^\\]|\\\\)"|'([\s\S]*?)([^\\]|\\\\)')/;
15
- HEREDOC = /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/;
16
- JS = /^(``|`([\s\S]*?)([^\\]|\\\\)`)/;
17
- OPERATOR = /^([+\*&|\/\-%=<>:!?]+)/;
18
- WHITESPACE = /^([ \t]+)/;
19
- COMMENT = /^(((\n?[ \t]*)?#.*$)+)/;
20
- CODE = /^((-|=)>)/;
21
- REGEX = /^(\/(.*?)([^\\]|\\\\)\/[imgy]{0,4})/;
22
- MULTI_DENT = /^((\n([ \t]*))+)(\.)?/;
23
- LAST_DENTS = /\n([ \t]*)/g;
24
- LAST_DENT = /\n([ \t]*)/;
25
- ASSIGNMENT = /^(:|=)$/;
26
- // Token cleaning regexes.
27
- JS_CLEANER = /(^`|`$)/g;
28
- MULTILINER = /\n/g;
29
- STRING_NEWLINES = /\n[ \t]*/g;
30
- COMMENT_CLEANER = /(^[ \t]*#|\n[ \t]*$)/mg;
31
- NO_NEWLINE = /^([+\*&|\/\-%=<>:!.\\][<>=&|]*|and|or|is|isnt|not|delete|typeof|instanceof)$/;
32
- HEREDOC_INDENT = /^[ \t]+/g;
33
- // Tokens which a regular expression will never immediately follow, but which
34
- // a division operator might.
35
- // See: http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
36
- NOT_REGEX = ['IDENTIFIER', 'NUMBER', 'REGEX', 'STRING', ')', '++', '--', ']', '}', 'FALSE', 'NULL', 'TRUE'];
37
- // Tokens which could legitimately be invoked or indexed.
38
- CALLABLE = ['IDENTIFIER', 'SUPER', ')', ']', '}', 'STRING'];
39
- // Scan by attempting to match tokens one character at a time. Slow and steady.
40
- lex.prototype.tokenize = function tokenize(code) {
41
- this.code = code;
42
- // Clean up code by removing extra line breaks, TODO: chomp
43
- this.i = 0;
44
- // Current character position we're parsing
45
- this.line = 1;
46
- // The current line.
47
- this.indent = 0;
48
- // The current indent level.
49
- this.indents = [];
50
- // The stack of all indent levels we are currently within.
51
- this.tokens = [];
52
- // Collection of all parsed tokens in the form [:TOKEN_TYPE, value]
53
- this.spaced = null;
54
- // The last token that has a space following it.
55
- while (this.i < this.code.length) {
56
- this.chunk = this.code.slice(this.i);
57
- this.extract_next_token();
58
- }
59
- this.close_indentation();
60
- return (new Rewriter()).rewrite(this.tokens);
61
- };
62
- // At every position, run through this list of attempted matches,
63
- // short-circuiting if any of them succeed.
64
- lex.prototype.extract_next_token = function extract_next_token() {
65
- if (this.identifier_token()) {
66
- return null;
67
- }
68
- if (this.number_token()) {
69
- return null;
70
- }
71
- if (this.heredoc_token()) {
72
- return null;
73
- }
74
- if (this.string_token()) {
75
- return null;
76
- }
77
- if (this.js_token()) {
78
- return null;
79
- }
80
- if (this.regex_token()) {
81
- return null;
82
- }
83
- if (this.indent_token()) {
84
- return null;
85
- }
86
- if (this.comment_token()) {
87
- return null;
88
- }
89
- if (this.whitespace_token()) {
90
- return null;
91
- }
92
- return this.literal_token();
93
- };
94
- // Tokenizers ==========================================================
95
- // Matches identifying literals: variables, keywords, method names, etc.
96
- lex.prototype.identifier_token = function identifier_token() {
97
- var id, tag;
98
- if (!((id = this.match(IDENTIFIER, 1)))) {
99
- return false;
100
- }
101
- // Keywords are special identifiers tagged with their own name,
102
- // 'if' will result in an ['IF', "if"] token.
103
- tag = KEYWORDS.indexOf(id) >= 0 ? id.toUpperCase() : 'IDENTIFIER';
104
- if (tag === 'WHEN' && (this.tag() === 'OUTDENT' || this.tag() === 'INDENT')) {
105
- tag = 'LEADING_WHEN';
106
- }
107
- if (tag === 'IDENTIFIER' && this.value() === '::') {
108
- this.tag(-1, 'PROTOTYPE_ACCESS');
109
- }
110
- if (tag === 'IDENTIFIER' && this.value() === '.' && !(this.value(-2) === '.')) {
111
- if (this.tag(-2) === '?') {
112
- this.tag(-1, 'SOAK_ACCESS');
113
- this.tokens.splice(-2, 1);
114
- } else {
115
- this.tag(-1, 'PROPERTY_ACCESS');
116
- }
117
- }
118
- this.token(tag, id);
119
- this.i += id.length;
120
- return true;
121
- };
122
- // Matches numbers, including decimals, hex, and exponential notation.
123
- lex.prototype.number_token = function number_token() {
124
- var number;
125
- if (!((number = this.match(NUMBER, 1)))) {
126
- return false;
127
- }
128
- this.token('NUMBER', number);
129
- this.i += number.length;
130
- return true;
131
- };
132
- // Matches strings, including multi-line strings.
133
- lex.prototype.string_token = function string_token() {
134
- var escaped, string;
135
- if (!((string = this.match(STRING, 1)))) {
136
- return false;
137
- }
138
- escaped = string.replace(STRING_NEWLINES, " \\\n");
139
- this.token('STRING', escaped);
140
- this.line += this.count(string, "\n");
141
- this.i += string.length;
142
- return true;
143
- };
144
- // Matches heredocs, adjusting indentation to the correct level.
145
- lex.prototype.heredoc_token = function heredoc_token() {
146
- var doc, indent, match;
147
- if (!((match = this.chunk.match(HEREDOC)))) {
148
- return false;
149
- }
150
- doc = match[2] || match[4];
151
- indent = doc.match(HEREDOC_INDENT).sort()[0];
152
- doc = doc.replace(new RegExp("^" + indent, 'g'), '').replace(MULTILINER, "\\n").replace('"', '\\"');
153
- this.token('STRING', '"' + doc + '"');
154
- this.line += this.count(match[1], "\n");
155
- this.i += match[1].length;
156
- return true;
157
- };
158
- // Matches interpolated JavaScript.
159
- lex.prototype.js_token = function js_token() {
160
- var script;
161
- if (!((script = this.match(JS, 1)))) {
162
- return false;
163
- }
164
- this.token('JS', script.replace(JS_CLEANER, ''));
165
- this.i += script.length;
166
- return true;
167
- };
168
- // Matches regular expression literals.
169
- lex.prototype.regex_token = function regex_token() {
170
- var regex;
171
- if (!((regex = this.match(REGEX, 1)))) {
172
- return false;
173
- }
174
- if (NOT_REGEX.indexOf(this.tag()) >= 0) {
175
- return false;
176
- }
177
- this.token('REGEX', regex);
178
- this.i += regex.length;
179
- return true;
180
- };
181
- // Matches and consumes comments.
182
- lex.prototype.comment_token = function comment_token() {
183
- var comment;
184
- if (!((comment = this.match(COMMENT, 1)))) {
185
- return false;
186
- }
187
- this.line += comment.match(MULTILINER).length;
188
- this.token('COMMENT', comment.replace(COMMENT_CLEANER, '').split(MULTILINER));
189
- this.token('TERMINATOR', "\n");
190
- this.i += comment.length;
191
- return true;
192
- };
193
- // Record tokens for indentation differing from the previous line.
194
- lex.prototype.indent_token = function indent_token() {
195
- var diff, indent, next_character, no_newlines, size;
196
- if (!((indent = this.match(MULTI_DENT, 1)))) {
197
- return false;
198
- }
199
- this.line += indent.match(MULTILINER).length;
200
- this.i += indent.length;
201
- next_character = this.chunk.match(MULTI_DENT)[4];
202
- no_newlines = next_character === '.' || (this.value().match(NO_NEWLINE) && this.tokens[this.tokens.length - 2][0] !== '.' && !this.value().match(CODE));
203
- if (no_newlines) {
204
- return this.suppress_newlines(indent);
205
- }
206
- size = indent.match(LAST_DENTS).reverse()[0].match(LAST_DENT)[1].length;
207
- if (size === this.indent) {
208
- return this.newline_token(indent);
209
- }
210
- if (size > this.indent) {
211
- diff = size - this.indent;
212
- this.token('INDENT', diff);
213
- this.indents.push(diff);
214
- } else {
215
- this.outdent_token(this.indent - size);
216
- }
217
- this.indent = size;
218
- return true;
219
- };
220
- // Record an outdent token or tokens, if we're moving back inwards past
221
- // multiple recorded indents.
222
- lex.prototype.outdent_token = function outdent_token(move_out) {
223
- var last_indent;
224
- while (move_out > 0 && this.indents.length) {
225
- last_indent = this.indents.pop();
226
- this.token('OUTDENT', last_indent);
227
- move_out -= last_indent;
228
- }
229
- this.token('TERMINATOR', "\n");
230
- return true;
231
- };
232
- // Matches and consumes non-meaningful whitespace.
233
- lex.prototype.whitespace_token = function whitespace_token() {
234
- var space;
235
- if (!((space = this.match(WHITESPACE, 1)))) {
236
- return false;
237
- }
238
- this.spaced = this.value();
239
- this.i += space.length;
240
- return true;
241
- };
242
- // Multiple newlines get merged together.
243
- // Use a trailing \ to escape newlines.
244
- lex.prototype.newline_token = function newline_token(newlines) {
245
- if (!(this.value() === "\n")) {
246
- this.token('TERMINATOR', "\n");
247
- }
248
- return true;
249
- };
250
- // Tokens to explicitly escape newlines are removed once their job is done.
251
- lex.prototype.suppress_newlines = function suppress_newlines(newlines) {
252
- if (this.value() === "\\") {
253
- this.tokens.pop();
254
- }
255
- return true;
256
- };
257
- // We treat all other single characters as a token. Eg.: ( ) , . !
258
- // Multi-character operators are also literal tokens, so that Racc can assign
259
- // the proper order of operations.
260
- lex.prototype.literal_token = function literal_token() {
261
- var match, tag, value;
262
- match = this.chunk.match(OPERATOR);
263
- value = match && match[1];
264
- if (value && value.match(CODE)) {
265
- this.tag_parameters();
266
- }
267
- value = value || this.chunk.substr(0, 1);
268
- tag = value.match(ASSIGNMENT) ? 'ASSIGN' : value;
269
- if (value === ';') {
270
- tag = 'TERMINATOR';
271
- }
272
- if (this.value() !== this.spaced && CALLABLE.indexOf(this.tag()) >= 0) {
273
- if (value === '(') {
274
- tag = 'CALL_START';
275
- }
276
- if (value === '[') {
277
- tag = 'INDEX_START';
278
- }
279
- }
280
- this.token(tag, value);
281
- this.i += value.length;
282
- return true;
283
- };
284
- // Helpers =============================================================
285
- // Add a token to the results, taking note of the line number.
286
- lex.prototype.token = function token(tag, value) {
287
- return this.tokens.push([tag, value]);
288
- // this.tokens.push([tag, Value.new(value, @line)])
289
- };
290
- // Look at a tag in the current token stream.
291
- lex.prototype.tag = function tag(index, tag) {
292
- var tok;
293
- if (!((tok = this.tokens[this.tokens.length - (index || 1)]))) {
294
- return null;
295
- }
296
- if ((typeof tag !== "undefined" && tag !== null)) {
297
- return (tok[0] = tag);
298
- }
299
- return tok[0];
300
- };
301
- // Look at a value in the current token stream.
302
- lex.prototype.value = function value(index, val) {
303
- var tok;
304
- if (!((tok = this.tokens[this.tokens.length - (index || 1)]))) {
305
- return null;
306
- }
307
- if ((typeof val !== "undefined" && val !== null)) {
308
- return (tok[1] = val);
309
- }
310
- return tok[1];
311
- };
312
- // Count the occurrences of a character in a string.
313
- lex.prototype.count = function count(string, letter) {
314
- var num, pos;
315
- num = 0;
316
- pos = string.indexOf(letter);
317
- while (pos !== -1) {
318
- count += 1;
319
- pos = string.indexOf(letter, pos + 1);
320
- }
321
- return count;
322
- };
323
- // Attempt to match a string against the current chunk, returning the indexed
324
- // match.
325
- lex.prototype.match = function match(regex, index) {
326
- var m;
327
- if (!((m = this.chunk.match(regex)))) {
328
- return false;
329
- }
330
- return m ? m[index] : false;
331
- };
332
- // A source of ambiguity in our grammar was parameter lists in function
333
- // definitions (as opposed to argument lists in function calls). Tag
334
- // parameter identifiers in order to avoid this. Also, parameter lists can
335
- // make use of splats.
336
- lex.prototype.tag_parameters = function tag_parameters() {
337
- var i, tok;
338
- if (this.tag() !== ')') {
339
- return null;
340
- }
341
- i = 0;
342
- while (true) {
343
- i += 1;
344
- tok = this.tokens[this.tokens.length - i];
345
- if (!tok) {
346
- return null;
347
- }
348
- if (tok[0] === 'IDENTIFIER') {
349
- tok[0] = 'PARAM';
350
- } else if (tok[0] === ')') {
351
- tok[0] = 'PARAM_END';
352
- } else if (tok[0] === '(') {
353
- return (tok[0] = 'PARAM_START');
354
- }
355
- }
356
- return true;
357
- };
358
- // Close up all remaining open blocks. If the first token is an indent,
359
- // axe it.
360
- lex.prototype.close_indentation = function close_indentation() {
361
- return this.outdent_token(this.indent);
362
- };
363
- })();
@@ -1,272 +0,0 @@
1
- module CoffeeScript
2
-
3
- # The lexer reads a stream of CoffeeScript and divvies it up into tagged
4
- # tokens. A minor bit of the ambiguity in the grammar has been avoided by
5
- # pushing some extra smarts into the Lexer.
6
- class Lexer
7
-
8
- # The list of keywords passed verbatim to the parser.
9
- KEYWORDS = ["if", "else", "then", "unless",
10
- "true", "false", "yes", "no", "on", "off",
11
- "and", "or", "is", "isnt", "not",
12
- "new", "return",
13
- "try", "catch", "finally", "throw",
14
- "break", "continue",
15
- "for", "in", "of", "by", "where", "while",
16
- "delete", "instanceof", "typeof",
17
- "switch", "when",
18
- "super", "extends"]
19
-
20
- # Token matching regexes.
21
- IDENTIFIER = /\A([a-zA-Z$_](\w|\$)*)/
22
- NUMBER = /\A(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i
23
- STRING = /\A(""|''|"(.*?)([^\\]|\\\\)"|'(.*?)([^\\]|\\\\)')/m
24
- HEREDOC = /\A("{6}|'{6}|"{3}\n?(.*?)\n?([ \t]*)"{3}|'{3}\n?(.*?)\n?([ \t]*)'{3})/m
25
- JS = /\A(``|`(.*?)([^\\]|\\\\)`)/m
26
- OPERATOR = /\A([+\*&|\/\-%=<>:!?]+)/
27
- WHITESPACE = /\A([ \t]+)/
28
- COMMENT = /\A(((\n?[ \t]*)?#.*$)+)/
29
- CODE = /\A((-|=)>)/
30
- REGEX = /\A(\/(.*?)([^\\]|\\\\)\/[imgy]{0,4})/
31
- MULTI_DENT = /\A((\n([ \t]*))+)(\.)?/
32
- LAST_DENT = /\n([ \t]*)/
33
- ASSIGNMENT = /\A(:|=)\Z/
34
-
35
- # Token cleaning regexes.
36
- JS_CLEANER = /(\A`|`\Z)/
37
- MULTILINER = /\n/
38
- STRING_NEWLINES = /\n[ \t]*/
39
- COMMENT_CLEANER = /(^[ \t]*#|\n[ \t]*$)/
40
- NO_NEWLINE = /\A([+\*&|\/\-%=<>:!.\\][<>=&|]*|and|or|is|isnt|not|delete|typeof|instanceof)\Z/
41
- HEREDOC_INDENT = /^[ \t]+/
42
-
43
- # Tokens which a regular expression will never immediately follow, but which
44
- # a division operator might.
45
- # See: http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
46
- NOT_REGEX = [
47
- :IDENTIFIER, :NUMBER, :REGEX, :STRING,
48
- ')', '++', '--', ']', '}',
49
- :FALSE, :NULL, :TRUE
50
- ]
51
-
52
- # Tokens which could legitimately be invoked or indexed.
53
- CALLABLE = [:IDENTIFIER, :SUPER, ')', ']', '}', :STRING]
54
-
55
- # Scan by attempting to match tokens one character at a time. Slow and steady.
56
- def tokenize(code)
57
- @code = code.chomp # Clean up code by removing extra line breaks
58
- @i = 0 # Current character position we're parsing
59
- @line = 1 # The current line.
60
- @indent = 0 # The current indent level.
61
- @indents = [] # The stack of all indent levels we are currently within.
62
- @tokens = [] # Collection of all parsed tokens in the form [:TOKEN_TYPE, value]
63
- @spaced = nil # The last value that has a space following it.
64
- while @i < @code.length
65
- @chunk = @code[@i..-1]
66
- extract_next_token
67
- end
68
- puts "original stream: #{@tokens.inspect}" if ENV['VERBOSE']
69
- close_indentation
70
- Rewriter.new.rewrite(@tokens)
71
- end
72
-
73
- # At every position, run through this list of attempted matches,
74
- # short-circuiting if any of them succeed.
75
- def extract_next_token
76
- return if identifier_token
77
- return if number_token
78
- return if heredoc_token
79
- return if string_token
80
- return if js_token
81
- return if regex_token
82
- return if indent_token
83
- return if comment_token
84
- return if whitespace_token
85
- return literal_token
86
- end
87
-
88
- # Tokenizers ==========================================================
89
-
90
- # Matches identifying literals: variables, keywords, method names, etc.
91
- def identifier_token
92
- return false unless identifier = @chunk[IDENTIFIER, 1]
93
- # Keywords are special identifiers tagged with their own name,
94
- # 'if' will result in an [:IF, "if"] token.
95
- tag = KEYWORDS.include?(identifier) ? identifier.upcase.to_sym : :IDENTIFIER
96
- tag = :LEADING_WHEN if tag == :WHEN && [:OUTDENT, :INDENT, "\n"].include?(last_tag)
97
- @tokens[-1][0] = :PROTOTYPE_ACCESS if tag == :IDENTIFIER && last_value == '::'
98
- if tag == :IDENTIFIER && last_value == '.' && !(@tokens[-2] && @tokens[-2][1] == '.')
99
- if @tokens[-2][0] == "?"
100
- @tokens[-1][0] = :SOAK_ACCESS
101
- @tokens.delete_at(-2)
102
- else
103
- @tokens[-1][0] = :PROPERTY_ACCESS
104
- end
105
- end
106
- token(tag, identifier)
107
- @i += identifier.length
108
- end
109
-
110
- # Matches numbers, including decimals, hex, and exponential notation.
111
- def number_token
112
- return false unless number = @chunk[NUMBER, 1]
113
- token(:NUMBER, number)
114
- @i += number.length
115
- end
116
-
117
- # Matches strings, including multi-line strings.
118
- def string_token
119
- return false unless string = @chunk[STRING, 1]
120
- escaped = string.gsub(STRING_NEWLINES, " \\\n")
121
- token(:STRING, escaped)
122
- @line += string.count("\n")
123
- @i += string.length
124
- end
125
-
126
- # Matches heredocs, adjusting indentation to the correct level.
127
- def heredoc_token
128
- return false unless match = @chunk.match(HEREDOC)
129
- doc = match[2] || match[4]
130
- indent = doc.scan(HEREDOC_INDENT).min
131
- doc.gsub!(/^#{indent}/, "")
132
- doc.gsub!("\n", "\\n")
133
- doc.gsub!('"', '\\"')
134
- token(:STRING, "\"#{doc}\"")
135
- @line += match[1].count("\n")
136
- @i += match[1].length
137
- end
138
-
139
- # Matches interpolated JavaScript.
140
- def js_token
141
- return false unless script = @chunk[JS, 1]
142
- token(:JS, script.gsub(JS_CLEANER, ''))
143
- @i += script.length
144
- end
145
-
146
- # Matches regular expression literals.
147
- def regex_token
148
- return false unless regex = @chunk[REGEX, 1]
149
- return false if NOT_REGEX.include?(last_tag)
150
- token(:REGEX, regex)
151
- @i += regex.length
152
- end
153
-
154
- # Matches and consumes comments.
155
- def comment_token
156
- return false unless comment = @chunk[COMMENT, 1]
157
- @line += comment.scan(MULTILINER).length
158
- token(:COMMENT, comment.gsub(COMMENT_CLEANER, '').split(MULTILINER))
159
- token("\n", "\n")
160
- @i += comment.length
161
- end
162
-
163
- # Record tokens for indentation differing from the previous line.
164
- def indent_token
165
- return false unless indent = @chunk[MULTI_DENT, 1]
166
- @line += indent.scan(MULTILINER).size
167
- @i += indent.size
168
- next_character = @chunk[MULTI_DENT, 4]
169
- no_newlines = next_character == '.' || (last_value.to_s.match(NO_NEWLINE) && @tokens[-2][0] != '.' && !last_value.match(CODE))
170
- return suppress_newlines(indent) if no_newlines
171
- size = indent.scan(LAST_DENT).last.last.length
172
- return newline_token(indent) if size == @indent
173
- if size > @indent
174
- token(:INDENT, size - @indent)
175
- @indents << (size - @indent)
176
- else
177
- outdent_token(@indent - size)
178
- end
179
- @indent = size
180
- end
181
-
182
- # Record an outdent token or tokens, if we're moving back inwards past
183
- # multiple recorded indents.
184
- def outdent_token(move_out)
185
- while move_out > 0 && !@indents.empty?
186
- last_indent = @indents.pop
187
- token(:OUTDENT, last_indent)
188
- move_out -= last_indent
189
- end
190
- token("\n", "\n")
191
- end
192
-
193
- # Matches and consumes non-meaningful whitespace.
194
- def whitespace_token
195
- return false unless whitespace = @chunk[WHITESPACE, 1]
196
- @spaced = last_value
197
- @i += whitespace.length
198
- end
199
-
200
- # Multiple newlines get merged together.
201
- # Use a trailing \ to escape newlines.
202
- def newline_token(newlines)
203
- token("\n", "\n") unless last_value == "\n"
204
- true
205
- end
206
-
207
- # Tokens to explicitly escape newlines are removed once their job is done.
208
- def suppress_newlines(newlines)
209
- @tokens.pop if last_value == "\\"
210
- true
211
- end
212
-
213
- # We treat all other single characters as a token. Eg.: ( ) , . !
214
- # Multi-character operators are also literal tokens, so that Racc can assign
215
- # the proper order of operations.
216
- def literal_token
217
- value = @chunk[OPERATOR, 1]
218
- tag_parameters if value && value.match(CODE)
219
- value ||= @chunk[0,1]
220
- tag = value.match(ASSIGNMENT) ? :ASSIGN : value
221
- if !@spaced.equal?(last_value) && CALLABLE.include?(last_tag)
222
- tag = :CALL_START if value == '('
223
- tag = :INDEX_START if value == '['
224
- end
225
- token(tag, value)
226
- @i += value.length
227
- end
228
-
229
- # Helpers ==========================================================
230
-
231
- # Add a token to the results, taking note of the line number.
232
- def token(tag, value)
233
- @tokens << [tag, Value.new(value, @line)]
234
- end
235
-
236
- # Peek at the previous token's value.
237
- def last_value
238
- @tokens.last && @tokens.last[1]
239
- end
240
-
241
- # Peek at the previous token's tag.
242
- def last_tag
243
- @tokens.last && @tokens.last[0]
244
- end
245
-
246
- # A source of ambiguity in our grammar was parameter lists in function
247
- # definitions (as opposed to argument lists in function calls). Tag
248
- # parameter identifiers in order to avoid this. Also, parameter lists can
249
- # make use of splats.
250
- def tag_parameters
251
- return if last_tag != ')'
252
- i = 0
253
- loop do
254
- i -= 1
255
- tok = @tokens[i]
256
- return if !tok
257
- case tok[0]
258
- when :IDENTIFIER then tok[0] = :PARAM
259
- when ')' then tok[0] = :PARAM_END
260
- when '(' then return tok[0] = :PARAM_START
261
- end
262
- end
263
- end
264
-
265
- # Close up all remaining open blocks. If the first token is an indent,
266
- # axe it.
267
- def close_indentation
268
- outdent_token(@indent)
269
- end
270
-
271
- end
272
- end