style-script 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,363 @@
1
+ (function(){
2
+ var ASSIGNMENT, CALLABLE, CODE, COMMENT, COMMENT_CLEANER, HEREDOC, HEREDOC_INDENT, IDENTIFIER, JS, JS_CLEANER, KEYWORDS, LAST_DENT, LAST_DENTS, MULTILINER, MULTI_DENT, NOT_REGEX, NO_NEWLINE, NUMBER, OPERATOR, REGEX, Rewriter, STRING, STRING_NEWLINES, WHITESPACE, lex;
3
+ Rewriter = require('./rewriter').Rewriter;
4
+ // The lexer reads a stream of StyleScript and divvys it up into tagged
5
+ // tokens. A minor bit of the ambiguity in the grammar has been avoided by
6
+ // pushing some extra smarts into the Lexer.
7
+ exports.Lexer = (lex = function lex() { });
8
+ // Constants ============================================================
9
+ // The list of keywords passed verbatim to the parser.
10
+ KEYWORDS = ["if", "else", "then", "unless", "true", "false", "yes", "no", "on", "off", "and", "or", "is", "isnt", "not", "new", "return", "arguments", "try", "catch", "finally", "throw", "break", "continue", "for", "in", "of", "by", "where", "while", "delete", "instanceof", "typeof", "switch", "when", "super", "extends"];
11
+ // Token matching regexes.
12
+ IDENTIFIER = /^([a-zA-Z$_](\w|\$)*)/;
13
+ NUMBER = /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i;
14
+ STRING = /^(""|''|"([\s\S]*?)([^\\]|\\\\)"|'([\s\S]*?)([^\\]|\\\\)')/;
15
+ HEREDOC = /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/;
16
+ JS = /^(``|`([\s\S]*?)([^\\]|\\\\)`)/;
17
+ OPERATOR = /^([+\*&|\/\-%=<>:!?]+)/;
18
+ WHITESPACE = /^([ \t]+)/;
19
+ COMMENT = /^(((\n?[ \t]*)?#[^\n]*)+)/;
20
+ CODE = /^((-|=)>)/;
21
+ REGEX = /^(\/(.*?)([^\\]|\\\\)\/[imgy]{0,4})/;
22
+ MULTI_DENT = /^((\n([ \t]*))+)(\.)?/;
23
+ LAST_DENTS = /\n([ \t]*)/g;
24
+ LAST_DENT = /\n([ \t]*)/;
25
+ ASSIGNMENT = /^(:|=)$/;
26
+ // Token cleaning regexes.
27
+ JS_CLEANER = /(^`|`$)/g;
28
+ MULTILINER = /\n/g;
29
+ STRING_NEWLINES = /\n[ \t]*/g;
30
+ COMMENT_CLEANER = /(^[ \t]*#|\n[ \t]*$)/mg;
31
+ NO_NEWLINE = /^([+\*&|\/\-%=<>:!.\\][<>=&|]*|and|or|is|isnt|not|delete|typeof|instanceof)$/;
32
+ HEREDOC_INDENT = /^[ \t]+/g;
33
+ // Tokens which a regular expression will never immediately follow, but which
34
+ // a division operator might.
35
+ // See: http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
36
+ NOT_REGEX = ['IDENTIFIER', 'NUMBER', 'REGEX', 'STRING', ')', '++', '--', ']', '}', 'FALSE', 'NULL', 'TRUE'];
37
+ // Tokens which could legitimately be invoked or indexed.
38
+ CALLABLE = ['IDENTIFIER', 'SUPER', ')', ']', '}', 'STRING'];
39
+ // Scan by attempting to match tokens one character at a time. Slow and steady.
40
+ lex.prototype.tokenize = function tokenize(code) {
41
+ this.code = code;
42
+ // Cleanup code by remove extra line breaks, TODO: chomp
43
+ this.i = 0;
44
+ // Current character position we're parsing
45
+ this.line = 1;
46
+ // The current line.
47
+ this.indent = 0;
48
+ // The current indent level.
49
+ this.indents = [];
50
+ // The stack of all indent levels we are currently within.
51
+ this.tokens = [];
52
+ // Collection of all parsed tokens in the form [:TOKEN_TYPE, value]
53
+ this.spaced = null;
54
+ // The last token that has a space following it.
55
+ while (this.i < this.code.length) {
56
+ this.chunk = this.code.slice(this.i);
57
+ this.extract_next_token();
58
+ }
59
+ this.close_indentation();
60
+ return (new Rewriter()).rewrite(this.tokens);
61
+ };
62
+ // At every position, run through this list of attempted matches,
63
+ // short-circuiting if any of them succeed.
64
+ lex.prototype.extract_next_token = function extract_next_token() {
65
+ if (this.identifier_token()) {
66
+ return null;
67
+ }
68
+ if (this.number_token()) {
69
+ return null;
70
+ }
71
+ if (this.heredoc_token()) {
72
+ return null;
73
+ }
74
+ if (this.string_token()) {
75
+ return null;
76
+ }
77
+ if (this.js_token()) {
78
+ return null;
79
+ }
80
+ if (this.regex_token()) {
81
+ return null;
82
+ }
83
+ if (this.indent_token()) {
84
+ return null;
85
+ }
86
+ if (this.comment_token()) {
87
+ return null;
88
+ }
89
+ if (this.whitespace_token()) {
90
+ return null;
91
+ }
92
+ return this.literal_token();
93
+ };
94
+ // Tokenizers ==========================================================
95
+ // Matches identifying literals: variables, keywords, method names, etc.
96
+ lex.prototype.identifier_token = function identifier_token() {
97
+ var id, tag;
98
+ if (!((id = this.match(IDENTIFIER, 1)))) {
99
+ return false;
100
+ }
101
+ // Keywords are special identifiers tagged with their own name,
102
+ // 'if' will result in an ['IF', "if"] token.
103
+ tag = KEYWORDS.indexOf(id) >= 0 ? id.toUpperCase() : 'IDENTIFIER';
104
+ if (tag === 'WHEN' && (this.tag() === 'OUTDENT' || this.tag() === 'INDENT')) {
105
+ tag = 'LEADING_WHEN';
106
+ }
107
+ if (tag === 'IDENTIFIER' && this.value() === '::') {
108
+ this.tag(-1, 'PROTOTYPE_ACCESS');
109
+ }
110
+ if (tag === 'IDENTIFIER' && this.value() === '.' && !(this.value(2) === '.')) {
111
+ if (this.tag(2) === '?') {
112
+ this.tag(1, 'SOAK_ACCESS');
113
+ this.tokens.splice(-2, 1);
114
+ } else {
115
+ this.tag(1, 'PROPERTY_ACCESS');
116
+ }
117
+ }
118
+ this.token(tag, id);
119
+ this.i += id.length;
120
+ return true;
121
+ };
122
+ // Matches numbers, including decimals, hex, and exponential notation.
123
+ lex.prototype.number_token = function number_token() {
124
+ var number;
125
+ if (!((number = this.match(NUMBER, 1)))) {
126
+ return false;
127
+ }
128
+ this.token('NUMBER', number);
129
+ this.i += number.length;
130
+ return true;
131
+ };
132
+ // Matches strings, including multi-line strings.
133
+ lex.prototype.string_token = function string_token() {
134
+ var escaped, string;
135
+ if (!((string = this.match(STRING, 1)))) {
136
+ return false;
137
+ }
138
+ escaped = string.replace(STRING_NEWLINES, " \\\n");
139
+ this.token('STRING', escaped);
140
+ this.line += this.count(string, "\n");
141
+ this.i += string.length;
142
+ return true;
143
+ };
144
+ // Matches heredocs, adjusting indentation to the correct level.
145
+ lex.prototype.heredoc_token = function heredoc_token() {
146
+ var doc, indent, match;
147
+ if (!((match = this.chunk.match(HEREDOC)))) {
148
+ return false;
149
+ }
150
+ doc = match[2] || match[4];
151
+ indent = doc.match(HEREDOC_INDENT).sort()[0];
152
+ doc = doc.replace(new RegExp("^" + indent, 'g'), '').replace(MULTILINER, "\\n").replace('"', '\\"');
153
+ this.token('STRING', '"' + doc + '"');
154
+ this.line += this.count(match[1], "\n");
155
+ this.i += match[1].length;
156
+ return true;
157
+ };
158
+ // Matches interpolated JavaScript.
159
+ lex.prototype.js_token = function js_token() {
160
+ var script;
161
+ if (!((script = this.match(JS, 1)))) {
162
+ return false;
163
+ }
164
+ this.token('JS', script.replace(JS_CLEANER, ''));
165
+ this.i += script.length;
166
+ return true;
167
+ };
168
+ // Matches regular expression literals.
169
+ lex.prototype.regex_token = function regex_token() {
170
+ var regex;
171
+ if (!((regex = this.match(REGEX, 1)))) {
172
+ return false;
173
+ }
174
+ if (NOT_REGEX.indexOf(this.tag()) >= 0) {
175
+ return false;
176
+ }
177
+ this.token('REGEX', regex);
178
+ this.i += regex.length;
179
+ return true;
180
+ };
181
+ // Matches and conumes comments.
182
+ lex.prototype.comment_token = function comment_token() {
183
+ var comment;
184
+ if (!((comment = this.match(COMMENT, 1)))) {
185
+ return false;
186
+ }
187
+ this.line += (comment.match(MULTILINER) || []).length;
188
+ this.token('COMMENT', comment.replace(COMMENT_CLEANER, '').split(MULTILINER));
189
+ this.token('TERMINATOR', "\n");
190
+ this.i += comment.length;
191
+ return true;
192
+ };
193
+ // Record tokens for indentation differing from the previous line.
194
+ lex.prototype.indent_token = function indent_token() {
195
+ var diff, indent, next_character, no_newlines, size;
196
+ if (!((indent = this.match(MULTI_DENT, 1)))) {
197
+ return false;
198
+ }
199
+ this.line += indent.match(MULTILINER).length;
200
+ this.i += indent.length;
201
+ next_character = this.chunk.match(MULTI_DENT)[4];
202
+ no_newlines = next_character === '.' || (this.value().match(NO_NEWLINE) && this.tokens[this.tokens.length - 2][0] !== '.' && !this.value().match(CODE));
203
+ if (no_newlines) {
204
+ return this.suppress_newlines(indent);
205
+ }
206
+ size = indent.match(LAST_DENTS).reverse()[0].match(LAST_DENT)[1].length;
207
+ if (size === this.indent) {
208
+ return this.newline_token(indent);
209
+ }
210
+ if (size > this.indent) {
211
+ diff = size - this.indent;
212
+ this.token('INDENT', diff);
213
+ this.indents.push(diff);
214
+ } else {
215
+ this.outdent_token(this.indent - size);
216
+ }
217
+ this.indent = size;
218
+ return true;
219
+ };
220
+ // Record an oudent token or tokens, if we're moving back inwards past
221
+ // multiple recorded indents.
222
+ lex.prototype.outdent_token = function outdent_token(move_out) {
223
+ var last_indent;
224
+ while (move_out > 0 && this.indents.length) {
225
+ last_indent = this.indents.pop();
226
+ this.token('OUTDENT', last_indent);
227
+ move_out -= last_indent;
228
+ }
229
+ this.token('TERMINATOR', "\n");
230
+ return true;
231
+ };
232
+ // Matches and consumes non-meaningful whitespace.
233
+ lex.prototype.whitespace_token = function whitespace_token() {
234
+ var space;
235
+ if (!((space = this.match(WHITESPACE, 1)))) {
236
+ return false;
237
+ }
238
+ this.spaced = this.value();
239
+ this.i += space.length;
240
+ return true;
241
+ };
242
+ // Multiple newlines get merged together.
243
+ // Use a trailing \ to escape newlines.
244
+ lex.prototype.newline_token = function newline_token(newlines) {
245
+ if (!(this.value() === "\n")) {
246
+ this.token('TERMINATOR', "\n");
247
+ }
248
+ return true;
249
+ };
250
+ // Tokens to explicitly escape newlines are removed once their job is done.
251
+ lex.prototype.suppress_newlines = function suppress_newlines(newlines) {
252
+ if (this.value() === "\\") {
253
+ this.tokens.pop();
254
+ }
255
+ return true;
256
+ };
257
+ // We treat all other single characters as a token. Eg.: ( ) , . !
258
+ // Multi-character operators are also literal tokens, so that Racc can assign
259
+ // the proper order of operations.
260
+ lex.prototype.literal_token = function literal_token() {
261
+ var match, tag, value;
262
+ match = this.chunk.match(OPERATOR);
263
+ value = match && match[1];
264
+ if (value && value.match(CODE)) {
265
+ this.tag_parameters();
266
+ }
267
+ value = value || this.chunk.substr(0, 1);
268
+ tag = value.match(ASSIGNMENT) ? 'ASSIGN' : value;
269
+ if (value === ';') {
270
+ tag = 'TERMINATOR';
271
+ }
272
+ if (this.value() !== this.spaced && CALLABLE.indexOf(this.tag()) >= 0) {
273
+ if (value === '(') {
274
+ tag = 'CALL_START';
275
+ }
276
+ if (value === '[') {
277
+ tag = 'INDEX_START';
278
+ }
279
+ }
280
+ this.token(tag, value);
281
+ this.i += value.length;
282
+ return true;
283
+ };
284
+ // Helpers =============================================================
285
+ // Add a token to the results, taking note of the line number.
286
+ lex.prototype.token = function token(tag, value) {
287
+ return this.tokens.push([tag, value]);
288
+ // this.tokens.push([tag, Value.new(value, @line)])
289
+ };
290
+ // Look at a tag in the current token stream.
291
+ lex.prototype.tag = function tag(index, tag) {
292
+ var tok;
293
+ if (!((tok = this.tokens[this.tokens.length - (index || 1)]))) {
294
+ return null;
295
+ }
296
+ if ((typeof tag !== "undefined" && tag !== null)) {
297
+ return (tok[0] = tag);
298
+ }
299
+ return tok[0];
300
+ };
301
+ // Look at a value in the current token stream.
302
+ lex.prototype.value = function value(index, val) {
303
+ var tok;
304
+ if (!((tok = this.tokens[this.tokens.length - (index || 1)]))) {
305
+ return null;
306
+ }
307
+ if ((typeof val !== "undefined" && val !== null)) {
308
+ return (tok[1] = val);
309
+ }
310
+ return tok[1];
311
+ };
312
+ // Count the occurences of a character in a string.
313
+ lex.prototype.count = function count(string, letter) {
314
+ var num, pos;
315
+ num = 0;
316
+ pos = string.indexOf(letter);
317
+ while (pos !== -1) {
318
+ count += 1;
319
+ pos = string.indexOf(letter, pos + 1);
320
+ }
321
+ return count;
322
+ };
323
+ // Attempt to match a string against the current chunk, returning the indexed
324
+ // match.
325
+ lex.prototype.match = function match(regex, index) {
326
+ var m;
327
+ if (!((m = this.chunk.match(regex)))) {
328
+ return false;
329
+ }
330
+ return m ? m[index] : false;
331
+ };
332
+ // A source of ambiguity in our grammar was parameter lists in function
333
+ // definitions (as opposed to argument lists in function calls). Tag
334
+ // parameter identifiers in order to avoid this. Also, parameter lists can
335
+ // make use of splats.
336
+ lex.prototype.tag_parameters = function tag_parameters() {
337
+ var i, tok;
338
+ if (this.tag() !== ')') {
339
+ return null;
340
+ }
341
+ i = 0;
342
+ while (true) {
343
+ i += 1;
344
+ tok = this.tokens[this.tokens.length - i];
345
+ if (!tok) {
346
+ return null;
347
+ }
348
+ if (tok[0] === 'IDENTIFIER') {
349
+ tok[0] = 'PARAM';
350
+ } else if (tok[0] === ')') {
351
+ tok[0] = 'PARAM_END';
352
+ } else if (tok[0] === '(') {
353
+ return (tok[0] = 'PARAM_START');
354
+ }
355
+ }
356
+ return true;
357
+ };
358
+ // Close up all remaining open blocks. IF the first token is an indent,
359
+ // axe it.
360
+ lex.prototype.close_indentation = function close_indentation() {
361
+ return this.outdent_token(this.indent);
362
+ };
363
+ })();
@@ -0,0 +1,272 @@
1
module StyleScript

  # The lexer reads a stream of StyleScript and divides it up into tagged
  # tokens. Some of the grammar's ambiguity is resolved here rather than in
  # the parser, by pushing a few extra smarts into the Lexer.
  class Lexer

    # Keywords that are passed verbatim through to the parser.
    KEYWORDS = ["if", "else", "then", "unless", "until",
                "true", "false", "yes", "no", "on", "off",
                "and", "or", "is", "isnt", "not",
                "new", "return",
                "try", "catch", "finally", "throw",
                "break", "continue",
                "for", "in", "of", "by", "where", "while",
                "delete", "instanceof", "typeof",
                "switch", "when",
                "super", "extends"]

    # Token matching regexes.
    IDENTIFIER = /\A([a-zA-Z$_](\w|\$)*)/
    NUMBER     = /\A(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i
    STRING     = /\A(""|''|"(.*?)([^\\]|\\\\)"|'(.*?)([^\\]|\\\\)')/m
    HEREDOC    = /\A("{6}|'{6}|"{3}\n?(.*?)\n?([ \t]*)"{3}|'{3}\n?(.*?)\n?([ \t]*)'{3})/m
    JS         = /\A(``|`(.*?)([^\\]|\\\\)`)/m
    OPERATOR   = /\A([+\*&|\/\-%=<>:!?]+)/
    WHITESPACE = /\A([ \t]+)/
    COMMENT    = /\A(((\n?[ \t]*)?#.*$)+)/
    CODE       = /\A((-|=)>)/
    REGEX      = /\A(\/(.*?)([^\\]|\\\\)\/[imgy]{0,4})/
    MULTI_DENT = /\A((\n([ \t]*))+)(\.)?/
    LAST_DENT  = /\n([ \t]*)/
    ASSIGNMENT = /\A(:|=)\Z/

    # Token cleaning regexes.
    JS_CLEANER      = /(\A`|`\Z)/
    MULTILINER      = /\n/
    STRING_NEWLINES = /\n[ \t]*/
    COMMENT_CLEANER = /(^[ \t]*#|\n[ \t]*$)/
    NO_NEWLINE      = /\A([+\*&|\/\-%=<>:!.\\][<>=&|]*|and|or|is|isnt|not|delete|typeof|instanceof)\Z/
    HEREDOC_INDENT  = /^[ \t]+/

    # Tokens which a regular expression will never immediately follow, but
    # which a division operator might.
    # See: http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
    NOT_REGEX = [
      :IDENTIFIER, :NUMBER, :REGEX, :STRING,
      ')', '++', '--', ']', '}',
      :FALSE, :NULL, :TRUE
    ]

    # Tokens which could legitimately be invoked or indexed.
    CALLABLE = [:IDENTIFIER, :SUPER, ')', ']', '}', :STRING]

    # Scan the source, matching one token at a time — slow and steady — and
    # hand the finished stream to the Rewriter for cleanup.
    def tokenize(code)
      @code    = code.chomp # Strip trailing line breaks.
      @i       = 0          # Current character position.
      @line    = 1          # Current line number.
      @indent  = 0          # Current indentation level.
      @indents = []         # Stack of all indent levels we are within.
      @tokens  = []         # Parsed tokens, each of the form [:TOKEN_TYPE, value].
      @spaced  = nil        # The last value that was followed by a space.
      while @i < @code.length
        @chunk = @code[@i..-1]
        extract_next_token
      end
      puts "original stream: #{@tokens.inspect}" if ENV['VERBOSE']
      close_indentation
      Rewriter.new.rewrite(@tokens)
    end

    # At the current position, try each tokenizer in priority order and
    # stop at the first that consumes input. literal_token is the catch-all.
    def extract_next_token
      return if identifier_token
      return if number_token
      return if heredoc_token
      return if string_token
      return if js_token
      return if regex_token
      return if indent_token
      return if comment_token
      return if whitespace_token
      return literal_token
    end

    # Tokenizers ==========================================================

    # Matches identifying literals: variables, keywords, method names, etc.
    def identifier_token
      return false unless identifier = @chunk[IDENTIFIER, 1]
      # Keywords are special identifiers tagged with their own name:
      # 'if' produces an [:IF, "if"] token.
      tag = KEYWORDS.include?(identifier) ? identifier.upcase.to_sym : :IDENTIFIER
      tag = :LEADING_WHEN if tag == :WHEN && [:OUTDENT, :INDENT, "\n"].include?(last_tag)
      @tokens[-1][0] = :PROTOTYPE_ACCESS if tag == :IDENTIFIER && last_value == '::'
      if tag == :IDENTIFIER && last_value == '.' && !(@tokens[-2] && @tokens[-2][1] == '.')
        # NOTE(review): @tokens[-2] may be nil when '.' is the very first
        # token; the access below would raise — confirm upstream guards.
        if @tokens[-2][0] == "?"
          # A soak access ('obj? . prop'): fold the '?' into the accessor.
          @tokens[-1][0] = :SOAK_ACCESS
          @tokens.delete_at(-2)
        else
          @tokens[-1][0] = :PROPERTY_ACCESS
        end
      end
      token(tag, identifier)
      @i += identifier.length
    end

    # Matches numbers: decimals, hex, and exponential notation.
    def number_token
      return false unless number = @chunk[NUMBER, 1]
      token(:NUMBER, number)
      @i += number.length
    end

    # Matches strings, including multi-line strings; physical newlines are
    # folded into escaped newlines.
    def string_token
      return false unless string = @chunk[STRING, 1]
      escaped = string.gsub(STRING_NEWLINES, " \\\n")
      token(:STRING, escaped)
      @line += string.count("\n")
      @i += string.length
    end

    # Matches heredocs, normalizing the body's indentation down by the
    # smallest indent found inside it.
    def heredoc_token
      return false unless match = @chunk.match(HEREDOC)
      doc = match[2] || match[4]
      indent = doc.scan(HEREDOC_INDENT).min
      doc.gsub!(/^#{indent}/, "")
      doc.gsub!("\n", "\\n")
      doc.gsub!('"', '\\"')
      token(:STRING, "\"#{doc}\"")
      @line += match[1].count("\n")
      @i += match[1].length
    end

    # Matches backtick-quoted passthrough JavaScript.
    def js_token
      return false unless script = @chunk[JS, 1]
      token(:JS, script.gsub(JS_CLEANER, ''))
      @i += script.length
    end

    # Matches regex literals — unless the preceding token means the '/'
    # should be read as a division operator instead (see NOT_REGEX).
    def regex_token
      return false unless regex = @chunk[REGEX, 1]
      return false if NOT_REGEX.include?(last_tag)
      token(:REGEX, regex)
      @i += regex.length
    end

    # Matches and consumes comments, emitting a :COMMENT token whose value
    # is the list of cleaned comment lines, followed by a newline token.
    def comment_token
      return false unless comment = @chunk[COMMENT, 1]
      @line += comment.scan(MULTILINER).length
      token(:COMMENT, comment.gsub(COMMENT_CLEANER, '').split(MULTILINER))
      token("\n", "\n")
      @i += comment.length
    end

    # Record :INDENT / :OUTDENT / newline tokens whenever a run of newlines
    # changes the indentation relative to the previous line.
    def indent_token
      return false unless indent = @chunk[MULTI_DENT, 1]
      @line += indent.scan(MULTILINER).size
      @i += indent.size
      next_character = @chunk[MULTI_DENT, 4]
      # Suppress the newline after a trailing '.' or an operator that
      # cannot legally end a statement.
      # NOTE(review): @tokens[-2] is assumed to exist here — confirm.
      no_newlines = next_character == '.' || (last_value.to_s.match(NO_NEWLINE) && @tokens[-2][0] != '.' && !last_value.match(CODE))
      return suppress_newlines(indent) if no_newlines
      # Indentation of the LAST line in the run decides the new level.
      size = indent.scan(LAST_DENT).last.last.length
      return newline_token(indent) if size == @indent
      if size > @indent
        token(:INDENT, size - @indent)
        @indents << (size - @indent)
      else
        outdent_token(@indent - size)
      end
      @indent = size
    end

    # Record an outdent token (or several, when we're moving back inwards
    # past multiple recorded indents), then a newline token.
    def outdent_token(move_out)
      while move_out > 0 && !@indents.empty?
        last_indent = @indents.pop
        token(:OUTDENT, last_indent)
        move_out -= last_indent
      end
      token("\n", "\n")
    end

    # Skip non-meaningful whitespace, remembering which value it followed
    # so literal_token can tell `f(x)` (a call) apart from `f (x)`.
    def whitespace_token
      return false unless whitespace = @chunk[WHITESPACE, 1]
      @spaced = last_value
      @i += whitespace.length
    end

    # Multiple newlines merge into a single newline token.
    def newline_token(newlines)
      token("\n", "\n") unless last_value == "\n"
      true
    end

    # A trailing '\' escapes a newline; once it has done its job, the
    # escape token itself is discarded.
    def suppress_newlines(newlines)
      @tokens.pop if last_value == "\\"
      true
    end

    # Every remaining single character is a token, e.g. ( ) , . !
    # Multi-character operators are also literal tokens so that Racc can
    # assign the proper order of operations.
    def literal_token
      value = @chunk[OPERATOR, 1]
      # An arrow ('->' or '=>') means the preceding parens were a parameter
      # list, not an argument list — retag them.
      tag_parameters if value && value.match(CODE)
      value ||= @chunk[0,1]
      tag = value.match(ASSIGNMENT) ? :ASSIGN : value
      tag = "\n" if value == ';' rescue nil if false # (placeholder removed)
      if value == ';'
        tag = "\n"
      end
      # An opening paren/bracket glued to a callable token is a call/index,
      # not a grouping — unless whitespace separated them.
      if !@spaced.equal?(last_value) && CALLABLE.include?(last_tag)
        tag = :CALL_START  if value == '('
        tag = :INDEX_START if value == '['
      end
      token(tag, value)
      @i += value.length
    end

    # Helpers ==========================================================

    # Append a token to the results, recording the current line number.
    def token(tag, value)
      @tokens << [tag, Value.new(value, @line)]
    end

    # Peek at the previous token's value.
    def last_value
      @tokens.last && @tokens.last[1]
    end

    # Peek at the previous token's tag.
    def last_tag
      @tokens.last && @tokens.last[0]
    end

    # A source of ambiguity in the grammar is parameter lists in function
    # definitions versus argument lists in function calls. Walk backwards
    # from a closing ')' and retag the identifiers inside as parameters.
    def tag_parameters
      return if last_tag != ')'
      i = 0
      loop do
        i -= 1
        tok = @tokens[i]
        return if !tok
        case tok[0]
        when :IDENTIFIER then tok[0] = :PARAM
        when ')'         then tok[0] = :PARAM_END
        when '('         then return tok[0] = :PARAM_START
        end
      end
    end

    # Close every block still open when the end of input is reached.
    def close_indentation
      outdent_token(@indent)
    end

  end
end