style-script 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,363 @@
1
+ (function(){
2
// Dependencies.
var Rewriter = require('./rewriter').Rewriter;

// The lexer reads a stream of StyleScript and divides it up into tagged
// tokens. A minor bit of the ambiguity in the grammar has been avoided by
// pushing some extra smarts into the Lexer.
var lex = function lex() { };
exports.Lexer = lex;

// Constants ============================================================

// The list of keywords passed verbatim to the parser.
var KEYWORDS = [
  "if", "else", "then", "unless",
  "true", "false", "yes", "no", "on", "off",
  "and", "or", "is", "isnt", "not",
  "new", "return", "arguments",
  "try", "catch", "finally", "throw",
  "break", "continue",
  "for", "in", "of", "by", "where", "while",
  "delete", "instanceof", "typeof",
  "switch", "when",
  "super", "extends"
];

// Token matching regexes. Each is anchored at the start of the remaining
// chunk of source.
var IDENTIFIER = /^([a-zA-Z$_](\w|\$)*)/;
var NUMBER = /^(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i;
var STRING = /^(""|''|"([\s\S]*?)([^\\]|\\\\)"|'([\s\S]*?)([^\\]|\\\\)')/;
var HEREDOC = /^("{6}|'{6}|"{3}\n?([\s\S]*?)\n?([ \t]*)"{3}|'{3}\n?([\s\S]*?)\n?([ \t]*)'{3})/;
var JS = /^(``|`([\s\S]*?)([^\\]|\\\\)`)/;
var OPERATOR = /^([+\*&|\/\-%=<>:!?]+)/;
var WHITESPACE = /^([ \t]+)/;
var COMMENT = /^(((\n?[ \t]*)?#[^\n]*)+)/;
var CODE = /^((-|=)>)/;
var REGEX = /^(\/(.*?)([^\\]|\\\\)\/[imgy]{0,4})/;
var MULTI_DENT = /^((\n([ \t]*))+)(\.)?/;
var LAST_DENTS = /\n([ \t]*)/g;
var LAST_DENT = /\n([ \t]*)/;
var ASSIGNMENT = /^(:|=)$/;

// Token cleaning regexes.
var JS_CLEANER = /(^`|`$)/g;
var MULTILINER = /\n/g;
var STRING_NEWLINES = /\n[ \t]*/g;
var COMMENT_CLEANER = /(^[ \t]*#|\n[ \t]*$)/mg;
var NO_NEWLINE = /^([+\*&|\/\-%=<>:!.\\][<>=&|]*|and|or|is|isnt|not|delete|typeof|instanceof)$/;
var HEREDOC_INDENT = /^[ \t]+/g;

// Tokens which a regular expression will never immediately follow, but which
// a division operator might.
// See: http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
var NOT_REGEX = [
  'IDENTIFIER', 'NUMBER', 'REGEX', 'STRING',
  ')', '++', '--', ']', '}',
  'FALSE', 'NULL', 'TRUE'
];

// Tokens which could legitimately be invoked or indexed.
var CALLABLE = ['IDENTIFIER', 'SUPER', ')', ']', '}', 'STRING'];
39
+ // Scan by attempting to match tokens one character at a time. Slow and steady.
40
// Tokenize a complete source string.
//
// code - the source text to lex.
//
// Resets all lexer state, consumes the source one token at a time via
// extract_next_token(), closes any indentation still open, and returns the
// token stream after it has been passed through the Rewriter.
lex.prototype.tokenize = function tokenize(code) {
  // Drop one trailing line break (the equivalent of Ruby's String#chomp) so
  // that the final newline of a file does not produce an extra TERMINATOR in
  // addition to the one close_indentation() emits.
  this.code = code.replace(/(\r\n|\n|\r)$/, '');
  // Current character position we're parsing.
  this.i = 0;
  // The current line.
  this.line = 1;
  // The current indent level.
  this.indent = 0;
  // The stack of all indent levels we are currently within.
  this.indents = [];
  // Collection of all parsed tokens in the form [TOKEN_TYPE, value].
  this.tokens = [];
  // The value of the last token that has a space following it.
  this.spaced = null;
  while (this.i < this.code.length) {
    // Every matcher works against the not-yet-consumed tail of the source.
    this.chunk = this.code.slice(this.i);
    this.extract_next_token();
  }
  this.close_indentation();
  return (new Rewriter()).rewrite(this.tokens);
};
62
+ // At every position, run through this list of attempted matches,
63
+ // short-circuiting if any of them succeed.
64
// Try each specialised tokenizer in priority order, stopping at the first one
// that consumes input. Returns null when one of them matched; otherwise the
// result of literal_token(), the catch-all.
lex.prototype.extract_next_token = function extract_next_token() {
  var consumed =
    this.identifier_token() ||
    this.number_token() ||
    this.heredoc_token() ||
    this.string_token() ||
    this.js_token() ||
    this.regex_token() ||
    this.indent_token() ||
    this.comment_token() ||
    this.whitespace_token();
  return consumed ? null : this.literal_token();
};
94
+ // Tokenizers ==========================================================
95
+ // Matches identifying literals: variables, keywords, method names, etc.
96
// Match an identifier or keyword at the start of the current chunk.
//
// Returns false when the chunk does not start with an identifier; otherwise
// pushes the token, advances the position and returns true. As a side effect
// the preceding `::` or `.` token may be retagged, and a `?` before a `.` is
// removed.
lex.prototype.identifier_token = function identifier_token() {
  var id, tag;
  if (!((id = this.match(IDENTIFIER, 1)))) {
    return false;
  }
  // Keywords are special identifiers tagged with their own name,
  // 'if' will result in an ['IF', "if"] token.
  tag = KEYWORDS.indexOf(id) >= 0 ? id.toUpperCase() : 'IDENTIFIER';
  if (tag === 'WHEN' && (this.tag() === 'OUTDENT' || this.tag() === 'INDENT')) {
    tag = 'LEADING_WHEN';
  }
  if (tag === 'IDENTIFIER' && this.value() === '::') {
    // Index 1 is the most recent token. (An index of -1 addresses a slot past
    // the end of the array, so the retag would silently do nothing.)
    this.tag(1, 'PROTOTYPE_ACCESS');
  }
  // A single '.' before an identifier is a property access; '..' is not.
  if (tag === 'IDENTIFIER' && this.value() === '.' && !(this.value(2) === '.')) {
    if (this.tag(2) === '?') {
      // `a?.b`: fold the '?' into the access token and drop it.
      this.tag(1, 'SOAK_ACCESS');
      this.tokens.splice(-2, 1);
    } else {
      this.tag(1, 'PROPERTY_ACCESS');
    }
  }
  this.token(tag, id);
  this.i += id.length;
  return true;
};
122
+ // Matches numbers, including decimals, hex, and exponential notation.
123
// Match a numeric literal (decimal, hex or exponential) at the start of the
// chunk. Returns false when there is none; otherwise records a NUMBER token,
// advances past it and returns true.
lex.prototype.number_token = function number_token() {
  var literal = this.match(NUMBER, 1);
  if (!literal) {
    return false;
  }
  this.token('NUMBER', literal);
  this.i += literal.length;
  return true;
};
132
+ // Matches strings, including multi-line strings.
133
// Match a quoted string literal, which may span several lines. Returns false
// when the chunk does not start with one. Otherwise records a STRING token in
// which each line break (plus following indentation) is replaced by an
// escaped newline, counts the consumed lines, advances and returns true.
lex.prototype.string_token = function string_token() {
  var raw = this.match(STRING, 1);
  if (!raw) {
    return false;
  }
  this.token('STRING', raw.replace(STRING_NEWLINES, " \\\n"));
  this.line += this.count(raw, "\n");
  this.i += raw.length;
  return true;
};
144
+ // Matches heredocs, adjusting indentation to the correct level.
145
// Match a heredoc (triple-quoted string) at the start of the chunk and emit
// it as a regular double-quoted STRING token.
//
// Returns false when the chunk does not start with a heredoc. Otherwise the
// common indentation is removed from every line, newlines and double quotes
// are escaped, the line counter and position are advanced, and true is
// returned.
lex.prototype.heredoc_token = function heredoc_token() {
  var doc, indent, indents, match;
  if (!((match = this.chunk.match(HEREDOC)))) {
    return false;
  }
  // Both content groups are undefined or empty for an empty heredoc.
  doc = match[2] || match[4] || '';
  // Collect the indentation of every line, not just the first one: `^` only
  // matches at line starts when the multiline flag is set.
  indents = doc.match(new RegExp(HEREDOC_INDENT.source, 'gm'));
  if (indents) {
    // Lowest-sorting indentation, as in the Ruby lexer's `scan(...).min`.
    indent = indents.sort()[0];
    doc = doc.replace(new RegExp("^" + indent, 'gm'), '');
  }
  // Escape every newline and every double quote so the result is a valid
  // single-line double-quoted literal.
  doc = doc.replace(MULTILINER, "\\n").replace(/"/g, '\\"');
  this.token('STRING', '"' + doc + '"');
  this.line += this.count(match[1], "\n");
  this.i += match[1].length;
  return true;
};
158
+ // Matches interpolated JavaScript.
159
// Match a backtick-delimited run of embedded JavaScript. Returns false when
// there is none; otherwise records a JS token holding the text without its
// surrounding backticks, advances past the whole literal and returns true.
lex.prototype.js_token = function js_token() {
  var snippet = this.match(JS, 1);
  if (!snippet) {
    return false;
  }
  var inner = snippet.replace(JS_CLEANER, '');
  this.token('JS', inner);
  this.i += snippet.length;
  return true;
};
168
+ // Matches regular expression literals.
169
// Match a regular expression literal. Returns false when the chunk does not
// start with one, or when the previous token's tag is in NOT_REGEX (in which
// case the slash is left for another tokenizer). Otherwise records a REGEX
// token, advances and returns true.
lex.prototype.regex_token = function regex_token() {
  var literal = this.match(REGEX, 1);
  if (!literal || NOT_REGEX.indexOf(this.tag()) !== -1) {
    return false;
  }
  this.token('REGEX', literal);
  this.i += literal.length;
  return true;
};
181
+ // Matches and consumes comments.
182
// Match one or more consecutive comment lines. Returns false when the chunk
// does not start with a comment. Otherwise records a COMMENT token whose
// value is the array of cleaned comment lines, followed by a TERMINATOR,
// bumps the line counter by the number of newlines consumed, and returns true.
lex.prototype.comment_token = function comment_token() {
  var block = this.match(COMMENT, 1);
  if (!block) {
    return false;
  }
  var breaks = block.match(MULTILINER) || [];
  var lines = block.replace(COMMENT_CLEANER, '').split(MULTILINER);
  this.line += breaks.length;
  this.token('COMMENT', lines);
  this.token('TERMINATOR', "\n");
  this.i += block.length;
  return true;
};
193
+ // Record tokens for indentation differing from the previous line.
194
// Handle a run of newlines (with their indentation) at the start of the chunk.
//
// Returns false when the chunk does not start with a newline. Otherwise the
// line counter and position are advanced and one of the following happens:
//   * the newline is suppressed, because the next line starts with '.' or the
//     previous token is a dangling operator/keyword (NO_NEWLINE);
//   * a TERMINATOR is emitted when the indentation is unchanged;
//   * an INDENT token is emitted, or outdent_token() is called, when the
//     indentation grew or shrank.
lex.prototype.indent_token = function indent_token() {
  var diff, indent, next_character, no_newlines, prev, size;
  if (!((indent = this.match(MULTI_DENT, 1)))) {
    return false;
  }
  this.line += indent.match(MULTILINER).length;
  this.i += indent.length;
  next_character = this.chunk.match(MULTI_DENT)[4];
  // The previous value may be missing (source starts with a newline) or not a
  // string (INDENT/OUTDENT carry numbers); neither can continue a line.
  prev = this.value();
  if (typeof prev !== 'string') {
    prev = '';
  }
  // this.tag(2) returns null when there is no token before the previous one,
  // so a lone leading operator no longer dereferences an undefined token.
  no_newlines = next_character === '.' || (NO_NEWLINE.test(prev) && this.tag(2) !== '.' && !CODE.test(prev));
  if (no_newlines) {
    return this.suppress_newlines(indent);
  }
  // Only the indentation after the last newline of the run matters.
  size = indent.match(LAST_DENTS).reverse()[0].match(LAST_DENT)[1].length;
  if (size === this.indent) {
    return this.newline_token(indent);
  }
  if (size > this.indent) {
    diff = size - this.indent;
    this.token('INDENT', diff);
    this.indents.push(diff);
  } else {
    this.outdent_token(this.indent - size);
  }
  this.indent = size;
  return true;
};
220
+ // Record an outdent token or tokens, if we're moving back inwards past
221
+ // multiple recorded indents.
222
// Pop recorded indent levels, emitting one OUTDENT token per level, until
// `move_out` columns have been accounted for or the indent stack is empty.
// A TERMINATOR is always appended afterwards. Returns true.
lex.prototype.outdent_token = function outdent_token(move_out) {
  var remaining = move_out;
  var width;
  for (; remaining > 0 && this.indents.length; remaining -= width) {
    width = this.indents.pop();
    this.token('OUTDENT', width);
  }
  this.token('TERMINATOR', "\n");
  return true;
};
232
+ // Matches and consumes non-meaningful whitespace.
233
// Skip a run of spaces/tabs. Returns false when the chunk does not start with
// whitespace. Otherwise remembers the value of the token that preceded the
// gap in `this.spaced`, advances past the whitespace and returns true; no
// token is emitted.
lex.prototype.whitespace_token = function whitespace_token() {
  var gap = this.match(WHITESPACE, 1);
  if (!gap) {
    return false;
  }
  this.spaced = this.value();
  this.i += gap.length;
  return true;
};
242
+ // Multiple newlines get merged together.
243
+ // Use a trailing \ to escape newlines.
244
// Emit a TERMINATOR unless the previous token's value is already a newline,
// so consecutive line breaks collapse into one. `newlines` is accepted but
// not used. Returns true.
lex.prototype.newline_token = function newline_token(newlines) {
  var already_terminated = this.value() === "\n";
  if (!already_terminated) {
    this.token('TERMINATOR', "\n");
  }
  return true;
};
250
+ // Tokens to explicitly escape newlines are removed once their job is done.
251
// Swallow a line break without emitting a TERMINATOR. If the previous token
// is the explicit line-continuation backslash, that token is removed as well.
// `newlines` is accepted but not used. Returns true.
lex.prototype.suppress_newlines = function suppress_newlines(newlines) {
  var continued = this.value() === "\\";
  if (continued) {
    this.tokens.pop();
  }
  return true;
};
257
+ // We treat all other single characters as a token. Eg.: ( ) , . !
258
+ // Multi-character operators are also literal tokens, so that Racc can assign
259
+ // the proper order of operations.
260
// Catch-all tokenizer: consume either a run of operator characters or a
// single character and emit it as a token tagged with its own text.
//
// Special cases: a function arrow triggers tag_parameters(); ':' and '=' are
// tagged ASSIGN; ';' is tagged TERMINATOR; '(' and '[' directly after a
// callable token (with no recorded space in between) become CALL_START and
// INDEX_START. Always returns true.
lex.prototype.literal_token = function literal_token() {
  var found = this.chunk.match(OPERATOR);
  var text = found && found[1];
  var kind;
  if (text && text.match(CODE)) {
    this.tag_parameters();
  }
  if (!text) {
    text = this.chunk.substr(0, 1);
  }
  if (text === ';') {
    kind = 'TERMINATOR';
  } else if (text.match(ASSIGNMENT)) {
    kind = 'ASSIGN';
  } else {
    kind = text;
  }
  var follows_callable = this.value() !== this.spaced && CALLABLE.indexOf(this.tag()) >= 0;
  if (follows_callable) {
    if (text === '(') {
      kind = 'CALL_START';
    } else if (text === '[') {
      kind = 'INDEX_START';
    }
  }
  this.token(kind, text);
  this.i += text.length;
  return true;
};
284
+ // Helpers =============================================================
285
+ // Add a token to the results, taking note of the line number.
286
// Append a [tag, value] pair to the token stream and return the new stream
// length (the result of Array#push). Line numbers are not recorded here.
lex.prototype.token = function token(tag, value) {
  var entry = [tag, value];
  return this.tokens.push(entry);
};
290
+ // Look at a tag in the current token stream.
291
// Read or overwrite the tag of a recent token.
//
// index - how far back to look, counted from the end (1 or omitted = last).
// tag   - when given (not null/undefined), the new tag to store.
//
// Returns null when no token exists at that position, otherwise the tag
// (the newly stored one when `tag` was supplied).
lex.prototype.tag = function tag(index, tag) {
  var back = index || 1;
  var entry = this.tokens[this.tokens.length - back];
  if (!entry) {
    return null;
  }
  if (tag === undefined || tag === null) {
    return entry[0];
  }
  entry[0] = tag;
  return tag;
};
301
+ // Look at a value in the current token stream.
302
// Read or overwrite the value of a recent token.
//
// index - how far back to look, counted from the end (1 or omitted = last).
// val   - when given (not null/undefined), the new value to store.
//
// Returns null when no token exists at that position, otherwise the value
// (the newly stored one when `val` was supplied).
lex.prototype.value = function value(index, val) {
  var back = index || 1;
  var entry = this.tokens[this.tokens.length - back];
  if (!entry) {
    return null;
  }
  if (val === undefined || val === null) {
    return entry[1];
  }
  entry[1] = val;
  return val;
};
312
+ // Count the occurrences of a character in a string.
313
// Count the non-overlapping occurrences of `letter` in `string`.
//
// string - the text to search.
// letter - the substring to look for (a single character in practice).
//
// Returns the number of occurrences as an integer; 0 for an empty `letter`.
lex.prototype.count = function count(string, letter) {
  var num, pos;
  num = 0;
  // indexOf('') always succeeds, which would make the loop below spin forever.
  if (letter === '') {
    return num;
  }
  pos = string.indexOf(letter);
  while (pos !== -1) {
    // Accumulate into the local counter, not into the function's own name.
    num += 1;
    pos = string.indexOf(letter, pos + 1);
  }
  return num;
};
323
+ // Attempt to match a string against the current chunk, returning the indexed
324
+ // match.
325
// Apply `regex` to the current chunk and return capture group `index`, or
// false when the regex does not match.
lex.prototype.match = function match(regex, index) {
  var found = this.chunk.match(regex);
  return found ? found[index] : false;
};
332
+ // A source of ambiguity in our grammar was parameter lists in function
333
+ // definitions (as opposed to argument lists in function calls). Tag
334
+ // parameter identifiers in order to avoid this. Also, parameter lists can
335
+ // make use of splats.
336
// Called when a function arrow is encountered. If the previous token is ')',
// walk backwards through the token stream retagging IDENTIFIER as PARAM and
// ')' as PARAM_END until the first '(' is reached, which becomes PARAM_START.
//
// Returns null when the previous token is not ')' or when the start of the
// stream is reached without finding '('; otherwise returns 'PARAM_START'.
lex.prototype.tag_parameters = function tag_parameters() {
  var position, entry;
  if (this.tag() !== ')') {
    return null;
  }
  for (position = this.tokens.length - 1; position >= 0; position -= 1) {
    entry = this.tokens[position];
    if (!entry) {
      return null;
    }
    switch (entry[0]) {
      case 'IDENTIFIER':
        entry[0] = 'PARAM';
        break;
      case ')':
        entry[0] = 'PARAM_END';
        break;
      case '(':
        entry[0] = 'PARAM_START';
        return 'PARAM_START';
    }
  }
  return null;
};
358
+ // Close up all remaining open blocks. IF the first token is an indent,
359
+ // axe it.
360
// Close every block still open at the end of input by outdenting by the
// current indent level. Returns the result of outdent_token().
lex.prototype.close_indentation = function close_indentation() {
  var depth = this.indent;
  return this.outdent_token(depth);
};
363
+ })();
@@ -0,0 +1,272 @@
1
module StyleScript

  # The lexer reads a stream of StyleScript and divides it up into tagged
  # tokens. A minor bit of the ambiguity in the grammar has been avoided by
  # pushing some extra smarts into the Lexer.
  class Lexer

    # The list of keywords passed verbatim to the parser.
    KEYWORDS   = ["if", "else", "then", "unless", "until",
                  "true", "false", "yes", "no", "on", "off",
                  "and", "or", "is", "isnt", "not",
                  "new", "return",
                  "try", "catch", "finally", "throw",
                  "break", "continue",
                  "for", "in", "of", "by", "where", "while",
                  "delete", "instanceof", "typeof",
                  "switch", "when",
                  "super", "extends"]

    # Token matching regexes.
    IDENTIFIER = /\A([a-zA-Z$_](\w|\$)*)/
    NUMBER     = /\A(\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?)))\b/i
    STRING     = /\A(""|''|"(.*?)([^\\]|\\\\)"|'(.*?)([^\\]|\\\\)')/m
    HEREDOC    = /\A("{6}|'{6}|"{3}\n?(.*?)\n?([ \t]*)"{3}|'{3}\n?(.*?)\n?([ \t]*)'{3})/m
    JS         = /\A(``|`(.*?)([^\\]|\\\\)`)/m
    OPERATOR   = /\A([+\*&|\/\-%=<>:!?]+)/
    WHITESPACE = /\A([ \t]+)/
    COMMENT    = /\A(((\n?[ \t]*)?#.*$)+)/
    CODE       = /\A((-|=)>)/
    REGEX      = /\A(\/(.*?)([^\\]|\\\\)\/[imgy]{0,4})/
    MULTI_DENT = /\A((\n([ \t]*))+)(\.)?/
    LAST_DENT  = /\n([ \t]*)/
    ASSIGNMENT = /\A(:|=)\Z/

    # Token cleaning regexes.
    JS_CLEANER      = /(\A`|`\Z)/
    MULTILINER      = /\n/
    STRING_NEWLINES = /\n[ \t]*/
    COMMENT_CLEANER = /(^[ \t]*#|\n[ \t]*$)/
    NO_NEWLINE      = /\A([+\*&|\/\-%=<>:!.\\][<>=&|]*|and|or|is|isnt|not|delete|typeof|instanceof)\Z/
    HEREDOC_INDENT  = /^[ \t]+/

    # Tokens which a regular expression will never immediately follow, but which
    # a division operator might.
    # See: http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
    NOT_REGEX = [
      :IDENTIFIER, :NUMBER, :REGEX, :STRING,
      ')', '++', '--', ']', '}',
      :FALSE, :NULL, :TRUE
    ]

    # Tokens which could legitimately be invoked or indexed.
    CALLABLE = [:IDENTIFIER, :SUPER, ')', ']', '}', :STRING]

    # Tokenize +code+ and return the token stream after it has been passed
    # through the Rewriter. All lexer state is reset on every call.
    def tokenize(code)
      @code    = code.chomp  # Cleanup code by removing a trailing line break
      @i       = 0           # Current character position we're parsing
      @line    = 1           # The current line.
      @indent  = 0           # The current indent level.
      @indents = []          # The stack of all indent levels we are currently within.
      @tokens  = []          # Collection of all parsed tokens in the form [:TOKEN_TYPE, value]
      @spaced  = nil         # The last value that has a space following it.
      while @i < @code.length
        @chunk = @code[@i..-1]
        extract_next_token
      end
      puts "original stream: #{@tokens.inspect}" if ENV['VERBOSE']
      close_indentation
      Rewriter.new.rewrite(@tokens)
    end

    # At every position, run through this list of attempted matches,
    # short-circuiting if any of them succeed.
    def extract_next_token
      return if identifier_token
      return if number_token
      return if heredoc_token
      return if string_token
      return if js_token
      return if regex_token
      return if indent_token
      return if comment_token
      return if whitespace_token
      return    literal_token
    end

    # Tokenizers ==========================================================

    # Matches identifying literals: variables, keywords, method names, etc.
    # May retag the preceding '::' or '.' token, and removes a '?' that
    # precedes a '.'.
    def identifier_token
      return false unless identifier = @chunk[IDENTIFIER, 1]
      # Keywords are special identifiers tagged with their own name,
      # 'if' will result in an [:IF, "if"] token.
      tag = KEYWORDS.include?(identifier) ? identifier.upcase.to_sym : :IDENTIFIER
      tag = :LEADING_WHEN if tag == :WHEN && [:OUTDENT, :INDENT, "\n"].include?(last_tag)
      @tokens[-1][0] = :PROTOTYPE_ACCESS if tag == :IDENTIFIER && last_value == '::'
      # The token before the '.' may not exist (source starting with '.'),
      # so every access to it is guarded.
      before_dot = @tokens[-2]
      if tag == :IDENTIFIER && last_value == '.' && !(before_dot && before_dot[1] == '.')
        if before_dot && before_dot[0] == "?"
          @tokens[-1][0] = :SOAK_ACCESS
          @tokens.delete_at(-2)
        else
          @tokens[-1][0] = :PROPERTY_ACCESS
        end
      end
      token(tag, identifier)
      @i += identifier.length
    end

    # Matches numbers, including decimals, hex, and exponential notation.
    def number_token
      return false unless number = @chunk[NUMBER, 1]
      token(:NUMBER, number)
      @i += number.length
    end

    # Matches strings, including multi-line strings.
    def string_token
      return false unless string = @chunk[STRING, 1]
      escaped = string.gsub(STRING_NEWLINES, " \\\n")
      token(:STRING, escaped)
      @line += string.count("\n")
      @i += string.length
    end

    # Matches heredocs, adjusting indentation to the correct level.
    def heredoc_token
      return false unless match = @chunk.match(HEREDOC)
      # Both content groups are nil for an empty heredoc ("""""" or '''''').
      doc = (match[2] || match[4] || '').dup
      indent = doc.scan(HEREDOC_INDENT).min
      doc.gsub!(/^#{indent}/, "") if indent
      doc.gsub!("\n", "\\n")
      doc.gsub!('"', '\\"')
      token(:STRING, "\"#{doc}\"")
      @line += match[1].count("\n")
      @i += match[1].length
    end

    # Matches interpolated JavaScript.
    def js_token
      return false unless script = @chunk[JS, 1]
      token(:JS, script.gsub(JS_CLEANER, ''))
      @i += script.length
    end

    # Matches regular expression literals.
    def regex_token
      return false unless regex = @chunk[REGEX, 1]
      return false if NOT_REGEX.include?(last_tag)
      token(:REGEX, regex)
      @i += regex.length
    end

    # Matches and consumes comments.
    def comment_token
      return false unless comment = @chunk[COMMENT, 1]
      @line += comment.scan(MULTILINER).length
      token(:COMMENT, comment.gsub(COMMENT_CLEANER, '').split(MULTILINER))
      token("\n", "\n")
      @i += comment.length
    end

    # Record tokens for indentation differing from the previous line.
    def indent_token
      return false unless indent = @chunk[MULTI_DENT, 1]
      @line += indent.scan(MULTILINER).size
      @i += indent.size
      next_character = @chunk[MULTI_DENT, 4]
      previous = last_value.to_s
      # There may be only one token so far; a missing token is not a '.'.
      before_previous = @tokens[-2]
      no_newlines = next_character == '.' ||
        (previous.match(NO_NEWLINE) &&
         !(before_previous && before_previous[0] == '.') &&
         !previous.match(CODE))
      return suppress_newlines(indent) if no_newlines
      size = indent.scan(LAST_DENT).last.last.length
      return newline_token(indent) if size == @indent
      if size > @indent
        token(:INDENT, size - @indent)
        @indents << (size - @indent)
      else
        outdent_token(@indent - size)
      end
      @indent = size
    end

    # Record an outdent token or tokens, if we're moving back inwards past
    # multiple recorded indents.
    def outdent_token(move_out)
      while move_out > 0 && !@indents.empty?
        last_indent = @indents.pop
        token(:OUTDENT, last_indent)
        move_out -= last_indent
      end
      token("\n", "\n")
    end

    # Matches and consumes non-meaningful whitespace.
    def whitespace_token
      return false unless whitespace = @chunk[WHITESPACE, 1]
      @spaced = last_value
      @i += whitespace.length
    end

    # Multiple newlines get merged together.
    # Use a trailing \ to escape newlines.
    def newline_token(newlines)
      token("\n", "\n") unless last_value == "\n"
      true
    end

    # Tokens to explicitly escape newlines are removed once their job is done.
    def suppress_newlines(newlines)
      @tokens.pop if last_value == "\\"
      true
    end

    # We treat all other single characters as a token. Eg.: ( ) , . !
    # Multi-character operators are also literal tokens, so that Racc can assign
    # the proper order of operations.
    def literal_token
      value = @chunk[OPERATOR, 1]
      tag_parameters if value && value.match(CODE)
      value ||= @chunk[0,1]
      tag = value.match(ASSIGNMENT) ? :ASSIGN : value
      # Identity comparison: @spaced holds the very token value object that
      # was last followed by whitespace.
      if !@spaced.equal?(last_value) && CALLABLE.include?(last_tag)
        tag = :CALL_START  if value == '('
        tag = :INDEX_START if value == '['
      end
      token(tag, value)
      @i += value.length
    end

    # Helpers ==========================================================

    # Add a token to the results, taking note of the line number.
    def token(tag, value)
      @tokens << [tag, Value.new(value, @line)]
    end

    # Peek at the previous token's value.
    def last_value
      @tokens.last && @tokens.last[1]
    end

    # Peek at the previous token's tag.
    def last_tag
      @tokens.last && @tokens.last[0]
    end

    # A source of ambiguity in our grammar was parameter lists in function
    # definitions (as opposed to argument lists in function calls). Tag
    # parameter identifiers in order to avoid this. Also, parameter lists can
    # make use of splats.
    def tag_parameters
      return if last_tag != ')'
      i = 0
      loop do
        i -= 1
        tok = @tokens[i]
        return if !tok
        case tok[0]
        when :IDENTIFIER  then tok[0] = :PARAM
        when ')'          then tok[0] = :PARAM_END
        when '('          then return tok[0] = :PARAM_START
        end
      end
    end

    # Close up all remaining open blocks by outdenting by the current indent
    # level.
    def close_indentation
      outdent_token(@indent)
    end

  end
end