coffee-script 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,403 @@
1
# Racc grammar for CoffeeScript. Each rule's action assembles an AST node (or a
# plain Array for list-like rules) from the matched right-hand-side values,
# which Racc exposes as `val`; the value assigned to `result` becomes the value
# of the rule.
class Parser

# Declare tokens produced by the lexer
token IF ELSE THEN UNLESS
token NUMBER STRING REGEX
token TRUE FALSE YES NO ON OFF
token IDENTIFIER PROPERTY_ACCESS
token CODE PARAM NEW RETURN
token TRY CATCH FINALLY THROW
token BREAK CONTINUE
token FOR IN WHILE
token SWITCH WHEN
token SUPER
token DELETE
token NEWLINE
token COMMENT
token JS

# Declare order of operations, from tightest binding (top) to loosest (bottom).
prechigh
  nonassoc UMINUS NOT '!' '!!' '~' '++' '--'
  left     '*' '/' '%'
  left     '+' '-'
  left     '<<' '>>' '>>>'
  left     '&' '|' '^'
  left     '<=' '<' '>' '>='
  right    '==' '!=' IS AINT
  left     '&&' '||' AND OR
  right    '-=' '+=' '/=' '*='
  right    DELETE
  left     "."
  right    THROW FOR IN WHILE NEW
  left     UNLESS IF ELSE
  left     ":" '||:' '&&:'
  right    RETURN
preclow

# We expect 4 shift/reduce conflicts for optional syntax.
# There used to be 252 -- greatly improved.
expect 4

rule

  # All parsing will end in this rule, being the trunk of the AST.
  # Empty input and input made only of a terminator both yield an empty
  # Expressions node.
  Root:
    /* nothing */                     { result = Expressions.new([]) }
  | Terminator                        { result = Expressions.new([]) }
  | Expressions                       { result = val[0] }
  ;

  # Any list of expressions or method body, separated by line breaks or semis.
  # Leading and trailing terminators are accepted and discarded.
  Expressions:
    Expression                        { result = Expressions.new(val) }
  | Expressions Terminator Expression { result = val[0] << val[2] }
  | Expressions Terminator            { result = val[0] }
  | Terminator Expressions            { result = val[1] }
  ;

  # All types of expressions in our language.
  Expression:
    PureExpression
  | Statement
  ;

  # The parts that are natural JavaScript expressions.
  PureExpression:
    Literal
  | Value
  | Call
  | Code
  | Operation
  ;

  # We have to take extra care to convert these statements into expressions.
  Statement:
    Assign
  | If
  | Try
  | Throw
  | Return
  | While
  | For
  | Switch
  | Comment
  ;

  # All tokens that can terminate an expression.
  Terminator:
    "\n"
  | ";"
  ;

  # All tokens that can serve to begin the second block of a multi-part expression.
  Then:
    THEN
  | Terminator
  ;

  # All hard-coded values. The boolean aliases (yes/no, on/off) collapse to
  # Ruby true/false here; every other literal keeps its token value.
  Literal:
    NUMBER                            { result = LiteralNode.new(val[0]) }
  | STRING                            { result = LiteralNode.new(val[0]) }
  | JS                                { result = LiteralNode.new(val[0]) }
  | REGEX                             { result = LiteralNode.new(val[0]) }
  | BREAK                             { result = LiteralNode.new(val[0]) }
  | CONTINUE                          { result = LiteralNode.new(val[0]) }
  | TRUE                              { result = LiteralNode.new(true) }
  | FALSE                             { result = LiteralNode.new(false) }
  | YES                               { result = LiteralNode.new(true) }
  | NO                                { result = LiteralNode.new(false) }
  | ON                                { result = LiteralNode.new(true) }
  | OFF                               { result = LiteralNode.new(false) }
  ;

  # Assignment to a variable.
  Assign:
    Value ":" Expression              { result = AssignNode.new(val[0], val[2]) }
  ;

  # Assignment within an object literal. Comments are allowed between pairs
  # and are passed through as-is.
  AssignObj:
    IDENTIFIER ":" Expression         { result = AssignNode.new(val[0], val[2], :object) }
  | Comment                           { result = val[0] }
  ;

  # A return statement.
  Return:
    RETURN Expression                 { result = ReturnNode.new(val[1]) }
  ;

  # A comment.
  Comment:
    COMMENT                           { result = CommentNode.new(val[0]) }
  ;

  # Arithmetic and logical operators
  # For Ruby's Operator precedence, see:
  # https://www.cs.auckland.ac.nz/references/ruby/ProgrammingRuby/language.html
  #
  # Prefix forms build OpNode.new(operator, operand); postfix '--'/'++' pass a
  # trailing `true` flag; binary forms build OpNode.new(operator, left, right).
  Operation:
    '!' Expression                    { result = OpNode.new(val[0], val[1]) }
  | '!!' Expression                   { result = OpNode.new(val[0], val[1]) }
  | '-' Expression = UMINUS           { result = OpNode.new(val[0], val[1]) }
  | NOT Expression                    { result = OpNode.new(val[0], val[1]) }
  | '~' Expression                    { result = OpNode.new(val[0], val[1]) }
  | '--' Expression                   { result = OpNode.new(val[0], val[1]) }
  | '++' Expression                   { result = OpNode.new(val[0], val[1]) }
  | Expression '--'                   { result = OpNode.new(val[1], val[0], nil, true) }
  | Expression '++'                   { result = OpNode.new(val[1], val[0], nil, true) }

  | Expression '*' Expression         { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '/' Expression         { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '%' Expression         { result = OpNode.new(val[1], val[0], val[2]) }

  | Expression '+' Expression         { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '-' Expression         { result = OpNode.new(val[1], val[0], val[2]) }

  | Expression '<<' Expression        { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '>>' Expression        { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '>>>' Expression       { result = OpNode.new(val[1], val[0], val[2]) }

  | Expression '&' Expression         { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '|' Expression         { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '^' Expression         { result = OpNode.new(val[1], val[0], val[2]) }

  | Expression '<=' Expression        { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '<' Expression         { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '>' Expression         { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '>=' Expression        { result = OpNode.new(val[1], val[0], val[2]) }

  | Expression '==' Expression        { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '!=' Expression        { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression IS Expression          { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression AINT Expression        { result = OpNode.new(val[1], val[0], val[2]) }

  | Expression '&&' Expression        { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '||' Expression        { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression AND Expression         { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression OR Expression          { result = OpNode.new(val[1], val[0], val[2]) }

  | Expression '-=' Expression        { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '+=' Expression        { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '/=' Expression        { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '*=' Expression        { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '||:' Expression       { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '&&:' Expression       { result = OpNode.new(val[1], val[0], val[2]) }

  | DELETE Expression                 { result = OpNode.new(val[0], val[1]) }
  ;

  # Function definition. A function body is closed by a ".".
  Code:
    ParamList "=>" CodeBody "."       { result = CodeNode.new(val[0], val[2]) }
  | "=>" CodeBody "."                 { result = CodeNode.new([], val[1]) }
  ;

  # The body of a function (may be empty).
  CodeBody:
    /* nothing */                     { result = Expressions.new([]) }
  | Expressions                       { result = val[0] }
  ;

  # The parameters to a function definition. PARAM tokens are identifiers the
  # lexer re-tagged because they precede a "=>".
  ParamList:
    PARAM                             { result = val }
  | ParamList "," PARAM               { result = val[0] << val[2] }
  ;

  # Expressions that can be treated as values.
  Value:
    IDENTIFIER                        { result = ValueNode.new(val[0]) }
  | Array                             { result = ValueNode.new(val[0]) }
  | Object                            { result = ValueNode.new(val[0]) }
  | Parenthetical                     { result = ValueNode.new(val[0]) }
  | Value Accessor                    { result = val[0] << val[1] }
  | Invocation Accessor               { result = ValueNode.new(val[0], [val[1]]) }
  ;

  # Accessing into an object or array, through dot or index notation.
  Accessor:
    PROPERTY_ACCESS IDENTIFIER        { result = AccessorNode.new(val[1]) }
  | Index                             { result = val[0] }
  | Slice                             { result = val[0] }
  ;

  # Indexing into an object or array.
  Index:
    "[" Expression "]"                { result = IndexNode.new(val[1]) }
  ;

  # Array slice literal.
  Slice:
    "[" Expression "," Expression "]" { result = SliceNode.new(val[1], val[3]) }
  ;

  # An object literal.
  Object:
    "{" AssignList "}"                { result = ObjectNode.new(val[1]) }
  ;

  # Assignment within an object literal (comma or newline separated).
  AssignList:
    /* nothing */                     { result = []}
  | AssignObj                         { result = val }
  | AssignList "," AssignObj          { result = val[0] << val[2] }
  | AssignList Terminator AssignObj   { result = val[0] << val[2] }
  ;

  # All flavors of function call (instantiation, super, and regular).
  Call:
    Invocation                        { result = val[0] }
  | NEW Invocation                    { result = val[1].new_instance }
  | Super                             { result = val[0] }
  ;

  # A generic function invocation.
  Invocation:
    Value "(" ArgList ")"             { result = CallNode.new(val[0], val[2]) }
  ;

  # Calling super.
  Super:
    SUPER "(" ArgList ")"             { result = CallNode.new(:super, val[2]) }
  ;

  # The array literal.
  Array:
    "[" ArgList "]"                   { result = ArrayNode.new(val[1]) }
  ;

  # A list of arguments to a method call, or as the contents of an array.
  # Items may be separated by commas or by terminators.
  ArgList:
    /* nothing */                     { result = [] }
  | Expression                        { result = val }
  | ArgList "," Expression            { result = val[0] << val[2] }
  | ArgList Terminator Expression     { result = val[0] << val[2] }
  ;

  # Try/catch/finally exception handling blocks.
  # Catch yields a two-element array: [error identifier, catch body].
  Try:
    TRY Expressions Catch "."         { result = TryNode.new(val[1], val[2][0], val[2][1]) }
  | TRY Expressions Catch
    FINALLY Expressions "."           { result = TryNode.new(val[1], val[2][0], val[2][1], val[4]) }
  ;

  # A catch clause (optional; [nil, nil] when absent).
  Catch:
    /* nothing */                     { result = [nil, nil] }
  | CATCH IDENTIFIER Expressions      { result = [val[1], val[2]] }
  ;

  # Throw an exception.
  Throw:
    THROW Expression                  { result = ThrowNode.new(val[1]) }
  ;

  # Parenthetical expressions.
  Parenthetical:
    "(" Expressions ")"               { result = ParentheticalNode.new(val[1]) }
  ;

  # The while loop. (there is no do..while).
  While:
    WHILE Expression Then
      Expressions "."                 { result = WhileNode.new(val[1], val[3]) }
  ;

  # Array comprehensions, including guard and current index.
  # ForNode.new receives (body, source, item name, guard, index name).
  For:
    Expression FOR IDENTIFIER
      IN PureExpression "."           { result = ForNode.new(val[0], val[4], val[2], nil) }
  | Expression FOR
      IDENTIFIER "," IDENTIFIER
      IN PureExpression "."           { result = ForNode.new(val[0], val[6], val[2], nil, val[4]) }
  | Expression FOR IDENTIFIER
      IN PureExpression
      IF Expression "."               { result = ForNode.new(val[0], val[4], val[2], val[6]) }
  | Expression FOR
      IDENTIFIER "," IDENTIFIER
      IN PureExpression
      IF Expression "."               { result = ForNode.new(val[0], val[6], val[2], val[8], val[4]) }
  ;

  # Switch/When blocks. The chain of whens is an IfNode chain whose conditions
  # are rewritten against the switch subject.
  Switch:
    SWITCH Expression Then
      Whens "."                       { result = val[3].rewrite_condition(val[1]) }
  | SWITCH Expression Then
      Whens ELSE Expressions "."      { result = val[3].rewrite_condition(val[1]).add_else(val[5]) }
  ;

  # The inner list of whens.
  Whens:
    When                              { result = val[0] }
  | Whens When                        { result = val[0] << val[1] }
  ;

  # An individual when.
  When:
    WHEN Expression Then Expressions  { result = IfNode.new(val[1], val[3]) }
  ;

  # All of the following nutso if-else destructuring is to make the
  # grammar expand unambiguously.

  # An elsif portion of an if-else block.
  ElsIf:
    ELSE IF Expression
      Then Expressions                { result = IfNode.new(val[2], val[4]) }
  ;

  # Multiple elsifs can be chained together.
  ElsIfs:
    ElsIf                             { result = val[0] }
  | ElsIfs ElsIf                      { result = val[0].add_else(val[1]) }
  ;

  # Terminating else bodies are strictly optional.
  ElseBody:
    "."                               { result = nil }
  | ELSE Expressions "."              { result = val[1] }
  ;

  # All the alternatives for ending an if-else block.
  IfEnd:
    ElseBody                          { result = val[0] }
  | ElsIfs ElseBody                   { result = val[0].add_else(val[1]) }
  ;

  # The full complement of if blocks, including postfix one-liner ifs and unlesses.
  # The postfix forms wrap the single leading expression in an Expressions node
  # and flag the IfNode as a statement (and as inverted, for `unless`).
  If:
    IF Expression
      Then Expressions IfEnd          { result = IfNode.new(val[1], val[3], val[4]) }
  | Expression IF Expression          { result = IfNode.new(val[2], Expressions.new([val[0]]), nil, {:statement => true}) }
  | Expression UNLESS Expression      { result = IfNode.new(val[2], Expressions.new([val[0]]), nil, {:statement => true, :invert => true}) }
  ;

end
378
+
379
+ ---- header
380
+ module CoffeeScript
381
+
382
+ ---- inner
383
+ # Lex and parse a CoffeeScript.
384
def parse(code)
  # Entry point: tokenize the source up front, stash the token list where
  # next_token can reach it, then hand control to Racc's do_parse.
  #
  # For grammar debugging, set `@yydebug = true` here, in combination with
  # the -g flag in the Rake build task.
  lexer = Lexer.new
  @tokens = lexer.tokenize(code)
  do_parse
end
391
+
392
+ # Retrieve the next token from the list.
393
def next_token
  # Pop the token at the front of the list; nil once the list is exhausted.
  upcoming = @tokens.shift
  upcoming
end
396
+
397
+ # Raise a custom error class that knows about line numbers.
398
def on_error(error_token_id, error_value, value_stack)
  # Translate the numeric token id into its name before building the error,
  # then raise the custom ParseError with the offending value and the stack.
  token_name = token_to_str(error_token_id)
  failure = ParseError.new(token_name, error_value, value_stack)
  raise failure
end
401
+
402
+ ---- footer
403
+ end
@@ -0,0 +1,187 @@
1
+ module CoffeeScript
2
+
3
+ # The lexer reads a stream of CoffeeScript and divvys it up into tagged
4
+ # tokens. A minor bit of the ambiguity in the grammar has been avoided by
5
+ # pushing some extra smarts into the Lexer.
6
# The lexer reads a stream of CoffeeScript and divvys it up into tagged
# tokens of the form [tag, Value]. Keyword and category tags are Symbols
# (:IF, :IDENTIFIER, :NUMBER, ...); operators and punctuation use their own
# text as the tag. Each value is wrapped with the line number current at the
# time the token was emitted.
class Lexer

  # The list of keywords passed verbatim to the parser.
  KEYWORDS   = ["if", "else", "then", "unless",
                "true", "false", "yes", "no", "on", "off",
                "and", "or", "is", "aint", "not",
                "new", "return",
                "try", "catch", "finally", "throw",
                "break", "continue",
                "for", "in", "while",
                "switch", "when",
                "super",
                "delete"].freeze

  # Token matching regexes.
  IDENTIFIER = /\A([a-zA-Z$_]\w*)/
  NUMBER     = /\A\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?))\b/i
  # Quoted literals are scanned as a run of "ordinary character or backslash
  # escape", so a literal ending in an escaped backslash ("a\\") closes at the
  # right quote instead of running on to the next one.
  STRING     = /\A("(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*')/m
  JS         = /\A(`(?:[^`\\]|\\.)*`)/m
  OPERATOR   = /\A([+\*&|\/\-%=<>:!]+)/
  WHITESPACE = /\A([ \t\r]+)/
  NEWLINE    = /\A(\n+)/
  COMMENT    = /\A((#[^\n]*\s*)+)/m
  CODE       = /\A(=>)/
  REGEX      = /\A(\/(.*?)[^\\]\/[imgy]{0,4})/

  # Token cleaning regexes.
  JS_CLEANER = /(\A`|`\Z)/
  MULTILINER = /\n/
  COMMENT_CLEANER = /(^\s*#|\n\s*$)/

  # Tokens that always constitute the start of an expression.
  EXP_START = ['{', '(', '['].freeze

  # Tokens that always constitute the end of an expression.
  EXP_END   = ['}', ')', ']'].freeze

  # Tags after which a "/" must be a division operator rather than the start
  # of a regex literal (the previous token already ended an operand).
  NOT_REGEX = [:IDENTIFIER, :NUMBER, :REGEX, :STRING, ')', ']'].freeze

  # Scan by attempting to match tokens one character at a time. Slow and steady.
  # Returns the list of [tag, Value] pairs.
  def tokenize(code)
    @code = code.chomp  # Cleanup code by remove extra line breaks
    @i = 0              # Current character position we're parsing
    @line = 1           # The current line.
    @tokens = []        # Collection of all parsed tokens in the form [:TOKEN_TYPE, value]
    while @i < @code.length
      @chunk = @code[@i..-1]
      extract_next_token
    end
    @tokens
  end

  # At every position, run this list of match attempts, short-circuiting if
  # any of them succeed. literal_token is the catch-all and always consumes
  # at least one character.
  def extract_next_token
    return if identifier_token
    return if number_token
    return if string_token
    return if js_token
    return if regex_token
    return if comment_token
    return if whitespace_token
    return    literal_token
  end

  # Matches identifying literals: variables, keywords, method names, etc.
  def identifier_token
    return false unless identifier = @chunk[IDENTIFIER, 1]
    # Keywords are special identifiers tagged with their own name, 'if' will result
    # in an [:IF, "if"] token
    tag = KEYWORDS.include?(identifier) ? identifier.upcase.to_sym : :IDENTIFIER
    # A "." directly before a plain identifier is a property access, so the
    # already-emitted "." token is re-tagged in place.
    @tokens[-1][0] = :PROPERTY_ACCESS if tag == :IDENTIFIER && last_value == '.'
    token(tag, identifier)
    @i += identifier.length
  end

  # Matches numbers, including decimals, hex, and exponential notation.
  def number_token
    return false unless number = @chunk[NUMBER, 1]
    token(:NUMBER, number)
    @i += number.length
  end

  # Matches strings, including multi-line strings. Each embedded newline bumps
  # the line counter and is rewritten as " \<newline>" in the token value.
  def string_token
    return false unless string = @chunk[STRING, 1]
    escaped = string.gsub(MULTILINER) do |match|
      @line += 1
      " \\\n"
    end
    token(:STRING, escaped)
    @i += string.length
  end

  # Matches interpolated JavaScript. The surrounding backticks are stripped
  # from the token value.
  def js_token
    return false unless script = @chunk[JS, 1]
    # Keep line numbers accurate when the literal spans several lines.
    @line += script.count("\n")
    token(:JS, script.gsub(JS_CLEANER, ''))
    @i += script.length
  end

  # Matches regular expression literals. A "/" that follows an operand
  # (identifier, number, string, regex, closing paren or bracket) is division,
  # so it is left for literal_token to emit as an operator.
  def regex_token
    return false if NOT_REGEX.include?(last_tag)
    return false unless regex = @chunk[REGEX, 1]
    token(:REGEX, regex)
    @i += regex.length
  end

  # Matches and consumes comments. Consecutive comment lines become a single
  # :COMMENT token holding an array of lines, followed by a newline token.
  def comment_token
    return false unless comment = @chunk[COMMENT, 1]
    @line += comment.scan(MULTILINER).length
    token(:COMMENT, comment.gsub(COMMENT_CLEANER, '').split(MULTILINER))
    token("\n", "\n")
    @i += comment.length
  end

  # Matches and consumes non-meaningful whitespace.
  def whitespace_token
    return false unless whitespace = @chunk[WHITESPACE, 1]
    @i += whitespace.length
  end

  # We treat all other single characters as a token. Eg.: ( ) , . !
  # Multi-character operators are also literal tokens, so that Racc can assign
  # the proper order of operations. Multiple newlines get merged together.
  def literal_token
    value = @chunk[NEWLINE, 1]
    if value
      @line += value.length
      token("\n", "\n") unless last_value == "\n"
      return @i += value.length
    end
    value = @chunk[OPERATOR, 1]
    # Seeing "=>" means the identifiers just emitted were parameters.
    tag_parameters if value && value.match(CODE)
    value ||= @chunk[0,1]
    skip_following_newlines if EXP_START.include?(value)
    remove_leading_newlines if EXP_END.include?(value)
    token(value, value)
    @i += value.length
  end

  # Add a token to the results, taking note of the line number.
  def token(tag, value)
    @tokens << [tag, Value.new(value, @line)]
  end

  # Peek at the value of the previous token (nil when there is none).
  def last_value
    @tokens.last && @tokens.last[1]
  end

  # Peek at the tag of the previous token (nil when there is none).
  def last_tag
    @tokens.last && @tokens.last[0]
  end

  # A source of ambiguity in our grammar was parameter lists in function
  # definitions (as opposed to argument lists in function calls). Tag
  # parameter identifiers in order to avoid this. Walks backwards from the
  # newest token, skipping commas, until something that is not an identifier.
  def tag_parameters
    index = 0
    loop do
      tok = @tokens[index -= 1]
      return if !tok
      next if tok[0] == ','
      return if tok[0] != :IDENTIFIER
      tok[0] = :PARAM
    end
  end

  # Consume and ignore newlines immediately after this point.
  def skip_following_newlines
    newlines = @code[(@i+1)..-1][NEWLINE, 1]
    if newlines
      @line += newlines.length
      @i += newlines.length
    end
  end

  # Discard newlines immediately before this point.
  def remove_leading_newlines
    @tokens.pop if last_value == "\n"
  end

end
186
+
187
+ end