coffee-script 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,403 @@
1
class Parser

# Racc grammar for CoffeeScript. The semantic actions build the AST out of
# node objects (Expressions, LiteralNode, OpNode, IfNode, ...); inside an
# action, val[n] is the value of the n-th symbol of the production.

# Declare tokens produced by the lexer
token IF ELSE THEN UNLESS
token NUMBER STRING REGEX
token TRUE FALSE YES NO ON OFF
token IDENTIFIER PROPERTY_ACCESS
token CODE PARAM NEW RETURN
token TRY CATCH FINALLY THROW
token BREAK CONTINUE
token FOR IN WHILE
token SWITCH WHEN
token SUPER
token DELETE
token NEWLINE
token COMMENT
token JS

# Declare order of operations, from the tightest-binding operators
# (prechigh) down to the loosest (preclow).
prechigh
  nonassoc UMINUS NOT '!' '!!' '~' '++' '--'
  left     '*' '/' '%'
  left     '+' '-'
  left     '<<' '>>' '>>>'
  left     '&' '|' '^'
  left     '<=' '<' '>' '>='
  right    '==' '!=' IS AINT
  left     '&&' '||' AND OR
  right    '-=' '+=' '/=' '*='
  right    DELETE
  left     "."
  right    THROW FOR IN WHILE NEW
  left     UNLESS IF ELSE
  left     ":" '||:' '&&:'
  right    RETURN
preclow

# We expect 4 shift/reduce conflicts for optional syntax.
# There used to be 252 -- greatly improved.
expect 4

rule

  # All parsing will end in this rule, being the trunk of the AST.
  Root:
    /* nothing */                       { result = Expressions.new([]) }
  | Terminator                          { result = Expressions.new([]) }
  | Expressions                         { result = val[0] }
  ;

  # Any list of expressions or method body, separated by line breaks or
  # semicolons. Leading and trailing terminators are tolerated.
  Expressions:
    Expression                          { result = Expressions.new(val) }
  | Expressions Terminator Expression   { result = val[0] << val[2] }
  | Expressions Terminator              { result = val[0] }
  | Terminator Expressions              { result = val[1] }
  ;

  # All types of expressions in our language.
  Expression:
    PureExpression
  | Statement
  ;

  # The parts that are natural JavaScript expressions.
  PureExpression:
    Literal
  | Value
  | Call
  | Code
  | Operation
  ;

  # We have to take extra care to convert these statements into expressions.
  Statement:
    Assign
  | If
  | Try
  | Throw
  | Return
  | While
  | For
  | Switch
  | Comment
  ;

  # All tokens that can terminate an expression.
  Terminator:
    "\n"
  | ";"
  ;

  # All tokens that can serve to begin the second block of a multi-part expression.
  Then:
    THEN
  | Terminator
  ;

  # All hard-coded values. The boolean aliases (yes/no, on/off) collapse to
  # plain true/false literals here.
  Literal:
    NUMBER                              { result = LiteralNode.new(val[0]) }
  | STRING                              { result = LiteralNode.new(val[0]) }
  | JS                                  { result = LiteralNode.new(val[0]) }
  | REGEX                               { result = LiteralNode.new(val[0]) }
  | BREAK                               { result = LiteralNode.new(val[0]) }
  | CONTINUE                            { result = LiteralNode.new(val[0]) }
  | TRUE                                { result = LiteralNode.new(true) }
  | FALSE                               { result = LiteralNode.new(false) }
  | YES                                 { result = LiteralNode.new(true) }
  | NO                                  { result = LiteralNode.new(false) }
  | ON                                  { result = LiteralNode.new(true) }
  | OFF                                 { result = LiteralNode.new(false) }
  ;

  # Assignment to a variable.
  Assign:
    Value ":" Expression                { result = AssignNode.new(val[0], val[2]) }
  ;

  # Assignment within an object literal. Comments are also allowed between
  # the entries of an object literal.
  AssignObj:
    IDENTIFIER ":" Expression           { result = AssignNode.new(val[0], val[2], :object) }
  | Comment                             { result = val[0] }
  ;

  # A return statement.
  Return:
    RETURN Expression                   { result = ReturnNode.new(val[1]) }
  ;

  # A comment.
  Comment:
    COMMENT                             { result = CommentNode.new(val[0]) }
  ;

  # Arithmetic and logical operators
  # For Ruby's Operator precedence, see:
  # https://www.cs.auckland.ac.nz/references/ruby/ProgrammingRuby/language.html
  #
  # Prefix forms call OpNode.new(operator, operand); postfix forms call
  # OpNode.new(operator, operand, nil, true); binary forms call
  # OpNode.new(operator, left, right).
  Operation:
    '!' Expression                      { result = OpNode.new(val[0], val[1]) }
  | '!!' Expression                     { result = OpNode.new(val[0], val[1]) }
  | '-' Expression = UMINUS             { result = OpNode.new(val[0], val[1]) }
  | NOT Expression                      { result = OpNode.new(val[0], val[1]) }
  | '~' Expression                      { result = OpNode.new(val[0], val[1]) }
  | '--' Expression                     { result = OpNode.new(val[0], val[1]) }
  | '++' Expression                     { result = OpNode.new(val[0], val[1]) }
  | Expression '--'                     { result = OpNode.new(val[1], val[0], nil, true) }
  | Expression '++'                     { result = OpNode.new(val[1], val[0], nil, true) }

  | Expression '*' Expression           { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '/' Expression           { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '%' Expression           { result = OpNode.new(val[1], val[0], val[2]) }

  | Expression '+' Expression           { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '-' Expression           { result = OpNode.new(val[1], val[0], val[2]) }

  | Expression '<<' Expression          { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '>>' Expression          { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '>>>' Expression         { result = OpNode.new(val[1], val[0], val[2]) }

  | Expression '&' Expression           { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '|' Expression           { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '^' Expression           { result = OpNode.new(val[1], val[0], val[2]) }

  | Expression '<=' Expression          { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '<' Expression           { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '>' Expression           { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '>=' Expression          { result = OpNode.new(val[1], val[0], val[2]) }

  | Expression '==' Expression          { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '!=' Expression          { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression IS Expression            { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression AINT Expression          { result = OpNode.new(val[1], val[0], val[2]) }

  | Expression '&&' Expression          { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '||' Expression          { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression AND Expression           { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression OR Expression            { result = OpNode.new(val[1], val[0], val[2]) }

  | Expression '-=' Expression          { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '+=' Expression          { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '/=' Expression          { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '*=' Expression          { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '||:' Expression         { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '&&:' Expression         { result = OpNode.new(val[1], val[0], val[2]) }

  | DELETE Expression                   { result = OpNode.new(val[0], val[1]) }
  ;

  # Function definition. A function without parameters gets an empty
  # parameter list.
  Code:
    ParamList "=>" CodeBody "."         { result = CodeNode.new(val[0], val[2]) }
  | "=>" CodeBody "."                   { result = CodeNode.new([], val[1]) }
  ;

  # The body of a function.
  CodeBody:
    /* nothing */                       { result = Expressions.new([]) }
  | Expressions                         { result = val[0] }
  ;

  # The parameters to a function definition.
  ParamList:
    PARAM                               { result = val }
  | ParamList "," PARAM                 { result = val[0] << val[2] }
  ;

  # Expressions that can be treated as values.
  Value:
    IDENTIFIER                          { result = ValueNode.new(val[0]) }
  | Array                               { result = ValueNode.new(val[0]) }
  | Object                              { result = ValueNode.new(val[0]) }
  | Parenthetical                       { result = ValueNode.new(val[0]) }
  | Value Accessor                      { result = val[0] << val[1] }
  | Invocation Accessor                 { result = ValueNode.new(val[0], [val[1]]) }
  ;

  # Accessing into an object or array, through dot or index notation.
  Accessor:
    PROPERTY_ACCESS IDENTIFIER          { result = AccessorNode.new(val[1]) }
  | Index                               { result = val[0] }
  | Slice                               { result = val[0] }
  ;

  # Indexing into an object or array.
  Index:
    "[" Expression "]"                  { result = IndexNode.new(val[1]) }
  ;

  # Array slice literal.
  Slice:
    "[" Expression "," Expression "]"   { result = SliceNode.new(val[1], val[3]) }
  ;

  # An object literal.
  Object:
    "{" AssignList "}"                  { result = ObjectNode.new(val[1]) }
  ;

  # Assignment within an object literal (comma or newline separated).
  AssignList:
    /* nothing */                       { result = [] }
  | AssignObj                           { result = val }
  | AssignList "," AssignObj            { result = val[0] << val[2] }
  | AssignList Terminator AssignObj     { result = val[0] << val[2] }
  ;

  # All flavors of function call (instantiation, super, and regular).
  Call:
    Invocation                          { result = val[0] }
  | NEW Invocation                      { result = val[1].new_instance }
  | Super                               { result = val[0] }
  ;

  # A generic function invocation.
  Invocation:
    Value "(" ArgList ")"               { result = CallNode.new(val[0], val[2]) }
  ;

  # Calling super.
  Super:
    SUPER "(" ArgList ")"               { result = CallNode.new(:super, val[2]) }
  ;

  # The array literal.
  Array:
    "[" ArgList "]"                     { result = ArrayNode.new(val[1]) }
  ;

  # A list of arguments to a method call, or as the contents of an array.
  # Entries may be separated by commas or by terminators.
  ArgList:
    /* nothing */                       { result = [] }
  | Expression                          { result = val }
  | ArgList "," Expression              { result = val[0] << val[2] }
  | ArgList Terminator Expression       { result = val[0] << val[2] }
  ;

  # Try/catch/finally exception handling blocks. The Catch rule yields a
  # two-element array that is unpacked into the TryNode arguments.
  Try:
    TRY Expressions Catch "."           { result = TryNode.new(val[1], val[2][0], val[2][1]) }
  | TRY Expressions Catch
    FINALLY Expressions "."             { result = TryNode.new(val[1], val[2][0], val[2][1], val[4]) }
  ;

  # A catch clause: [identifier, expressions], or [nil, nil] when omitted.
  Catch:
    /* nothing */                       { result = [nil, nil] }
  | CATCH IDENTIFIER Expressions        { result = [val[1], val[2]] }
  ;

  # Throw an exception.
  Throw:
    THROW Expression                    { result = ThrowNode.new(val[1]) }
  ;

  # Parenthetical expressions.
  Parenthetical:
    "(" Expressions ")"                 { result = ParentheticalNode.new(val[1]) }
  ;

  # The while loop. (there is no do..while).
  While:
    WHILE Expression Then
      Expressions "."                   { result = WhileNode.new(val[1], val[3]) }
  ;

  # Array comprehensions, including guard and current index.
  # ForNode.new receives, in order: the leading expression, the expression
  # after IN, the first identifier, the IF condition (nil when absent) and,
  # in the two-identifier forms, the second identifier.
  For:
    Expression FOR IDENTIFIER
      IN PureExpression "."             { result = ForNode.new(val[0], val[4], val[2], nil) }
  | Expression FOR
      IDENTIFIER "," IDENTIFIER
      IN PureExpression "."             { result = ForNode.new(val[0], val[6], val[2], nil, val[4]) }
  | Expression FOR IDENTIFIER
      IN PureExpression
      IF Expression "."                 { result = ForNode.new(val[0], val[4], val[2], val[6]) }
  | Expression FOR
      IDENTIFIER "," IDENTIFIER
      IN PureExpression
      IF Expression "."                 { result = ForNode.new(val[0], val[6], val[2], val[8], val[4]) }
  ;

  # Switch/When blocks.
  Switch:
    SWITCH Expression Then
      Whens "."                         { result = val[3].rewrite_condition(val[1]) }
  | SWITCH Expression Then
      Whens ELSE Expressions "."        { result = val[3].rewrite_condition(val[1]).add_else(val[5]) }
  ;

  # The inner list of whens.
  Whens:
    When                                { result = val[0] }
  | Whens When                          { result = val[0] << val[1] }
  ;

  # An individual when.
  When:
    WHEN Expression Then Expressions    { result = IfNode.new(val[1], val[3]) }
  ;

  # All of the following nutso if-else destructuring is to make the
  # grammar expand unambiguously.

  # An elsif portion of an if-else block.
  ElsIf:
    ELSE IF Expression
      Then Expressions                  { result = IfNode.new(val[2], val[4]) }
  ;

  # Multiple elsifs can be chained together.
  ElsIfs:
    ElsIf                               { result = val[0] }
  | ElsIfs ElsIf                        { result = val[0].add_else(val[1]) }
  ;

  # Terminating else bodies are strictly optional.
  ElseBody:
    "."                                 { result = nil }
  | ELSE Expressions "."                { result = val[1] }
  ;

  # All the alternatives for ending an if-else block.
  IfEnd:
    ElseBody                            { result = val[0] }
  | ElsIfs ElseBody                     { result = val[0].add_else(val[1]) }
  ;

  # The full complement of if blocks, including postfix one-liner ifs and unlesses.
  If:
    IF Expression
      Then Expressions IfEnd            { result = IfNode.new(val[1], val[3], val[4]) }
  | Expression IF Expression            { result = IfNode.new(val[2], Expressions.new([val[0]]), nil, {:statement => true}) }
  | Expression UNLESS Expression        { result = IfNode.new(val[2], Expressions.new([val[0]]), nil, {:statement => true, :invert => true}) }
  ;

end
378
+
379
+ ---- header
380
+ module CoffeeScript
381
+
382
+ ---- inner
383
# Lex and parse a CoffeeScript source string: the code is tokenized by a
# fresh Lexer, the token list is stored for next_token to consume, and the
# result of the generated do_parse is returned.
def parse(code)
  # Uncomment the following line to enable grammar debugging, in combination
  # with the -g flag in the Rake build task.
  # @yydebug = true
  lexer = Lexer.new
  @tokens = lexer.tokenize(code)
  do_parse
end
391
+
392
# Retrieve the next token from the list, removing it from @tokens.
#
# Returns the [tag, value] pair at the head of the list, or [false, false]
# once the list is exhausted, which is the end-of-input marker Racc
# documents for next_token.
def next_token
  @tokens.shift || [false, false]
end
396
+
397
# Raise a custom error class that knows about line numbers. The numeric
# token id is first translated to its name with token_to_str; the offending
# value and the value stack are passed along unchanged.
def on_error(error_token_id, error_value, value_stack)
  token_name = token_to_str(error_token_id)
  raise ParseError.new(token_name, error_value, value_stack)
end
401
+
402
+ ---- footer
403
+ end
@@ -0,0 +1,187 @@
1
module CoffeeScript

  # The lexer reads a stream of CoffeeScript and divvies it up into tagged
  # tokens. A minor bit of the ambiguity in the grammar has been avoided by
  # pushing some extra smarts into the Lexer.
  #
  # Every token is a two-element array [tag, Value]: the tag is a Symbol for
  # keywords and token classes (:IF, :NUMBER, :STRING, ...) or the literal
  # text itself for operators and punctuation ("+", "(", "\n", ...).
  class Lexer

    # The list of keywords passed verbatim to the parser.
    KEYWORDS = %w[if else then unless
                  true false yes no on off
                  and or is aint not
                  new return
                  try catch finally throw
                  break continue
                  for in while
                  switch when
                  super
                  delete].freeze

    # Token matching regexes. The quoted forms (STRING, JS, REGEX) consume
    # backslash escapes as pairs, so a literal ending in an escaped
    # backslash still terminates at its own closing delimiter.
    IDENTIFIER = /\A([a-zA-Z$_]\w*)/
    NUMBER     = /\A\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?))\b/i
    STRING     = /\A("(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*')/m
    JS         = /\A(`(?:[^`\\]|\\.)*`)/m
    OPERATOR   = /\A([+\*&|\/\-%=<>:!]+)/
    WHITESPACE = /\A([ \t\r]+)/
    NEWLINE    = /\A(\n+)/
    COMMENT    = /\A((#[^\n]*\s*)+)/m
    CODE       = /\A(=>)/
    REGEX      = /\A(\/(?:[^\/\\\n]|\\.)+\/[imgy]{0,4})/

    # Token cleaning regexes.
    JS_CLEANER      = /(\A`|`\Z)/
    MULTILINER      = /\n/
    COMMENT_CLEANER = /(^\s*#|\n\s*$)/

    # Tokens that always constitute the start of an expression.
    EXP_START = ['{', '(', '['].freeze

    # Tokens that always constitute the end of an expression.
    EXP_END = ['}', ')', ']'].freeze

    # Tags after which a "/" has to be a division operator rather than the
    # opening delimiter of a regular expression literal.
    NOT_REGEX = [:IDENTIFIER, :NUMBER, :STRING, :REGEX, ')', ']'].freeze

    # Scan by attempting to match tokens one character at a time. Slow and steady.
    #
    # code - the CoffeeScript source String.
    #
    # Returns the Array of [tag, Value] tokens.
    def tokenize(code)
      @code   = code.chomp  # Cleanup code by removing the trailing line break
      @i      = 0           # Current character position we're parsing
      @line   = 1           # The current line.
      @tokens = []          # Collection of all parsed tokens in the form [:TOKEN_TYPE, value]
      while @i < @code.length
        @chunk = @code[@i..-1]
        extract_next_token
      end
      @tokens
    end

    # At every position, run this list of match attempts, short-circuiting if
    # any of them succeed. Each matcher returns false when it does not apply
    # and the (truthy) new position when it consumed input; literal_token is
    # the catch-all and always consumes at least one character.
    def extract_next_token
      return if identifier_token
      return if number_token
      return if string_token
      return if js_token
      return if regex_token
      return if comment_token
      return if whitespace_token
      return literal_token
    end

    # Matches identifying literals: variables, keywords, method names, etc.
    def identifier_token
      return false unless (identifier = @chunk[IDENTIFIER, 1])
      # Keywords are special identifiers tagged with their own name, 'if' will result
      # in an [:IF, "if"] token
      tag = KEYWORDS.include?(identifier) ? identifier.upcase.to_sym : :IDENTIFIER
      # A plain identifier directly after a "." turns that dot into a
      # property access instead of a block terminator.
      @tokens[-1][0] = :PROPERTY_ACCESS if tag == :IDENTIFIER && last_value == '.'
      token(tag, identifier)
      @i += identifier.length
    end

    # Matches numbers, including decimals, hex, and exponential notation.
    def number_token
      return false unless (number = @chunk[NUMBER, 1])
      token(:NUMBER, number)
      @i += number.length
    end

    # Matches strings, including multi-line strings. Every line break inside
    # the string is rewritten to " \<newline>" and counted towards the
    # current line number.
    def string_token
      return false unless (string = @chunk[STRING, 1])
      escaped = string.gsub(MULTILINER) do
        @line += 1
        " \\\n"
      end
      token(:STRING, escaped)
      @i += string.length
    end

    # Matches interpolated JavaScript, stripping the surrounding backticks.
    # Line breaks inside the snippet are counted so that later tokens keep
    # an accurate line number.
    def js_token
      return false unless (script = @chunk[JS, 1])
      @line += script.count("\n")
      token(:JS, script.gsub(JS_CLEANER, ''))
      @i += script.length
    end

    # Matches regular expression literals. A slash that follows a value-like
    # token (see NOT_REGEX) is left alone so that literal_token emits it as
    # the division operator.
    def regex_token
      return false if NOT_REGEX.include?(last_tag)
      return false unless (regex = @chunk[REGEX, 1])
      token(:REGEX, regex)
      @i += regex.length
    end

    # Matches and consumes comments. Consecutive comment lines are merged
    # into one :COMMENT token whose value is the array of cleaned lines; a
    # "\n" token is emitted right after it.
    def comment_token
      return false unless (comment = @chunk[COMMENT, 1])
      @line += comment.scan(MULTILINER).length
      token(:COMMENT, comment.gsub(COMMENT_CLEANER, '').split(MULTILINER))
      token("\n", "\n")
      @i += comment.length
    end

    # Matches and consumes non-meaningful whitespace.
    def whitespace_token
      return false unless (whitespace = @chunk[WHITESPACE, 1])
      @i += whitespace.length
    end

    # We treat all other single characters as a token. Eg.: ( ) , . !
    # Multi-character operators are also literal tokens, so that Racc can assign
    # the proper order of operations. Multiple newlines get merged together.
    def literal_token
      value = @chunk[NEWLINE, 1]
      if value
        @line += value.length
        token("\n", "\n") unless last_value == "\n"
        return @i += value.length
      end
      value = @chunk[OPERATOR, 1]
      # Reaching "=>" means the identifiers just before it were parameters.
      tag_parameters if value && value.match(CODE)
      value ||= @chunk[0, 1]
      skip_following_newlines if EXP_START.include?(value)
      remove_leading_newlines if EXP_END.include?(value)
      token(value, value)
      @i += value.length
    end

    # Add a token to the results, taking note of the current line number.
    def token(tag, value)
      @tokens << [tag, Value.new(value, @line)]
    end

    # Peek at the value of the previous token (nil when there is none).
    def last_value
      @tokens.last && @tokens.last[1]
    end

    # Peek at the tag of the previous token (nil when there is none).
    def last_tag
      @tokens.last && @tokens.last[0]
    end

    # A source of ambiguity in our grammar was parameter lists in function
    # definitions (as opposed to argument lists in function calls). Tag
    # parameter identifiers in order to avoid this.
    #
    # Walks backwards from the most recent token, skipping commas and
    # retagging identifiers as :PARAM, until any other token is reached.
    def tag_parameters
      index = 0
      loop do
        tok = @tokens[index -= 1]
        return unless tok
        next if tok[0] == ','
        return if tok[0] != :IDENTIFIER
        tok[0] = :PARAM
      end
    end

    # Consume and ignore newlines immediately after this point.
    def skip_following_newlines
      newlines = @code[(@i + 1)..-1][NEWLINE, 1]
      return unless newlines
      @line += newlines.length
      @i += newlines.length
    end

    # Discard newlines immediately before this point.
    def remove_leading_newlines
      @tokens.pop if last_value == "\n"
    end

  end

end