coffee-script 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +22 -0
- data/README +38 -0
- data/bin/coffee-script +5 -0
- data/coffee-script.gemspec +21 -0
- data/examples/code.cs +173 -0
- data/examples/documents.cs +72 -0
- data/examples/poignant.cs +153 -0
- data/examples/syntax_errors.cs +20 -0
- data/examples/underscore.cs +597 -0
- data/lib/coffee-script.rb +20 -0
- data/lib/coffee_script/CoffeeScript.tmbundle/Preferences/CoffeeScript.tmPreferences +24 -0
- data/lib/coffee_script/CoffeeScript.tmbundle/Syntaxes/CoffeeScript.tmLanguage +329 -0
- data/lib/coffee_script/CoffeeScript.tmbundle/info.plist +10 -0
- data/lib/coffee_script/command_line.rb +183 -0
- data/lib/coffee_script/grammar.y +403 -0
- data/lib/coffee_script/lexer.rb +187 -0
- data/lib/coffee_script/nodes.rb +680 -0
- data/lib/coffee_script/parse_error.rb +22 -0
- data/lib/coffee_script/parser.rb +1987 -0
- data/lib/coffee_script/scope.rb +45 -0
- data/lib/coffee_script/value.rb +42 -0
- metadata +75 -0
@@ -0,0 +1,403 @@
|
|
1
|
+
class Parser
|
2
|
+
|
3
|
+
# Declare tokens produced by the lexer
|
4
|
+
token IF ELSE THEN UNLESS
|
5
|
+
token NUMBER STRING REGEX
|
6
|
+
token TRUE FALSE YES NO ON OFF
|
7
|
+
token IDENTIFIER PROPERTY_ACCESS
|
8
|
+
token CODE PARAM NEW RETURN
|
9
|
+
token TRY CATCH FINALLY THROW
|
10
|
+
token BREAK CONTINUE
|
11
|
+
token FOR IN WHILE
|
12
|
+
token SWITCH WHEN
|
13
|
+
token SUPER
|
14
|
+
token DELETE
|
15
|
+
token NEWLINE
|
16
|
+
token COMMENT
|
17
|
+
token JS
|
18
|
+
|
19
|
+
# Declare order of operations.
|
20
|
+
prechigh
|
21
|
+
nonassoc UMINUS NOT '!' '!!' '~' '++' '--'
|
22
|
+
left '*' '/' '%'
|
23
|
+
left '+' '-'
|
24
|
+
left '<<' '>>' '>>>'
|
25
|
+
left '&' '|' '^'
|
26
|
+
left '<=' '<' '>' '>='
|
27
|
+
right '==' '!=' IS AINT
|
28
|
+
left '&&' '||' AND OR
|
29
|
+
right '-=' '+=' '/=' '*='
|
30
|
+
right DELETE
|
31
|
+
left "."
|
32
|
+
right THROW FOR IN WHILE NEW
|
33
|
+
left UNLESS IF ELSE
|
34
|
+
left ":" '||:' '&&:'
|
35
|
+
right RETURN
|
36
|
+
preclow
|
37
|
+
|
38
|
+
# We expect 4 shift/reduce conflicts for optional syntax.
|
39
|
+
# There used to be 252 -- greatly improved.
|
40
|
+
expect 4
|
41
|
+
|
42
|
+
rule
|
43
|
+
|
44
|
+
# All parsing will end in this rule, being the trunk of the AST.
|
45
|
+
Root:
|
46
|
+
/* nothing */ { result = Expressions.new([]) }
|
47
|
+
| Terminator { result = Expressions.new([]) }
|
48
|
+
| Expressions { result = val[0] }
|
49
|
+
;
|
50
|
+
|
51
|
+
# Any list of expressions or method body, separated by line breaks or semicolons.
|
52
|
+
Expressions:
|
53
|
+
Expression { result = Expressions.new(val) }
|
54
|
+
| Expressions Terminator Expression { result = val[0] << val[2] }
|
55
|
+
| Expressions Terminator { result = val[0] }
|
56
|
+
| Terminator Expressions { result = val[1] }
|
57
|
+
;
|
58
|
+
|
59
|
+
# All types of expressions in our language.
|
60
|
+
Expression:
|
61
|
+
PureExpression
|
62
|
+
| Statement
|
63
|
+
;
|
64
|
+
|
65
|
+
# The parts that are natural JavaScript expressions.
|
66
|
+
PureExpression:
|
67
|
+
Literal
|
68
|
+
| Value
|
69
|
+
| Call
|
70
|
+
| Code
|
71
|
+
| Operation
|
72
|
+
;
|
73
|
+
|
74
|
+
# We have to take extra care to convert these statements into expressions.
|
75
|
+
Statement:
|
76
|
+
Assign
|
77
|
+
| If
|
78
|
+
| Try
|
79
|
+
| Throw
|
80
|
+
| Return
|
81
|
+
| While
|
82
|
+
| For
|
83
|
+
| Switch
|
84
|
+
| Comment
|
85
|
+
;
|
86
|
+
|
87
|
+
# All tokens that can terminate an expression.
|
88
|
+
Terminator:
|
89
|
+
"\n"
|
90
|
+
| ";"
|
91
|
+
;
|
92
|
+
|
93
|
+
# All tokens that can serve to begin the second block of a multi-part expression.
|
94
|
+
Then:
|
95
|
+
THEN
|
96
|
+
| Terminator
|
97
|
+
;
|
98
|
+
|
99
|
+
# All hard-coded values.
|
100
|
+
Literal:
|
101
|
+
NUMBER { result = LiteralNode.new(val[0]) }
|
102
|
+
| STRING { result = LiteralNode.new(val[0]) }
|
103
|
+
| JS { result = LiteralNode.new(val[0]) }
|
104
|
+
| REGEX { result = LiteralNode.new(val[0]) }
|
105
|
+
| BREAK { result = LiteralNode.new(val[0]) }
|
106
|
+
| CONTINUE { result = LiteralNode.new(val[0]) }
|
107
|
+
| TRUE { result = LiteralNode.new(true) }
|
108
|
+
| FALSE { result = LiteralNode.new(false) }
|
109
|
+
| YES { result = LiteralNode.new(true) }
|
110
|
+
| NO { result = LiteralNode.new(false) }
|
111
|
+
| ON { result = LiteralNode.new(true) }
|
112
|
+
| OFF { result = LiteralNode.new(false) }
|
113
|
+
;
|
114
|
+
|
115
|
+
# Assignment to a variable.
|
116
|
+
Assign:
|
117
|
+
Value ":" Expression { result = AssignNode.new(val[0], val[2]) }
|
118
|
+
;
|
119
|
+
|
120
|
+
# Assignment within an object literal.
|
121
|
+
AssignObj:
|
122
|
+
IDENTIFIER ":" Expression { result = AssignNode.new(val[0], val[2], :object) }
|
123
|
+
| Comment { result = val[0] }
|
124
|
+
;
|
125
|
+
|
126
|
+
# A return statement.
|
127
|
+
Return:
|
128
|
+
RETURN Expression { result = ReturnNode.new(val[1]) }
|
129
|
+
;
|
130
|
+
|
131
|
+
# A comment.
|
132
|
+
Comment:
|
133
|
+
COMMENT { result = CommentNode.new(val[0]) }
|
134
|
+
;
|
135
|
+
|
136
|
+
# Arithmetic and logical operators
|
137
|
+
# For Ruby's Operator precedence, see:
|
138
|
+
# https://www.cs.auckland.ac.nz/references/ruby/ProgrammingRuby/language.html
|
139
|
+
Operation:
|
140
|
+
'!' Expression { result = OpNode.new(val[0], val[1]) }
|
141
|
+
| '!!' Expression { result = OpNode.new(val[0], val[1]) }
|
142
|
+
| '-' Expression = UMINUS { result = OpNode.new(val[0], val[1]) }
|
143
|
+
| NOT Expression { result = OpNode.new(val[0], val[1]) }
|
144
|
+
| '~' Expression { result = OpNode.new(val[0], val[1]) }
|
145
|
+
| '--' Expression { result = OpNode.new(val[0], val[1]) }
|
146
|
+
| '++' Expression { result = OpNode.new(val[0], val[1]) }
|
147
|
+
| Expression '--' { result = OpNode.new(val[1], val[0], nil, true) }
|
148
|
+
| Expression '++' { result = OpNode.new(val[1], val[0], nil, true) }
|
149
|
+
|
150
|
+
| Expression '*' Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
151
|
+
| Expression '/' Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
152
|
+
| Expression '%' Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
153
|
+
|
154
|
+
| Expression '+' Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
155
|
+
| Expression '-' Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
156
|
+
|
157
|
+
| Expression '<<' Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
158
|
+
| Expression '>>' Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
159
|
+
| Expression '>>>' Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
160
|
+
|
161
|
+
| Expression '&' Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
162
|
+
| Expression '|' Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
163
|
+
| Expression '^' Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
164
|
+
|
165
|
+
| Expression '<=' Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
166
|
+
| Expression '<' Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
167
|
+
| Expression '>' Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
168
|
+
| Expression '>=' Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
169
|
+
|
170
|
+
| Expression '==' Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
171
|
+
| Expression '!=' Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
172
|
+
| Expression IS Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
173
|
+
| Expression AINT Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
174
|
+
|
175
|
+
| Expression '&&' Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
176
|
+
| Expression '||' Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
177
|
+
| Expression AND Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
178
|
+
| Expression OR Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
179
|
+
|
180
|
+
| Expression '-=' Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
181
|
+
| Expression '+=' Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
182
|
+
| Expression '/=' Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
183
|
+
| Expression '*=' Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
184
|
+
| Expression '||:' Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
185
|
+
| Expression '&&:' Expression { result = OpNode.new(val[1], val[0], val[2]) }
|
186
|
+
|
187
|
+
| DELETE Expression { result = OpNode.new(val[0], val[1]) }
|
188
|
+
;
|
189
|
+
|
190
|
+
# Function definition.
|
191
|
+
Code:
|
192
|
+
ParamList "=>" CodeBody "." { result = CodeNode.new(val[0], val[2]) }
|
193
|
+
| "=>" CodeBody "." { result = CodeNode.new([], val[1]) }
|
194
|
+
;
|
195
|
+
|
196
|
+
# The body of a function.
|
197
|
+
CodeBody:
|
198
|
+
/* nothing */ { result = Expressions.new([]) }
|
199
|
+
| Expressions { result = val[0] }
|
200
|
+
;
|
201
|
+
|
202
|
+
# The parameters to a function definition.
|
203
|
+
ParamList:
|
204
|
+
PARAM { result = val }
|
205
|
+
| ParamList "," PARAM { result = val[0] << val[2] }
|
206
|
+
;
|
207
|
+
|
208
|
+
# Expressions that can be treated as values.
|
209
|
+
Value:
|
210
|
+
IDENTIFIER { result = ValueNode.new(val[0]) }
|
211
|
+
| Array { result = ValueNode.new(val[0]) }
|
212
|
+
| Object { result = ValueNode.new(val[0]) }
|
213
|
+
| Parenthetical { result = ValueNode.new(val[0]) }
|
214
|
+
| Value Accessor { result = val[0] << val[1] }
|
215
|
+
| Invocation Accessor { result = ValueNode.new(val[0], [val[1]]) }
|
216
|
+
;
|
217
|
+
|
218
|
+
# Accessing into an object or array, through dot or index notation.
|
219
|
+
Accessor:
|
220
|
+
PROPERTY_ACCESS IDENTIFIER { result = AccessorNode.new(val[1]) }
|
221
|
+
| Index { result = val[0] }
|
222
|
+
| Slice { result = val[0] }
|
223
|
+
;
|
224
|
+
|
225
|
+
# Indexing into an object or array.
|
226
|
+
Index:
|
227
|
+
"[" Expression "]" { result = IndexNode.new(val[1]) }
|
228
|
+
;
|
229
|
+
|
230
|
+
# Array slice literal.
|
231
|
+
Slice:
|
232
|
+
"[" Expression "," Expression "]" { result = SliceNode.new(val[1], val[3]) }
|
233
|
+
;
|
234
|
+
|
235
|
+
# An object literal.
|
236
|
+
Object:
|
237
|
+
"{" AssignList "}" { result = ObjectNode.new(val[1]) }
|
238
|
+
;
|
239
|
+
|
240
|
+
# Assignment within an object literal (comma or newline separated).
|
241
|
+
AssignList:
|
242
|
+
/* nothing */ { result = []}
|
243
|
+
| AssignObj { result = val }
|
244
|
+
| AssignList "," AssignObj { result = val[0] << val[2] }
|
245
|
+
| AssignList Terminator AssignObj { result = val[0] << val[2] }
|
246
|
+
;
|
247
|
+
|
248
|
+
# All flavors of function call (instantiation, super, and regular).
|
249
|
+
Call:
|
250
|
+
Invocation { result = val[0] }
|
251
|
+
| NEW Invocation { result = val[1].new_instance }
|
252
|
+
| Super { result = val[0] }
|
253
|
+
;
|
254
|
+
|
255
|
+
# A generic function invocation.
|
256
|
+
Invocation:
|
257
|
+
Value "(" ArgList ")" { result = CallNode.new(val[0], val[2]) }
|
258
|
+
;
|
259
|
+
|
260
|
+
# Calling super.
|
261
|
+
Super:
|
262
|
+
SUPER "(" ArgList ")" { result = CallNode.new(:super, val[2]) }
|
263
|
+
;
|
264
|
+
|
265
|
+
# The array literal.
|
266
|
+
Array:
|
267
|
+
"[" ArgList "]" { result = ArrayNode.new(val[1]) }
|
268
|
+
;
|
269
|
+
|
270
|
+
# A list of arguments to a method call, or as the contents of an array.
|
271
|
+
ArgList:
|
272
|
+
/* nothing */ { result = [] }
|
273
|
+
| Expression { result = val }
|
274
|
+
| ArgList "," Expression { result = val[0] << val[2] }
|
275
|
+
| ArgList Terminator Expression { result = val[0] << val[2] }
|
276
|
+
;
|
277
|
+
|
278
|
+
# Try/catch/finally exception handling blocks.
|
279
|
+
Try:
|
280
|
+
TRY Expressions Catch "." { result = TryNode.new(val[1], val[2][0], val[2][1]) }
|
281
|
+
| TRY Expressions Catch
|
282
|
+
FINALLY Expressions "." { result = TryNode.new(val[1], val[2][0], val[2][1], val[4]) }
|
283
|
+
;
|
284
|
+
|
285
|
+
# A catch clause.
|
286
|
+
Catch:
|
287
|
+
/* nothing */ { result = [nil, nil] }
|
288
|
+
| CATCH IDENTIFIER Expressions { result = [val[1], val[2]] }
|
289
|
+
;
|
290
|
+
|
291
|
+
# Throw an exception.
|
292
|
+
Throw:
|
293
|
+
THROW Expression { result = ThrowNode.new(val[1]) }
|
294
|
+
;
|
295
|
+
|
296
|
+
# Parenthetical expressions.
|
297
|
+
Parenthetical:
|
298
|
+
"(" Expressions ")" { result = ParentheticalNode.new(val[1]) }
|
299
|
+
;
|
300
|
+
|
301
|
+
# The while loop. (there is no do..while).
|
302
|
+
While:
|
303
|
+
WHILE Expression Then
|
304
|
+
Expressions "." { result = WhileNode.new(val[1], val[3]) }
|
305
|
+
;
|
306
|
+
|
307
|
+
# Array comprehensions, including guard and current index.
|
308
|
+
For:
|
309
|
+
Expression FOR IDENTIFIER
|
310
|
+
IN PureExpression "." { result = ForNode.new(val[0], val[4], val[2], nil) }
|
311
|
+
| Expression FOR
|
312
|
+
IDENTIFIER "," IDENTIFIER
|
313
|
+
IN PureExpression "." { result = ForNode.new(val[0], val[6], val[2], nil, val[4]) }
|
314
|
+
| Expression FOR IDENTIFIER
|
315
|
+
IN PureExpression
|
316
|
+
IF Expression "." { result = ForNode.new(val[0], val[4], val[2], val[6]) }
|
317
|
+
| Expression FOR
|
318
|
+
IDENTIFIER "," IDENTIFIER
|
319
|
+
IN PureExpression
|
320
|
+
IF Expression "." { result = ForNode.new(val[0], val[6], val[2], val[8], val[4]) }
|
321
|
+
;
|
322
|
+
|
323
|
+
# Switch/When blocks.
|
324
|
+
Switch:
|
325
|
+
SWITCH Expression Then
|
326
|
+
Whens "." { result = val[3].rewrite_condition(val[1]) }
|
327
|
+
| SWITCH Expression Then
|
328
|
+
Whens ELSE Expressions "." { result = val[3].rewrite_condition(val[1]).add_else(val[5]) }
|
329
|
+
;
|
330
|
+
|
331
|
+
# The inner list of whens.
|
332
|
+
Whens:
|
333
|
+
When { result = val[0] }
|
334
|
+
| Whens When { result = val[0] << val[1] }
|
335
|
+
;
|
336
|
+
|
337
|
+
# An individual when.
|
338
|
+
When:
|
339
|
+
WHEN Expression Then Expressions { result = IfNode.new(val[1], val[3]) }
|
340
|
+
;
|
341
|
+
|
342
|
+
# All of the following nutso if-else destructuring is to make the
|
343
|
+
# grammar expand unambiguously.
|
344
|
+
|
345
|
+
# An elsif portion of an if-else block.
|
346
|
+
ElsIf:
|
347
|
+
ELSE IF Expression
|
348
|
+
Then Expressions { result = IfNode.new(val[2], val[4]) }
|
349
|
+
;
|
350
|
+
|
351
|
+
# Multiple elsifs can be chained together.
|
352
|
+
ElsIfs:
|
353
|
+
ElsIf { result = val[0] }
|
354
|
+
| ElsIfs ElsIf { result = val[0].add_else(val[1]) }
|
355
|
+
;
|
356
|
+
|
357
|
+
# Terminating else bodies are strictly optional: an if block either ends
# directly with "." (no else; yields nil) or with ELSE, a list of
# expressions and the closing "." (yields those expressions).
ElseBody:
  "."                               { result = nil }
| ELSE Expressions "."              { result = val[1] }
;
|
362
|
+
|
363
|
+
# All the alternatives for ending an if-else block.
|
364
|
+
IfEnd:
|
365
|
+
ElseBody { result = val[0] }
|
366
|
+
| ElsIfs ElseBody { result = val[0].add_else(val[1]) }
|
367
|
+
;
|
368
|
+
|
369
|
+
# The full complement of if blocks, including postfix one-liner ifs and unlesses.
|
370
|
+
If:
|
371
|
+
IF Expression
|
372
|
+
Then Expressions IfEnd { result = IfNode.new(val[1], val[3], val[4]) }
|
373
|
+
| Expression IF Expression { result = IfNode.new(val[2], Expressions.new([val[0]]), nil, {:statement => true}) }
|
374
|
+
| Expression UNLESS Expression { result = IfNode.new(val[2], Expressions.new([val[0]]), nil, {:statement => true, :invert => true}) }
|
375
|
+
;
|
376
|
+
|
377
|
+
end
|
378
|
+
|
379
|
+
---- header
|
380
|
+
module CoffeeScript
|
381
|
+
|
382
|
+
---- inner
|
383
|
+
# Lex and parse a CoffeeScript source string. The token list produced by the
# Lexer is kept in @tokens, then do_parse is invoked and its result returned.
def parse(code)
  # To debug the grammar, uncomment the following line (in combination with
  # the -g flag in the Rake build task).
  # @yydebug = true
  lexer = Lexer.new
  @tokens = lexer.tokenize(code)
  do_parse
end
|
391
|
+
|
392
|
+
# Hand out the next pending token, removing it from the front of @tokens.
# Returns nil once the list is exhausted.
def next_token
  upcoming = @tokens.shift
  upcoming
end
|
396
|
+
|
397
|
+
# Error hook: raise the custom ParseError, built from the string name of the
# offending token (via token_to_str), the offending value and the value stack.
def on_error(error_token_id, error_value, value_stack)
  token_name = token_to_str(error_token_id)
  raise ParseError.new(token_name, error_value, value_stack)
end
|
401
|
+
|
402
|
+
---- footer
|
403
|
+
end
|
@@ -0,0 +1,187 @@
|
|
1
|
+
module CoffeeScript
|
2
|
+
|
3
|
+
# The lexer reads a stream of CoffeeScript and divvies it up into tagged
|
4
|
+
# tokens. A minor bit of the ambiguity in the grammar has been avoided by
|
5
|
+
# pushing some extra smarts into the Lexer.
|
6
|
+
class Lexer
|
7
|
+
|
8
|
+
# The list of keywords passed verbatim to the parser. An identifier found in
# this list is tagged with its own upper-cased name instead of :IDENTIFIER.
KEYWORDS = ["if", "else", "then", "unless",
            "true", "false", "yes", "no", "on", "off",
            "and", "or", "is", "aint", "not",
            "new", "return",
            "try", "catch", "finally", "throw",
            "break", "continue",
            "for", "in", "while",
            "switch", "when",
            "super",
            "delete"].freeze

# Token matching regexes. Every pattern is anchored at the start of the
# remaining input and exposes the complete token text as capture group 1.

# `$` is accepted at any position of an identifier, not only the first.
IDENTIFIER = /\A([a-zA-Z$_][\w$]*)/
NUMBER     = /\A\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?))\b/i
# Delimited literals are matched as a run of "backslash + any character"
# pairs or ordinary characters, so that a literal whose last character is an
# escaped backslash (e.g. "a\\") ends at its own closing delimiter.
STRING     = /\A("(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*')/m
JS         = /\A(`(?:\\.|[^`\\])*`)/m
OPERATOR   = /\A([+\*&|\/\-%=<>:!]+)/
WHITESPACE = /\A([ \t\r]+)/
NEWLINE    = /\A(\n+)/
COMMENT    = /\A((#[^\n]*\s*)+)/m
CODE       = /\A(=>)/
# A regex literal needs at least one character between the slashes and may
# not span a line break.
REGEX      = /\A(\/(?:\\.|[^\/\\\n])+\/[imgy]{0,4})/

# Token cleaning regexes.
JS_CLEANER = /(\A`|`\Z)/
MULTILINER = /\n/
COMMENT_CLEANER = /(^\s*#|\n\s*$)/

# Tokens that always constitute the start of an expression.
EXP_START = ['{', '(', '['].freeze

# Tokens that always constitute the end of an expression.
EXP_END = ['}', ')', ']'].freeze
|
42
|
+
|
43
|
+
# Scan the source from left to right, extracting one token per step from the
# text that remains at the current position. Returns the collected tokens.
def tokenize(code)
  @code   = code.chomp  # the source, minus one trailing line break
  @i      = 0           # current character position in @code
  @line   = 1           # current line number
  @tokens = []          # collected tokens, each of the form [tag, value]
  until @i >= @code.length
    @chunk = @code[@i..-1]
    extract_next_token
  end
  @tokens
end
|
55
|
+
|
56
|
+
# At every position, try each matcher in order and stop at the first one that
# succeeds (returning nil). When none of them matches, fall back to
# literal_token and return its result.
def extract_next_token
  matchers = [:identifier_token, :number_token, :string_token, :js_token,
              :regex_token, :comment_token, :whitespace_token]
  return if matchers.any? { |matcher| send(matcher) }
  literal_token
end
|
68
|
+
|
69
|
+
# Matches identifying literals: variables, keywords, method names, etc.
# Returns false when the chunk does not start with an identifier.
def identifier_token
  name = @chunk[IDENTIFIER, 1]
  return false unless name
  # Keywords are tagged with their own upper-cased name, so 'if' becomes an
  # [:IF, "if"] token; everything else is a plain :IDENTIFIER.
  tag = :IDENTIFIER
  tag = name.upcase.to_sym if KEYWORDS.include?(name)
  # An identifier right after a '.' turns that preceding token into a
  # property access.
  if tag == :IDENTIFIER && last_value == '.'
    @tokens[-1][0] = :PROPERTY_ACCESS
  end
  token(tag, name)
  @i += name.length
end
|
79
|
+
|
80
|
+
# Matches numbers, including decimals, hex, and exponential notation.
# Returns false when the chunk does not start with a number.
def number_token
  literal = @chunk[NUMBER, 1]
  return false if literal.nil?
  token(:NUMBER, literal)
  @i += literal.length
end
|
86
|
+
|
87
|
+
# Matches strings, including multi-line strings. Every line break inside the
# string bumps the line counter and is replaced by " \" followed by the line
# break in the emitted token.
def string_token
  raw = @chunk[STRING, 1]
  return false unless raw
  continued = raw.gsub(MULTILINER) do
    @line += 1
    " \\\n"
  end
  token(:STRING, continued)
  @i += raw.length
end
|
97
|
+
|
98
|
+
# Matches interpolated JavaScript; the token value is the matched text with
# JS_CLEANER matches (the surrounding backticks) removed.
def js_token
  script = @chunk[JS, 1]
  return false if script.nil?
  cleaned = script.gsub(JS_CLEANER, '')
  token(:JS, cleaned)
  @i += script.length
end
|
104
|
+
|
105
|
+
# Matches regular expression literals; the literal is passed on unchanged.
def regex_token
  literal = @chunk[REGEX, 1]
  return false if literal.nil?
  token(:REGEX, literal)
  @i += literal.length
end
|
111
|
+
|
112
|
+
# Matches and consumes comments. The :COMMENT token carries the cleaned
# comment text split on line breaks, and is always followed by a "\n" token.
def comment_token
  raw = @chunk[COMMENT, 1]
  return false unless raw
  @line += raw.scan(MULTILINER).length
  comment_lines = raw.gsub(COMMENT_CLEANER, '').split(MULTILINER)
  token(:COMMENT, comment_lines)
  token("\n", "\n")
  @i += raw.length
end
|
120
|
+
|
121
|
+
# Matches and consumes non-meaningful whitespace; no token is emitted.
def whitespace_token
  blank = @chunk[WHITESPACE, 1]
  if blank
    @i += blank.length
  else
    false
  end
end
|
126
|
+
|
127
|
+
# We treat all other single characters as a token. Eg.: ( ) , . !
# Multi-character operators are also literal tokens, so that Racc can assign
# the proper order of operations. Multiple newlines get merged together.
def literal_token
  line_breaks = @chunk[NEWLINE, 1]
  unless line_breaks.nil?
    @line += line_breaks.length
    # Emit a single "\n" token for the whole run, and none at all when the
    # previous token already is one.
    token("\n", "\n") unless last_value == "\n"
    @i += line_breaks.length
    return @i
  end

  operator = @chunk[OPERATOR, 1]
  if operator
    # An operator run starting with "=>" retags the identifiers before it.
    tag_parameters if operator.match(CODE)
    value = operator
  else
    value = @chunk[0, 1]
  end

  if EXP_START.include?(value)
    skip_following_newlines
  end
  if EXP_END.include?(value)
    remove_leading_newlines
  end
  token(value, value)
  @i += value.length
end
|
145
|
+
|
146
|
+
# Add a token to the results: the tag paired with the value wrapped in a
# Value together with the current line number.
def token(tag, value)
  @tokens.push([tag, Value.new(value, @line)])
end
|
151
|
+
|
152
|
+
# Peek at the value of the previous token; nil when there are no tokens yet.
def last_value
  previous = @tokens.last
  previous ? previous[1] : nil
end
|
156
|
+
|
157
|
+
# A source of ambiguity in our grammar was parameter lists in function
# definitions (as opposed to argument lists in function calls). To avoid it,
# walk backwards over the tokens collected so far, skipping commas and
# retagging each :IDENTIFIER as :PARAM, and stop at the first token that is
# neither. Always returns nil.
def tag_parameters
  @tokens.reverse_each do |tok|
    next if tok[0] == ','
    return if tok[0] != :IDENTIFIER
    tok[0] = :PARAM
  end
  nil
end
|
170
|
+
|
171
|
+
# Consume and ignore the run of newlines that directly follows the current
# character, counting the skipped lines. Does nothing when there is none.
def skip_following_newlines
  remainder = @code[(@i + 1)..-1]
  run = remainder[NEWLINE, 1]
  return unless run
  @line += run.length
  @i += run.length
end
|
179
|
+
|
180
|
+
# Discard the newline token immediately before this point, if there is one.
def remove_leading_newlines
  return unless last_value == "\n"
  @tokens.pop
end
|
184
|
+
|
185
|
+
end
|
186
|
+
|
187
|
+
end
|