coffee-script 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +22 -0
- data/README +38 -0
- data/bin/coffee-script +5 -0
- data/coffee-script.gemspec +21 -0
- data/examples/code.cs +173 -0
- data/examples/documents.cs +72 -0
- data/examples/poignant.cs +153 -0
- data/examples/syntax_errors.cs +20 -0
- data/examples/underscore.cs +597 -0
- data/lib/coffee-script.rb +20 -0
- data/lib/coffee_script/CoffeeScript.tmbundle/Preferences/CoffeeScript.tmPreferences +24 -0
- data/lib/coffee_script/CoffeeScript.tmbundle/Syntaxes/CoffeeScript.tmLanguage +329 -0
- data/lib/coffee_script/CoffeeScript.tmbundle/info.plist +10 -0
- data/lib/coffee_script/command_line.rb +183 -0
- data/lib/coffee_script/grammar.y +403 -0
- data/lib/coffee_script/lexer.rb +187 -0
- data/lib/coffee_script/nodes.rb +680 -0
- data/lib/coffee_script/parse_error.rb +22 -0
- data/lib/coffee_script/parser.rb +1987 -0
- data/lib/coffee_script/scope.rb +45 -0
- data/lib/coffee_script/value.rb +42 -0
- metadata +75 -0
@@ -0,0 +1,403 @@
|
|
1
|
+
class Parser

# Racc grammar for CoffeeScript source. Every semantic action assigns the
# value of its rule to `result`, reading the matched symbols from `val`.
# The node classes instantiated below (Expressions, LiteralNode, OpNode, ...)
# are defined outside this file.

# Declare tokens produced by the lexer
token IF ELSE THEN UNLESS
token NUMBER STRING REGEX
token TRUE FALSE YES NO ON OFF
token IDENTIFIER PROPERTY_ACCESS
token CODE PARAM NEW RETURN
token TRY CATCH FINALLY THROW
token BREAK CONTINUE
token FOR IN WHILE
token SWITCH WHEN
token SUPER
token DELETE
token NEWLINE
token COMMENT
token JS

# Declare order of operations.
prechigh
  nonassoc UMINUS NOT '!' '!!' '~' '++' '--'
  left     '*' '/' '%'
  left     '+' '-'
  left     '<<' '>>' '>>>'
  left     '&' '|' '^'
  left     '<=' '<' '>' '>='
  right    '==' '!=' IS AINT
  left     '&&' '||' AND OR
  right    '-=' '+=' '/=' '*='
  right    DELETE
  left     "."
  right    THROW FOR IN WHILE NEW
  left     UNLESS IF ELSE
  left     ":" '||:' '&&:'
  right    RETURN
preclow

# We expect 4 shift/reduce conflicts for optional syntax.
# There used to be 252 -- greatly improved.
expect 4

rule

  # All parsing will end in this rule, being the trunk of the AST.
  Root:
    /* nothing */                     { result = Expressions.new([]) }
  | Terminator                        { result = Expressions.new([]) }
  | Expressions                       { result = val[0] }
  ;

  # Any list of expressions or method body, separated by line breaks or semis.
  Expressions:
    Expression                        { result = Expressions.new(val) }
  | Expressions Terminator Expression { result = val[0] << val[2] }
  | Expressions Terminator            { result = val[0] }
  | Terminator Expressions            { result = val[1] }
  ;

  # All types of expressions in our language.
  Expression:
    PureExpression
  | Statement
  ;

  # The parts that are natural JavaScript expressions.
  PureExpression:
    Literal
  | Value
  | Call
  | Code
  | Operation
  ;

  # We have to take extra care to convert these statements into expressions.
  Statement:
    Assign
  | If
  | Try
  | Throw
  | Return
  | While
  | For
  | Switch
  | Comment
  ;

  # All tokens that can terminate an expression.
  Terminator:
    "\n"
  | ";"
  ;

  # All tokens that can serve to begin the second block of a multi-part expression.
  Then:
    THEN
  | Terminator
  ;

  # All hard-coded values.
  Literal:
    NUMBER                            { result = LiteralNode.new(val[0]) }
  | STRING                            { result = LiteralNode.new(val[0]) }
  | JS                                { result = LiteralNode.new(val[0]) }
  | REGEX                             { result = LiteralNode.new(val[0]) }
  | BREAK                             { result = LiteralNode.new(val[0]) }
  | CONTINUE                          { result = LiteralNode.new(val[0]) }
  | TRUE                              { result = LiteralNode.new(true) }
  | FALSE                             { result = LiteralNode.new(false) }
  | YES                               { result = LiteralNode.new(true) }
  | NO                                { result = LiteralNode.new(false) }
  | ON                                { result = LiteralNode.new(true) }
  | OFF                               { result = LiteralNode.new(false) }
  ;

  # Assignment to a variable.
  Assign:
    Value ":" Expression              { result = AssignNode.new(val[0], val[2]) }
  ;

  # Assignment within an object literal.
  AssignObj:
    IDENTIFIER ":" Expression         { result = AssignNode.new(val[0], val[2], :object) }
  | Comment                           { result = val[0] }
  ;

  # A return statement.
  Return:
    RETURN Expression                 { result = ReturnNode.new(val[1]) }
  ;

  # A comment.
  Comment:
    COMMENT                           { result = CommentNode.new(val[0]) }
  ;

  # Arithmetic and logical operators
  # For Ruby's Operator precedence, see:
  # https://www.cs.auckland.ac.nz/references/ruby/ProgrammingRuby/language.html
  #
  # Prefix forms pass (operator, operand); binary forms pass
  # (operator, left, right). The postfix '--' / '++' forms pass a trailing
  # `true` -- presumably a "postfix" flag; confirm against OpNode.
  Operation:
    '!' Expression                    { result = OpNode.new(val[0], val[1]) }
  | '!!' Expression                   { result = OpNode.new(val[0], val[1]) }
    # `= UMINUS` gives unary minus the precedence of UMINUS instead of binary '-'.
  | '-' Expression = UMINUS           { result = OpNode.new(val[0], val[1]) }
  | NOT Expression                    { result = OpNode.new(val[0], val[1]) }
  | '~' Expression                    { result = OpNode.new(val[0], val[1]) }
  | '--' Expression                   { result = OpNode.new(val[0], val[1]) }
  | '++' Expression                   { result = OpNode.new(val[0], val[1]) }
  | Expression '--'                   { result = OpNode.new(val[1], val[0], nil, true) }
  | Expression '++'                   { result = OpNode.new(val[1], val[0], nil, true) }

  | Expression '*' Expression         { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '/' Expression         { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '%' Expression         { result = OpNode.new(val[1], val[0], val[2]) }

  | Expression '+' Expression         { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '-' Expression         { result = OpNode.new(val[1], val[0], val[2]) }

  | Expression '<<' Expression        { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '>>' Expression        { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '>>>' Expression       { result = OpNode.new(val[1], val[0], val[2]) }

  | Expression '&' Expression         { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '|' Expression         { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '^' Expression         { result = OpNode.new(val[1], val[0], val[2]) }

  | Expression '<=' Expression        { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '<' Expression         { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '>' Expression         { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '>=' Expression        { result = OpNode.new(val[1], val[0], val[2]) }

  | Expression '==' Expression        { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '!=' Expression        { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression IS Expression          { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression AINT Expression        { result = OpNode.new(val[1], val[0], val[2]) }

  | Expression '&&' Expression        { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '||' Expression        { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression AND Expression         { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression OR Expression          { result = OpNode.new(val[1], val[0], val[2]) }

  | Expression '-=' Expression        { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '+=' Expression        { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '/=' Expression        { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '*=' Expression        { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '||:' Expression       { result = OpNode.new(val[1], val[0], val[2]) }
  | Expression '&&:' Expression       { result = OpNode.new(val[1], val[0], val[2]) }

  | DELETE Expression                 { result = OpNode.new(val[0], val[1]) }
  ;

  # Function definition.
  Code:
    ParamList "=>" CodeBody "."       { result = CodeNode.new(val[0], val[2]) }
  | "=>" CodeBody "."                 { result = CodeNode.new([], val[1]) }
  ;

  # The body of a function.
  CodeBody:
    /* nothing */                     { result = Expressions.new([]) }
  | Expressions                       { result = val[0] }
  ;

  # The parameters to a function definition.
  ParamList:
    PARAM                             { result = val }
  | ParamList "," PARAM               { result = val[0] << val[2] }
  ;

  # Expressions that can be treated as values.
  Value:
    IDENTIFIER                        { result = ValueNode.new(val[0]) }
  | Array                             { result = ValueNode.new(val[0]) }
  | Object                            { result = ValueNode.new(val[0]) }
  | Parenthetical                     { result = ValueNode.new(val[0]) }
  | Value Accessor                    { result = val[0] << val[1] }
  | Invocation Accessor               { result = ValueNode.new(val[0], [val[1]]) }
  ;

  # Accessing into an object or array, through dot or index notation.
  Accessor:
    PROPERTY_ACCESS IDENTIFIER        { result = AccessorNode.new(val[1]) }
  | Index                             { result = val[0] }
  | Slice                             { result = val[0] }
  ;

  # Indexing into an object or array.
  Index:
    "[" Expression "]"                { result = IndexNode.new(val[1]) }
  ;

  # Array slice literal.
  Slice:
    "[" Expression "," Expression "]" { result = SliceNode.new(val[1], val[3]) }
  ;

  # An object literal.
  Object:
    "{" AssignList "}"                { result = ObjectNode.new(val[1]) }
  ;

  # Assignment within an object literal (comma or newline separated).
  AssignList:
    /* nothing */                     { result = []}
  | AssignObj                         { result = val }
  | AssignList "," AssignObj          { result = val[0] << val[2] }
  | AssignList Terminator AssignObj   { result = val[0] << val[2] }
  ;

  # All flavors of function call (instantiation, super, and regular).
  Call:
    Invocation                        { result = val[0] }
  | NEW Invocation                    { result = val[1].new_instance }
  | Super                             { result = val[0] }
  ;

  # A generic function invocation.
  Invocation:
    Value "(" ArgList ")"             { result = CallNode.new(val[0], val[2]) }
  ;

  # Calling super.
  Super:
    SUPER "(" ArgList ")"             { result = CallNode.new(:super, val[2]) }
  ;

  # The array literal.
  Array:
    "[" ArgList "]"                   { result = ArrayNode.new(val[1]) }
  ;

  # A list of arguments to a method call, or as the contents of an array.
  ArgList:
    /* nothing */                     { result = [] }
  | Expression                        { result = val }
  | ArgList "," Expression            { result = val[0] << val[2] }
  | ArgList Terminator Expression     { result = val[0] << val[2] }
  ;

  # Try/catch/finally exception handling blocks.
  # Catch yields a two-element array: [error identifier, handler body].
  Try:
    TRY Expressions Catch "."         { result = TryNode.new(val[1], val[2][0], val[2][1]) }
  | TRY Expressions Catch
    FINALLY Expressions "."           { result = TryNode.new(val[1], val[2][0], val[2][1], val[4]) }
  ;

  # A catch clause.
  Catch:
    /* nothing */                     { result = [nil, nil] }
  | CATCH IDENTIFIER Expressions      { result = [val[1], val[2]] }
  ;

  # Throw an exception.
  Throw:
    THROW Expression                  { result = ThrowNode.new(val[1]) }
  ;

  # Parenthetical expressions.
  Parenthetical:
    "(" Expressions ")"               { result = ParentheticalNode.new(val[1]) }
  ;

  # The while loop. (there is no do..while).
  While:
    WHILE Expression Then
    Expressions "."                   { result = WhileNode.new(val[1], val[3]) }
  ;

  # Array comprehensions, including guard and current index.
  # ForNode arguments, in order: body, source, element name, guard (or nil),
  # and -- for the two-identifier forms -- the second identifier.
  For:
    Expression FOR IDENTIFIER
    IN PureExpression "."             { result = ForNode.new(val[0], val[4], val[2], nil) }
  | Expression FOR
    IDENTIFIER "," IDENTIFIER
    IN PureExpression "."             { result = ForNode.new(val[0], val[6], val[2], nil, val[4]) }
  | Expression FOR IDENTIFIER
    IN PureExpression
    IF Expression "."                 { result = ForNode.new(val[0], val[4], val[2], val[6]) }
  | Expression FOR
    IDENTIFIER "," IDENTIFIER
    IN PureExpression
    IF Expression "."                 { result = ForNode.new(val[0], val[6], val[2], val[8], val[4]) }
  ;

  # Switch/When blocks.
  Switch:
    SWITCH Expression Then
    Whens "."                         { result = val[3].rewrite_condition(val[1]) }
  | SWITCH Expression Then
    Whens ELSE Expressions "."        { result = val[3].rewrite_condition(val[1]).add_else(val[5]) }
  ;

  # The inner list of whens.
  Whens:
    When                              { result = val[0] }
  | Whens When                        { result = val[0] << val[1] }
  ;

  # An individual when.
  When:
    WHEN Expression Then Expressions  { result = IfNode.new(val[1], val[3]) }
  ;

  # All of the following nutso if-else destructuring is to make the
  # grammar expand unambiguously.

  # An elsif portion of an if-else block.
  ElsIf:
    ELSE IF Expression
    Then Expressions                  { result = IfNode.new(val[2], val[4]) }
  ;

  # Multiple elsifs can be chained together.
  ElsIfs:
    ElsIf                             { result = val[0] }
  | ElsIfs ElsIf                      { result = val[0].add_else(val[1]) }
  ;

  # Terminating else bodies are strictly optional.
  ElseBody:
    "."                               { result = nil }
  | ELSE Expressions "."              { result = val[1] }
  ;

  # All the alternatives for ending an if-else block.
  IfEnd:
    ElseBody                          { result = val[0] }
  | ElsIfs ElseBody                   { result = val[0].add_else(val[1]) }
  ;

  # The full complement of if blocks, including postfix one-liner ifs and unlesses.
  If:
    IF Expression
    Then Expressions IfEnd            { result = IfNode.new(val[1], val[3], val[4]) }
  | Expression IF Expression          { result = IfNode.new(val[2], Expressions.new([val[0]]), nil, {:statement => true}) }
  | Expression UNLESS Expression      { result = IfNode.new(val[2], Expressions.new([val[0]]), nil, {:statement => true, :invert => true}) }
  ;

end
|
378
|
+
|
379
|
+
---- header
|
380
|
+
module CoffeeScript
|
381
|
+
|
382
|
+
---- inner
|
383
|
+
# Tokenize a string of CoffeeScript with a fresh Lexer, store the tokens
# where `next_token` can reach them, then run the generated parser.
# Returns whatever `do_parse` returns.
def parse(code)
  # To debug the grammar, uncomment the next line and build the parser with
  # the -g flag in the Rake build task.
  # @yydebug = true
  lexer   = Lexer.new
  @tokens = lexer.tokenize(code)
  do_parse
end
|
391
|
+
|
392
|
+
# Retrieve the next token from the list: removes and returns the first
# [tag, value] pair of @tokens (filled in by `parse`), or nil once the list
# is empty.
|
393
|
+
def next_token
|
394
|
+
@tokens.shift
|
395
|
+
end
|
396
|
+
|
397
|
+
# Error hook: turn a parse failure into a ParseError built from the
# readable name of the offending token, its value, and the value stack.
def on_error(error_token_id, error_value, value_stack)
  token_name = token_to_str(error_token_id)
  raise ParseError.new(token_name, error_value, value_stack)
end
|
401
|
+
|
402
|
+
---- footer
|
403
|
+
end
|
@@ -0,0 +1,187 @@
|
|
1
|
+
module CoffeeScript
|
2
|
+
|
3
|
+
# The lexer reads a stream of CoffeeScript and divides it up into tagged
|
4
|
+
# tokens. A minor bit of the ambiguity in the grammar has been avoided by
|
5
|
+
# pushing some extra smarts into the Lexer.
|
6
|
+
class Lexer

  # Scanning works on @chunk, the not-yet-consumed tail of the source. Each
  # *_token method tries one pattern at the head of the chunk; on success it
  # appends a token, advances @i and returns a truthy value, otherwise it
  # returns false so the next matcher can try.

  # The list of keywords passed verbatim to the parser.
  KEYWORDS = ["if", "else", "then", "unless",
              "true", "false", "yes", "no", "on", "off",
              "and", "or", "is", "aint", "not",
              "new", "return",
              "try", "catch", "finally", "throw",
              "break", "continue",
              "for", "in", "while",
              "switch", "when",
              "super",
              "delete"].freeze

  # Token matching regexes.
  IDENTIFIER = /\A([a-zA-Z$_]\w*)/
  NUMBER     = /\A\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?))\b/i
  # The body of a quoted literal is a run of ordinary characters and
  # backslash escapes. Consuming escapes pairwise means a literal that ends
  # in an escaped backslash (e.g. "C:\\") is closed by the quote after it.
  STRING     = /\A("(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*')/m
  JS         = /\A(`(?:[^`\\]|\\.)*`)/m
  OPERATOR   = /\A([+\*&|\/\-%=<>:!]+)/
  WHITESPACE = /\A([ \t\r]+)/
  NEWLINE    = /\A(\n+)/
  COMMENT    = /\A((#[^\n]*\s*)+)/m
  CODE       = /\A(=>)/
  REGEX      = /\A(\/(.*?)[^\\]\/[imgy]{0,4})/

  # Token cleaning regexes.
  JS_CLEANER      = /(\A`|`\Z)/
  MULTILINER      = /\n/
  COMMENT_CLEANER = /(^\s*#|\n\s*$)/

  # Tokens that always constitute the start of an expression.
  EXP_START = ['{', '(', '['].freeze

  # Tokens that always constitute the end of an expression.
  EXP_END = ['}', ')', ']'].freeze

  # Scan the source from left to right, one token per iteration, and return
  # the list of [tag, Value] pairs.
  def tokenize(code)
    @code   = code.chomp  # Drop one trailing line break, if present.
    @i      = 0           # Current character position we're parsing.
    @line   = 1           # The current line.
    @tokens = []          # Collection of all parsed tokens in the form [:TOKEN_TYPE, value]
    while @i < @code.length
      @chunk = @code[@i..-1]
      extract_next_token
    end
    @tokens
  end

  # At every position, run this list of match attempts, short-circuiting if
  # any of them succeed. The order matters: earlier matchers win.
  def extract_next_token
    return if identifier_token
    return if number_token
    return if string_token
    return if js_token
    return if regex_token
    return if comment_token
    return if whitespace_token
    return literal_token
  end

  # Matches identifying literals: variables, keywords, method names, etc.
  def identifier_token
    identifier = @chunk[IDENTIFIER, 1]
    return false unless identifier
    # Keywords are special identifiers tagged with their own name, 'if' will
    # result in an [:IF, "if"] token.
    tag = KEYWORDS.include?(identifier) ? identifier.upcase.to_sym : :IDENTIFIER
    # A '.' directly before a plain identifier is a property access, so the
    # already-emitted '.' token is retagged in place.
    @tokens[-1][0] = :PROPERTY_ACCESS if tag == :IDENTIFIER && last_value == '.'
    token(tag, identifier)
    @i += identifier.length
  end

  # Matches numbers, including decimals, hex, and exponential notation.
  def number_token
    number = @chunk[NUMBER, 1]
    return false unless number
    token(:NUMBER, number)
    @i += number.length
  end

  # Matches strings, including multi-line strings. Each line break inside
  # the literal bumps the line counter and is rewritten as a backslash
  # line continuation.
  def string_token
    string = @chunk[STRING, 1]
    return false unless string
    escaped = string.gsub(MULTILINER) do |match|
      @line += 1
      " \\\n"
    end
    token(:STRING, escaped)
    @i += string.length
  end

  # Matches interpolated JavaScript; the surrounding backticks are stripped
  # from the token value.
  def js_token
    script = @chunk[JS, 1]
    return false unless script
    token(:JS, script.gsub(JS_CLEANER, ''))
    @i += script.length
  end

  # Matches regular expression literals.
  def regex_token
    regex = @chunk[REGEX, 1]
    return false unless regex
    token(:REGEX, regex)
    @i += regex.length
  end

  # Matches and consumes comments. A run of consecutive comment lines
  # becomes one COMMENT token whose value is the array of cleaned lines,
  # followed by a newline token.
  def comment_token
    comment = @chunk[COMMENT, 1]
    return false unless comment
    @line += comment.scan(MULTILINER).length
    token(:COMMENT, comment.gsub(COMMENT_CLEANER, '').split(MULTILINER))
    token("\n", "\n")
    @i += comment.length
  end

  # Matches and consumes non-meaningful whitespace.
  def whitespace_token
    whitespace = @chunk[WHITESPACE, 1]
    return false unless whitespace
    @i += whitespace.length
  end

  # We treat all other single characters as a token. Eg.: ( ) , . !
  # Multi-character operators are also literal tokens, so that Racc can assign
  # the proper order of operations. Multiple newlines get merged together.
  def literal_token
    value = @chunk[NEWLINE, 1]
    if value
      @line += value.length
      token("\n", "\n") unless last_value == "\n"
      return @i += value.length
    end
    value = @chunk[OPERATOR, 1]
    # Seeing '=>' means the identifiers just emitted were parameters.
    tag_parameters if value && value.match(CODE)
    value ||= @chunk[0,1]
    skip_following_newlines if EXP_START.include?(value)
    remove_leading_newlines if EXP_END.include?(value)
    token(value, value)
    @i += value.length
  end

  # Add a token to the results, wrapping the value together with the
  # current line number.
  def token(tag, value)
    @tokens << [tag, Value.new(value, @line)]
  end

  # Peek at the value of the previous token (nil when there is none).
  def last_value
    @tokens.last && @tokens.last[1]
  end

  # A source of ambiguity in our grammar was parameter lists in function
  # definitions (as opposed to argument lists in function calls). Tag
  # parameter identifiers in order to avoid this.
  #
  # Walks backwards from the newest token, skipping commas and retagging
  # identifiers as PARAM, and stops at the first token of any other kind.
  def tag_parameters
    index = 0
    loop do
      tok = @tokens[index -= 1]
      return if !tok
      next if tok[0] == ','
      return if tok[0] != :IDENTIFIER
      tok[0] = :PARAM
    end
  end

  # Consume and ignore newlines immediately after this point. Only called
  # for the one-character EXP_START tokens, hence the fixed offset of 1.
  def skip_following_newlines
    newlines = @code[(@i+1)..-1][NEWLINE, 1]
    if newlines
      @line += newlines.length
      @i += newlines.length
    end
  end

  # Discard newlines immediately before this point.
  def remove_leading_newlines
    @tokens.pop if last_value == "\n"
  end

end
|
186
|
+
|
187
|
+
end
|