antlr3 1.8.0 → 1.8.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (111) hide show
  1. data/History.txt +35 -0
  2. data/Manifest.txt +73 -0
  3. data/README.txt +6 -13
  4. data/java/RubyTarget.java +43 -19
  5. data/java/antlr-full-3.2.1.jar +0 -0
  6. data/lib/antlr3/debug.rb +2 -0
  7. data/lib/antlr3/debug/event-hub.rb +55 -55
  8. data/lib/antlr3/debug/record-event-listener.rb +2 -2
  9. data/lib/antlr3/debug/rule-tracer.rb +14 -14
  10. data/lib/antlr3/debug/socket.rb +47 -47
  11. data/lib/antlr3/debug/trace-event-listener.rb +8 -8
  12. data/lib/antlr3/main.rb +29 -9
  13. data/lib/antlr3/modes/ast-builder.rb +7 -7
  14. data/lib/antlr3/modes/filter.rb +19 -17
  15. data/lib/antlr3/profile.rb +34 -6
  16. data/lib/antlr3/recognizers.rb +50 -1
  17. data/lib/antlr3/streams.rb +19 -15
  18. data/lib/antlr3/streams/rewrite.rb +241 -229
  19. data/lib/antlr3/template/group-file-lexer.rb +6 -8
  20. data/lib/antlr3/template/group-file-parser.rb +16 -16
  21. data/lib/antlr3/template/group-file.rb +1 -1
  22. data/lib/antlr3/test/call-stack.rb +13 -13
  23. data/lib/antlr3/test/core-extensions.rb +69 -69
  24. data/lib/antlr3/test/functional.rb +0 -4
  25. data/lib/antlr3/test/grammar.rb +70 -70
  26. data/lib/antlr3/token.rb +41 -17
  27. data/lib/antlr3/tree.rb +11 -14
  28. data/lib/antlr3/tree/debug.rb +53 -53
  29. data/lib/antlr3/tree/visitor.rb +11 -11
  30. data/lib/antlr3/tree/wizard.rb +35 -35
  31. data/lib/antlr3/util.rb +18 -0
  32. data/lib/antlr3/version.rb +1 -1
  33. data/rakefile +1 -0
  34. data/samples/ANTLRv3Grammar.g +3 -3
  35. data/samples/JavaScript.g +702 -0
  36. data/samples/standard/C/C.g +543 -0
  37. data/samples/standard/C/C.tokens +175 -0
  38. data/samples/standard/C/C__testrig.st +0 -0
  39. data/samples/standard/C/c.rb +12 -0
  40. data/samples/standard/C/input +3479 -0
  41. data/samples/standard/C/output +171 -0
  42. data/samples/standard/LL-star/LLStar.g +101 -0
  43. data/samples/standard/LL-star/input +12 -0
  44. data/samples/standard/LL-star/ll-star.rb +12 -0
  45. data/samples/standard/LL-star/output +2 -0
  46. data/samples/standard/calc/Calculator.g +47 -0
  47. data/samples/standard/calc/Calculator.py +16 -0
  48. data/samples/standard/calc/Calculator.rb +28 -0
  49. data/samples/standard/cminus/CMinus.g +141 -0
  50. data/samples/standard/cminus/bytecode.group +80 -0
  51. data/samples/standard/cminus/cminus.rb +16 -0
  52. data/samples/standard/cminus/input +9 -0
  53. data/samples/standard/cminus/java.group +91 -0
  54. data/samples/standard/cminus/output +11 -0
  55. data/samples/standard/cminus/python.group +48 -0
  56. data/samples/standard/dynamic-scope/DynamicScopes.g +50 -0
  57. data/samples/standard/dynamic-scope/dynamic-scopes.rb +12 -0
  58. data/samples/standard/dynamic-scope/input +7 -0
  59. data/samples/standard/dynamic-scope/output +4 -0
  60. data/samples/standard/fuzzy/FuzzyJava.g +89 -0
  61. data/samples/standard/fuzzy/fuzzy.py +11 -0
  62. data/samples/standard/fuzzy/fuzzy.rb +9 -0
  63. data/samples/standard/fuzzy/input +13 -0
  64. data/samples/standard/fuzzy/output +12 -0
  65. data/samples/standard/hoisted-predicates/HoistedPredicates.g +40 -0
  66. data/samples/standard/hoisted-predicates/hoisted-predicates.rb +13 -0
  67. data/samples/standard/hoisted-predicates/input +1 -0
  68. data/samples/standard/hoisted-predicates/output +1 -0
  69. data/samples/standard/island-grammar/Javadoc.g +46 -0
  70. data/samples/standard/island-grammar/Simple.g +104 -0
  71. data/samples/standard/island-grammar/input +11 -0
  72. data/samples/standard/island-grammar/island.rb +12 -0
  73. data/samples/standard/island-grammar/output +16 -0
  74. data/samples/standard/java/Java.g +827 -0
  75. data/samples/standard/java/input +80 -0
  76. data/samples/standard/java/java.rb +13 -0
  77. data/samples/standard/java/output +1 -0
  78. data/samples/standard/python/Python.g +718 -0
  79. data/samples/standard/python/PythonTokenSource.rb +107 -0
  80. data/samples/standard/python/input +210 -0
  81. data/samples/standard/python/output +24 -0
  82. data/samples/standard/python/python.rb +14 -0
  83. data/samples/standard/rakefile +18 -0
  84. data/samples/standard/scopes/SymbolTable.g +66 -0
  85. data/samples/standard/scopes/input +12 -0
  86. data/samples/standard/scopes/output +3 -0
  87. data/samples/standard/scopes/scopes.rb +12 -0
  88. data/samples/standard/simplecTreeParser/SimpleC.g +113 -0
  89. data/samples/standard/simplecTreeParser/SimpleCWalker.g +64 -0
  90. data/samples/standard/simplecTreeParser/input +12 -0
  91. data/samples/standard/simplecTreeParser/output +1 -0
  92. data/samples/standard/simplecTreeParser/simplec.rb +18 -0
  93. data/samples/standard/treeparser/Lang.g +24 -0
  94. data/samples/standard/treeparser/LangDumpDecl.g +17 -0
  95. data/samples/standard/treeparser/input +1 -0
  96. data/samples/standard/treeparser/output +2 -0
  97. data/samples/standard/treeparser/treeparser.rb +18 -0
  98. data/samples/standard/tweak/Tweak.g +68 -0
  99. data/samples/standard/tweak/input +9 -0
  100. data/samples/standard/tweak/output +16 -0
  101. data/samples/standard/tweak/tweak.rb +13 -0
  102. data/samples/standard/xml/README +16 -0
  103. data/samples/standard/xml/XML.g +123 -0
  104. data/samples/standard/xml/input +21 -0
  105. data/samples/standard/xml/output +39 -0
  106. data/samples/standard/xml/xml.rb +9 -0
  107. data/templates/Ruby.stg +4 -4
  108. data/test/functional/ast-output/auto-ast.rb +0 -5
  109. data/test/functional/ast-output/rewrites.rb +4 -4
  110. data/test/unit/test-scope.rb +45 -0
  111. metadata +96 -8
@@ -0,0 +1,80 @@
1
+ import java.io.*;
2
+ import org.antlr.runtime.*;
3
+
4
+ /** Parse a java file or directory of java files using the generated parser
5
+ * ANTLR builds from java.g
6
+ */
7
+ class Main {
8
+
9
+ static CommonTokenStream tokens = new CommonTokenStream();
10
+
11
+ public static void main(String[] args) {
12
+ try {
13
+ if (args.length > 0 ) {
14
+ // for each directory/file specified on the command line
15
+ for(int i=0; i< args.length;i++) {
16
+ doFile(new File(args[i])); // parse it
17
+ }
18
+ }
19
+ else {
20
+ System.err.println("Usage: java Main <directory or file name>");
21
+ }
22
+ }
23
+ catch(Exception e) {
24
+ System.err.println("exception: "+e);
25
+ e.printStackTrace(System.err); // so we can get stack trace
26
+ }
27
+ }
28
+
29
+
30
+ // Dispatch on the kind of path: recurse into directories, hand *.java files
31
+ // to parseFile, and skip every other kind of file.
32
+ public static void doFile(File f)
33
+ throws Exception {
34
+ // If this is a directory, walk each file/dir in that directory
35
+ if (f.isDirectory()) {
36
+ String files[] = f.list(); // null when the directory cannot be listed
37
+ for(int i=0; files != null && i < files.length; i++)
38
+ doFile(new File(f, files[i]));
39
+ }
40
+
41
+ // otherwise, if this is a java file, parse it!
42
+ else if ((f.getName().length()>5) &&
43
+ f.getName().substring(f.getName().length()-5).equals(".java")) {
44
+ System.err.println(" "+f.getAbsolutePath());
45
+ // parseFile(f.getName(), new FileInputStream(f));
46
+ parseFile(f.getAbsolutePath());
47
+ }
48
+ }
49
+
50
+ // Here's where we do the real work...
51
+ public static void parseFile(String f)
52
+ throws Exception {
53
+ try {
54
+ // Create a scanner that reads from the input stream passed to us
55
+ JavaParserLexer lexer = new JavaParserLexer(new ANTLRFileStream(f));
56
+ //tokens.discardOffChannelTokens(true);
57
+ tokens.setTokenSource(lexer);
58
+ /*
59
+ long t1 = System.currentTimeMillis();
60
+ tokens.LT(1);
61
+ long t2 = System.currentTimeMillis();
62
+ System.out.println("lexing time: "+(t2-t1)+"ms");
63
+ */
64
+ //System.out.println(tokens);
65
+
66
+ // Create a parser that reads from the scanner
67
+ JavaParser parser = new JavaParser(tokens);
68
+
69
+ // start parsing at the compilationUnit rule
70
+ parser.compilationUnit();
71
+
72
+ }
73
+ catch (Exception e) {
74
+ System.err.println("parser exception: "+e);
75
+ e.printStackTrace(); // so we can get stack trace
76
+ }
77
+ }
78
+
79
+ }
80
+
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/ruby
2
+ # encoding: utf-8
3
+ $:.unshift( File.dirname( __FILE__ ) )
4
+ require 'JavaLexer'
5
+ require 'JavaParser'
6
+
7
+ for file in ARGV
8
+ input = ANTLR3::FileStream.new( file )
9
+ lexer = Java::Lexer.new( input )
10
+ parser = Java::Parser.new( lexer )
11
+ parser.compilationUnit
12
+ puts( "finished parsing #{ file }" )
13
+ end
@@ -0,0 +1 @@
1
+ finished parsing OK
@@ -0,0 +1,718 @@
1
+ /*
2
+ [The 'BSD licence']
3
+ Copyright (c) 2004 Terence Parr and Loring Craymer
4
+ All rights reserved.
5
+
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions
8
+ are met:
9
+ 1. Redistributions of source code must retain the above copyright
10
+ notice, this list of conditions and the following disclaimer.
11
+ 2. Redistributions in binary form must reproduce the above copyright
12
+ notice, this list of conditions and the following disclaimer in the
13
+ documentation and/or other materials provided with the distribution.
14
+ 3. The name of the author may not be used to endorse or promote products
15
+ derived from this software without specific prior written permission.
16
+
17
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19
+ OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22
+ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26
+ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
+ */
28
+
29
+ /** Python 2.5 Grammar
30
+ *
31
+ * Terence Parr and Loring Craymer
32
+ * February 2004
33
+ *
34
+ * Converted to ANTLR v3 November 2005 by Terence Parr.
35
+ *
36
+ * Updated to Python 2.5 by Aaron Maxwell, 22 July 2008
37
+ *
38
+ * This grammar was derived automatically from the Python 2.3.3
39
+ * parser grammar to get a syntactically correct ANTLR grammar
40
+ * for Python. Then Terence hand tweaked it to be semantically
41
+ * correct; i.e., removed lookahead issues etc... It is LL(1)
42
+ * except for the (sometimes optional) trailing commas and semi-colons.
43
+ * It needs two symbols of lookahead in this case.
44
+ *
45
+ * Starting with Loring's preliminary lexer for Python, I modified it
46
+ * to do my version of the whole nasty INDENT/DEDENT issue just so I
47
+ * could understand the problem better. This grammar requires
48
+ * PythonTokenSource.rb (PythonTokenStream.java in the Java original) to work. Also I used some rules from the
49
+ * semi-formal grammar on the web for Python (automatically
50
+ * translated to ANTLR format by an ANTLR grammar, naturally <grin>).
51
+ * The lexical rules for python are particularly nasty and it took me
52
+ * a long time to get it 'right'; i.e., think about it in the proper
53
+ * way. Resist changing the lexer unless you've used ANTLR a lot. ;)
54
+ *
55
+ * I (Terence) tested this by running it on the jython-2.1/Lib
56
+ * directory of 40k lines of Python.
57
+ *
58
+ */
59
+
60
+ grammar Python;
61
+
62
+ options { language = Ruby; }
63
+
64
+ tokens {
65
+ INDENT; DEDENT; BLOCK;
66
+ PARAMS; PARAM_TUPLE; LONGINT;
67
+ COMPLEX; CONCAT; CALL;
68
+ TUPLE; LIST; DICT;
69
+ DECORATION; MODULE; DOCUMENT;
70
+ }
71
+
72
+ @lexer::init {
73
+ @line_join_level = 0 # if > 0, newlines are insignificant
74
+ @in_indent = false
75
+ }
76
+
77
+ @lexer::members {
78
+
79
+ # @in_indent becomes true at the start of a line with leading whitespace.
80
+ # It remains true until manually set to false by the leading_ws rule.
81
+ def in_indent?
82
+ if @input.beginning_of_line?
83
+ @in_indent =
84
+ case @input.peek
85
+ when ?\s, ?\t then true
86
+ else false
87
+ end
88
+ end
89
+ return @in_indent
90
+ end
91
+
92
+ }
93
+
94
+ @parser::main {
95
+ require 'PythonLexer'
96
+ require 'PythonTokenSource'
97
+
98
+ main = ANTLR3::Main::ParserMain.new(
99
+ Python::Parser, :lexer_class => Python::Lexer
100
+ )
101
+
102
+ class << main
103
+
104
+ def recognize( in_stream )
105
+ parser_options = {}
106
+ @port and parser_options[:port] = @port
107
+ @debug_socket and parser_options[:debug_socket] = @error
108
+
109
+ lexer = Python::Lexer.new(in_stream)
110
+ post_process = Python::TokenSource.new(lexer)
111
+ token_stream = ANTLR3::CommonTokenStream.new(post_process)
112
+ parser = Python::Parser.new(token_stream, parser_options)
113
+
114
+ result = parser.send(@parser_rule)
115
+
116
+ if result
117
+ if result.respond_to?(:tree) and tree = result.tree
118
+ puts(tree.inspect)
119
+ else puts(result.inspect)
120
+ end
121
+ end
122
+ end
123
+
124
+ end
125
+
126
+ main.execute(ARGV)
127
+
128
+ }
129
+
130
+ file_input
131
+ : ( NEWLINE* DOCUMENT )?
132
+ ( NEWLINE | statement )*
133
+ ;
134
+
135
+ single_input
136
+ : NEWLINE
137
+ | simple_statement
138
+ | compound_statement NEWLINE
139
+ ;
140
+
141
+ eval_input
142
+ : NEWLINE* test_list NEWLINE*
143
+ ;
144
+
145
+ decorator
146
+ : '@' dotted_attr ( LPAREN arg_list? RPAREN )? NEWLINE
147
+ ;
148
+
149
+ dotted_attr
150
+ : NAME ( '.' NAME )*
151
+ ;
152
+
153
+ decoration
154
+ : decorator+
155
+ |
156
+ ;
157
+
158
+ func_def
159
+ : decoration 'def' NAME LPAREN var_args_list? RPAREN ':' suite
160
+ { puts( "found method def #{ $NAME.text }" ) }
161
+ ;
162
+
163
+ parameters
164
+ : LPAREN var_args_list? RPAREN
165
+ ;
166
+
167
+ var_args_list
168
+ : def_parameter ( ',' def_parameter )*
169
+ ( ','
170
+ ( '*' single=NAME
171
+ ( ',' '**' double=NAME
172
+ |
173
+ )
174
+ | '**' double=NAME
175
+ )?
176
+ )?
177
+ | '*' single=NAME
178
+ ( ',' '**' double=NAME
179
+ |
180
+ )
181
+ | '**' NAME
182
+ ;
183
+
184
+ def_parameter
185
+ : fp_def ( '=' test )?
186
+ ;
187
+
188
+ fp_def
189
+ : NAME
190
+ | LPAREN fp_list RPAREN
191
+ ;
192
+
193
+ fp_list
194
+ : fp_def ( ',' fp_def )* ','?
195
+ ;
196
+
197
+ statement
198
+ : simple_statement
199
+ | compound_statement
200
+ ;
201
+
202
+ simple_statement
203
+ : small_statement
204
+ ( ';' small_statement )*
205
+ ';'? NEWLINE
206
+
207
+ ;
208
+
209
+ small_statement
210
+ : expr_statement
211
+ | print_statement
212
+ | del_statement
213
+ | pass_statement
214
+ | flow_statement
215
+ | import_statement
216
+ | global_statement
217
+ | exec_statement
218
+ | assert_statement
219
+ ;
220
+
221
+ expr_statement
222
+ : test_list
223
+ ( ( '+='
224
+ | '-='
225
+ | '*='
226
+ | '/='
227
+ | '%='
228
+ | '&='
229
+ | '|='
230
+ | '^='
231
+ | '<<='
232
+ | '>>='
233
+ | '**='
234
+ | '//='
235
+ )
236
+ ( yield_expr
237
+ | test_list
238
+ )
239
+ | ( '=' ( test_list | yield_expr ) )+
240
+ )?
241
+ ;
242
+
243
+ print_statement
244
+ : 'print'
245
+ ( test ( ',' test )* no_nl=','?
246
+ | '>>' dev=test ( ',' obj=test )* no_nl=','?
247
+ |
248
+ )
249
+ ;
250
+
251
+ del_statement
252
+ : 'del' exprlist
253
+ ;
254
+
255
+ pass_statement
256
+ : 'pass'
257
+ ;
258
+
259
+ flow_statement
260
+ : break_statement
261
+ | continue_statement
262
+ | return_statement
263
+ | raise_statement
264
+ | yield_statement
265
+ ;
266
+
267
+ break_statement
268
+ : 'break'
269
+ ;
270
+
271
+ continue_statement
272
+ : 'continue'
273
+ ;
274
+
275
+ return_statement
276
+ : 'return' test_list?
277
+ ;
278
+
279
+ yield_statement
280
+ : yield_expr
281
+ ;
282
+
283
+ raise_statement
284
+ : 'raise' ( test ( ',' test ( ',' test )? )? )?
285
+ ;
286
+
287
+ import_statement
288
+ : import_name
289
+ | import_from
290
+ ;
291
+
292
+ import_name
293
+ : 'import' dotted_as_names
294
+ ;
295
+
296
+ import_from
297
+ : 'from' ( '.'* dotted_name | '.'+ ) 'import'
298
+ ( '*'
299
+ | import_as_names
300
+ | LPAREN import_as_names RPAREN
301
+ )
302
+ ;
303
+
304
+ import_as_names
305
+ : import_as_name ( ',' import_as_name )* ','?
306
+ ;
307
+
308
+ import_as_name
309
+ : NAME ( 'as' NAME )?
310
+ ;
311
+
312
+ dotted_as_name
313
+ : dotted_name ( 'as' NAME )?
314
+ ;
315
+
316
+ dotted_as_names
317
+ : dotted_as_name ( ',' dotted_as_name )*
318
+ ;
319
+
320
+ dotted_name
321
+ : NAME ( '.' NAME )*
322
+ ;
323
+
324
+ global_statement
325
+ : 'global' NAME ( ',' NAME )*
326
+ ;
327
+
328
+ exec_statement
329
+ : 'exec' expr ( 'in' test ( ',' test )? )?
330
+ ;
331
+
332
+ assert_statement
333
+ : 'assert' assertion=test ( ',' error_value=test )?
334
+
335
+ ;
336
+
337
+ compound_statement
338
+ : if_statement
339
+ | while_statement
340
+ | for_statement
341
+ | try_statement
342
+ | with_statement
343
+ | func_def
344
+ | class_def
345
+ ;
346
+
347
+ if_statement
348
+ : 'if' test ':' suite elif_clause*
349
+ ( 'else' ':' suite )?
350
+ ;
351
+
352
+ elif_clause
353
+ : 'elif' test ':' suite
354
+ ;
355
+
356
+ while_statement
357
+ : 'while' test ':' suite
358
+ ( 'else' ':' suite )?
359
+ ;
360
+
361
+ for_statement
362
+ : 'for' exprlist 'in' test_list ':' suite ( 'else' ':' suite )?
363
+ ;
364
+
365
+ try_statement
366
+ : 'try' ':' suite
367
+ ( except_clause+ ( 'else' ':' suite )? ( 'finally' ':' suite)?
368
+ | 'finally' ':' suite
369
+ )
370
+ ;
371
+
372
+ with_statement
373
+ : 'with' test
374
+ ( 'as' NAME ':' suite
375
+ | ':' suite
376
+ )
377
+ ;
378
+
379
+ except_clause
380
+ : 'except' ( test ( ',' test )? )? ':' suite
381
+ ;
382
+
383
+ suite
384
+ : simple_statement
385
+ | NEWLINE INDENT (statement)+ DEDENT
386
+ ;
387
+
388
+ test
389
+ : or_test
390
+ ( ('if' or_test 'else') => 'if' or_test 'else' test )?
391
+ | lamb_def
392
+ ;
393
+
394
+ or_test
395
+ : and_test ( 'or' and_test )*
396
+ ;
397
+
398
+ and_test
399
+ : not_test ( 'and' not_test )*
400
+ ;
401
+
402
+ not_test
403
+ : 'not' not_test
404
+ | comparison
405
+ ;
406
+
407
+ comparison
408
+ : expr
409
+ (
410
+ ( '<'
411
+ | '>'
412
+ | '=='
413
+ | '>='
414
+ | '<='
415
+ | '<>'
416
+ | '!='
417
+ | 'in'
418
+ | 'not' 'in'
419
+ | 'is'
420
+ | 'is' 'not'
421
+ )
422
+ expr
423
+ )*
424
+ ;
425
+
426
+ expr
427
+ : xor_expr ( '|' xor_expr )*
428
+ ;
429
+
430
+ xor_expr
431
+ : and_expr ( '^' and_expr )*
432
+ ;
433
+
434
+ and_expr
435
+ : shift_expr ( '&' shift_expr )*
436
+ ;
437
+
438
+ shift_expr
439
+ : arith_expr ( ( '<<' | '>>' ) arith_expr )*
440
+ ;
441
+
442
+ arith_expr
443
+ : term ( ( '+' | '-' ) term )*
444
+ ;
445
+
446
+ term
447
+ : factor ( ( '*' | '/' | '%' | '//' ) factor )*
448
+ ;
449
+
450
+ factor
451
+ : '+' factor
452
+ | '-' factor
453
+ | '~' factor
454
+ | power
455
+ ;
456
+
457
+ power
458
+ : atom trailer* ( '**' factor )?
459
+ ;
460
+
461
+ atom
462
+ : LPAREN
463
+ ( yield_expr
464
+ | testlist_gexp
465
+ )?
466
+ RPAREN
467
+ | LBRACK list_maker? RBRACK
468
+ | LCURLY dict_maker? RCURLY
469
+ | '`' test_list '`'
470
+ | NAME
471
+ | INT
472
+ | LONGINT
473
+ | FLOAT
474
+ | COMPLEX
475
+ | ( STRING | DOCUMENT )+
476
+ ;
477
+
478
+ list_maker
479
+ : test
480
+ ( list_for
481
+ | (options {greedy=true;}:',' test)*
482
+ ) ','?
483
+ ;
484
+
485
+ testlist_gexp
486
+ : test
487
+ ( (options {k=2;}: ',' test)* ','?
488
+ | gen_for
489
+ )
490
+ ;
491
+
492
+ lamb_def
493
+ : 'lambda' var_args_list? ':' test
494
+ ;
495
+
496
+ trailer
497
+ : LPAREN arg_list? RPAREN
498
+ | LBRACK subscript_list RBRACK
499
+ | '.' NAME
500
+ ;
501
+
502
+ subscript_list : subscript (options {greedy=true;}:',' subscript)* (',')?
503
+ ;
504
+
505
+ subscript : '.' '.' '.'
506
+ | test (':' (test)? (sliceop)?)?
507
+ | ':' (test)? (sliceop)?
508
+ ;
509
+
510
+ sliceop : ':' (test)?
511
+ ;
512
+
513
+ exprlist : expr (options {k=2;}: ',' expr)* (',')?
514
+ ;
515
+
516
+ test_list
517
+ : test (options {k=2;}: ',' test)* (',')?
518
+ ;
519
+
520
+ dict_maker
521
+ : test ':' test (options {k=2;}:',' test ':' test )* ','?
522
+ ;
523
+
524
+ class_def
525
+ : 'class' NAME (LPAREN test_list? RPAREN)? ':' suite // NOTE(review): the action below reports "method def" for a class -- looks copied from func_def; confirm against the sample's expected output before changing
526
+ { puts( "found method def #{ $NAME.text }" ) }
527
+ ;
528
+
529
+ arg_list
530
+ : argument (',' argument)*
531
+ ( ','
532
+ ( '*' test (',' '**' test)?
533
+ | '**' test
534
+ )?
535
+ )?
536
+ | '*' test ( ',' '**' test )?
537
+ | '**' test
538
+ ;
539
+
540
+ argument
541
+ : test ( '=' test | gen_for )?
542
+ ;
543
+
544
+ list_iter
545
+ : list_for
546
+ | list_if
547
+ ;
548
+
549
+ list_for
550
+ : 'for' exprlist 'in' test_list list_iter?
551
+ ;
552
+
553
+ list_if
554
+ : 'if' test list_iter?
555
+ ;
556
+
557
+ gen_iter
558
+ : gen_for
559
+ | gen_if
560
+ ;
561
+
562
+ gen_for
563
+ : 'for' exprlist 'in' or_test gen_iter?
564
+ ;
565
+
566
+ gen_if
567
+ : 'if' test gen_iter?
568
+ ;
569
+
570
+ yield_expr
571
+ : 'yield' test_list?
572
+ ;
573
+
574
+ /*~~~~~~~~~~~~~~~~~~~~~~~~~ LEXER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
575
+
576
+ LPAREN : '(' { @line_join_level += 1 };
577
+
578
+ RPAREN : ')' { @line_join_level -= 1 };
579
+
580
+ LBRACK : '[' { @line_join_level += 1 };
581
+
582
+ RBRACK : ']' { @line_join_level -= 1 };
583
+
584
+ LCURLY : '{' { @line_join_level += 1 };
585
+
586
+ RCURLY : '}' { @line_join_level -= 1 };
587
+
588
+ FLOAT
589
+ : ( '.' ( '0' .. '9' )+ EXP?
590
+ | ( '0' .. '9' )+ '.'? EXP
591
+ | ( '0' .. '9' )+ '.' ( ( '0' .. '9' )+ EXP? )?
592
+ )
593
+ ( ( 'j' | 'J' ) { $type = COMPLEX } )?
594
+ ;
595
+
596
+ INT
597
+ : ( // Hex
598
+ '0' ( 'x' | 'X' ) ( '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' )+
599
+ | ( '0' .. '9' )+
600
+ )
601
+ ( ( 'l' | 'L' ) { $type = LONGINT }
602
+ | ( 'j' | 'J' ) { $type = COMPLEX }
603
+ )?
604
+ ;
605
+
606
+ NAME: ( 'a' .. 'z' | 'A' .. 'Z' | '_')
607
+ ( 'a' .. 'z' | 'A' .. 'Z' | '_' | '0' .. '9' )*
608
+ ;
609
+
610
+ /** Match various string types. Note that greedy=false implies '''
611
+ * should make us exit loop not continue.
612
+ */
613
+ STRING
614
+ : ( 'r' | 'u' | 'ur' | 'R' | 'U' | 'UR' | 'Ur' | 'uR' )?
615
+ ( '\'\'\'' (options {greedy=false;}: TRIAPOS )* '\'\'\'' { $type = DOCUMENT }
616
+ | '"""' (options {greedy=false;}: TRIQUOTE )* '"""' { $type = DOCUMENT }
617
+ | '"' ( ESC | ~( '\\' | '\n' | '"' ) )* '"'
618
+ | '\'' ( ESC | ~( '\\' | '\n' | '\'' ) )* '\''
619
+ )
620
+ ;
621
+
622
+ /** Consume a newline and any whitespace at start of next line
623
+ * unless the next line contains only white space, in that case
624
+ * emit a newline.
625
+ */
626
+ CONTINUED_LINE
627
+ : '\\' '\r'? '\n' ( ' ' | '\t' )*
628
+ ( NEWLINE
629
+ {
630
+ $type = NEWLINE
631
+ # $text = $nl.text
632
+ }
633
+ | { $channel = HIDDEN }
634
+ )
635
+ ;
636
+
637
+ /** Treat a sequence of blank lines as a single blank line. If
638
+ * nested within a (..), {..}, or [..], then ignore newlines.
639
+ * If the first newline starts in column one, they are to be ignored.
640
+ *
641
+ * Frank Wierzbicki added: Also ignore FORMFEEDS (\u000C).
642
+ */
643
+ NEWLINE
644
+ @init {
645
+ if @input.beginning_of_line? or @line_join_level > 0
646
+ $channel = HIDDEN
647
+ end
648
+ }
649
+ : ( '\u000C'? '\r'? '\n' )+
650
+ ;
651
+
652
+ /** Grab everything before a real symbol. Then if newline, kill it
653
+ * as this is a blank line. If whitespace followed by comment, kill it
654
+ * as it's a comment on a line by itself.
655
+ *
656
+ * Ignore leading whitespace when nested in [..], (..), {..}.
657
+ */
658
+ LEADING_WS
659
+ @after { @in_indent = false }
660
+ : { in_indent? }?=> // <-- semantic predicate meaning "only valid if in indent"
661
+ ( { @line_join_level > 0 }? ( ' ' | '\t' )+ { $channel = HIDDEN }
662
+ | ( ' ' | '\t' )+
663
+ ( '#' ~'\n'* ('\r'? '\n')*
664
+ {
665
+ $type = COMMENT
666
+ $channel = HIDDEN
667
+ }
668
+ )?
669
+ ( '\r'? '\n' { $channel = HIDDEN } )*
670
+ // kill trailing newline if present and then ignore
671
+ )
672
+ ;
673
+
674
+ /** Comments not on line by themselves are turned into newlines.
675
+
676
+ b = a # end of line comment
677
+
678
+ or
679
+
680
+ a = [1, # weird
681
+ 2]
682
+
683
+ This rule is invoked directly by nextToken when the comment is in
684
+ first column or when comment is on end of nonwhitespace line.
685
+
686
+ Only match \n here if we didn't start on left edge; let NEWLINE return that.
687
+ Kill newlines if we live on a line by ourselves
688
+
689
+ Consume any leading whitespace if it starts on left edge.
690
+ */
691
+
692
+ COMMENT
693
+ : '#' ~'\n'* { $channel = HIDDEN }
694
+ ;
695
+
696
+ WS
697
+ : ( ' ' | '\t' )+ {$channel=HIDDEN}
698
+ ;
699
+
700
+ fragment
701
+ TRIQUOTE
702
+ : '"'? '"'? ( ESC | ~( '\\' | '"' ) )+
703
+ ;
704
+
705
+ fragment
706
+ TRIAPOS
707
+ : '\''? '\''? ( ESC | ~( '\\' | '\'' ) )+
708
+ ;
709
+
710
+ fragment
711
+ ESC
712
+ : '\\' .
713
+ ;
714
+
715
+ fragment
716
+ EXP
717
+ : ( 'e' | 'E' ) ( '+' | '-' )? ( '0' .. '9' )+
718
+ ;