antlr3 1.8.0 → 1.8.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (111) hide show
  1. data/History.txt +35 -0
  2. data/Manifest.txt +73 -0
  3. data/README.txt +6 -13
  4. data/java/RubyTarget.java +43 -19
  5. data/java/antlr-full-3.2.1.jar +0 -0
  6. data/lib/antlr3/debug.rb +2 -0
  7. data/lib/antlr3/debug/event-hub.rb +55 -55
  8. data/lib/antlr3/debug/record-event-listener.rb +2 -2
  9. data/lib/antlr3/debug/rule-tracer.rb +14 -14
  10. data/lib/antlr3/debug/socket.rb +47 -47
  11. data/lib/antlr3/debug/trace-event-listener.rb +8 -8
  12. data/lib/antlr3/main.rb +29 -9
  13. data/lib/antlr3/modes/ast-builder.rb +7 -7
  14. data/lib/antlr3/modes/filter.rb +19 -17
  15. data/lib/antlr3/profile.rb +34 -6
  16. data/lib/antlr3/recognizers.rb +50 -1
  17. data/lib/antlr3/streams.rb +19 -15
  18. data/lib/antlr3/streams/rewrite.rb +241 -229
  19. data/lib/antlr3/template/group-file-lexer.rb +6 -8
  20. data/lib/antlr3/template/group-file-parser.rb +16 -16
  21. data/lib/antlr3/template/group-file.rb +1 -1
  22. data/lib/antlr3/test/call-stack.rb +13 -13
  23. data/lib/antlr3/test/core-extensions.rb +69 -69
  24. data/lib/antlr3/test/functional.rb +0 -4
  25. data/lib/antlr3/test/grammar.rb +70 -70
  26. data/lib/antlr3/token.rb +41 -17
  27. data/lib/antlr3/tree.rb +11 -14
  28. data/lib/antlr3/tree/debug.rb +53 -53
  29. data/lib/antlr3/tree/visitor.rb +11 -11
  30. data/lib/antlr3/tree/wizard.rb +35 -35
  31. data/lib/antlr3/util.rb +18 -0
  32. data/lib/antlr3/version.rb +1 -1
  33. data/rakefile +1 -0
  34. data/samples/ANTLRv3Grammar.g +3 -3
  35. data/samples/JavaScript.g +702 -0
  36. data/samples/standard/C/C.g +543 -0
  37. data/samples/standard/C/C.tokens +175 -0
  38. data/samples/standard/C/C__testrig.st +0 -0
  39. data/samples/standard/C/c.rb +12 -0
  40. data/samples/standard/C/input +3479 -0
  41. data/samples/standard/C/output +171 -0
  42. data/samples/standard/LL-star/LLStar.g +101 -0
  43. data/samples/standard/LL-star/input +12 -0
  44. data/samples/standard/LL-star/ll-star.rb +12 -0
  45. data/samples/standard/LL-star/output +2 -0
  46. data/samples/standard/calc/Calculator.g +47 -0
  47. data/samples/standard/calc/Calculator.py +16 -0
  48. data/samples/standard/calc/Calculator.rb +28 -0
  49. data/samples/standard/cminus/CMinus.g +141 -0
  50. data/samples/standard/cminus/bytecode.group +80 -0
  51. data/samples/standard/cminus/cminus.rb +16 -0
  52. data/samples/standard/cminus/input +9 -0
  53. data/samples/standard/cminus/java.group +91 -0
  54. data/samples/standard/cminus/output +11 -0
  55. data/samples/standard/cminus/python.group +48 -0
  56. data/samples/standard/dynamic-scope/DynamicScopes.g +50 -0
  57. data/samples/standard/dynamic-scope/dynamic-scopes.rb +12 -0
  58. data/samples/standard/dynamic-scope/input +7 -0
  59. data/samples/standard/dynamic-scope/output +4 -0
  60. data/samples/standard/fuzzy/FuzzyJava.g +89 -0
  61. data/samples/standard/fuzzy/fuzzy.py +11 -0
  62. data/samples/standard/fuzzy/fuzzy.rb +9 -0
  63. data/samples/standard/fuzzy/input +13 -0
  64. data/samples/standard/fuzzy/output +12 -0
  65. data/samples/standard/hoisted-predicates/HoistedPredicates.g +40 -0
  66. data/samples/standard/hoisted-predicates/hoisted-predicates.rb +13 -0
  67. data/samples/standard/hoisted-predicates/input +1 -0
  68. data/samples/standard/hoisted-predicates/output +1 -0
  69. data/samples/standard/island-grammar/Javadoc.g +46 -0
  70. data/samples/standard/island-grammar/Simple.g +104 -0
  71. data/samples/standard/island-grammar/input +11 -0
  72. data/samples/standard/island-grammar/island.rb +12 -0
  73. data/samples/standard/island-grammar/output +16 -0
  74. data/samples/standard/java/Java.g +827 -0
  75. data/samples/standard/java/input +80 -0
  76. data/samples/standard/java/java.rb +13 -0
  77. data/samples/standard/java/output +1 -0
  78. data/samples/standard/python/Python.g +718 -0
  79. data/samples/standard/python/PythonTokenSource.rb +107 -0
  80. data/samples/standard/python/input +210 -0
  81. data/samples/standard/python/output +24 -0
  82. data/samples/standard/python/python.rb +14 -0
  83. data/samples/standard/rakefile +18 -0
  84. data/samples/standard/scopes/SymbolTable.g +66 -0
  85. data/samples/standard/scopes/input +12 -0
  86. data/samples/standard/scopes/output +3 -0
  87. data/samples/standard/scopes/scopes.rb +12 -0
  88. data/samples/standard/simplecTreeParser/SimpleC.g +113 -0
  89. data/samples/standard/simplecTreeParser/SimpleCWalker.g +64 -0
  90. data/samples/standard/simplecTreeParser/input +12 -0
  91. data/samples/standard/simplecTreeParser/output +1 -0
  92. data/samples/standard/simplecTreeParser/simplec.rb +18 -0
  93. data/samples/standard/treeparser/Lang.g +24 -0
  94. data/samples/standard/treeparser/LangDumpDecl.g +17 -0
  95. data/samples/standard/treeparser/input +1 -0
  96. data/samples/standard/treeparser/output +2 -0
  97. data/samples/standard/treeparser/treeparser.rb +18 -0
  98. data/samples/standard/tweak/Tweak.g +68 -0
  99. data/samples/standard/tweak/input +9 -0
  100. data/samples/standard/tweak/output +16 -0
  101. data/samples/standard/tweak/tweak.rb +13 -0
  102. data/samples/standard/xml/README +16 -0
  103. data/samples/standard/xml/XML.g +123 -0
  104. data/samples/standard/xml/input +21 -0
  105. data/samples/standard/xml/output +39 -0
  106. data/samples/standard/xml/xml.rb +9 -0
  107. data/templates/Ruby.stg +4 -4
  108. data/test/functional/ast-output/auto-ast.rb +0 -5
  109. data/test/functional/ast-output/rewrites.rb +4 -4
  110. data/test/unit/test-scope.rb +45 -0
  111. metadata +96 -8
@@ -0,0 +1,3 @@
1
+ globals: [i, j]
2
+ level 2 symbols: [j, k]
3
+ level 1 symbols: [i, k]
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/ruby
2
+ # encoding: utf-8
3
+ $:.unshift( File.dirname( __FILE__ ) ) # search this script's own directory first when resolving the requires below
4
+ require 'SymbolTableLexer' # NOTE(review): presumably generated from SymbolTable.g - confirm
5
+ require 'SymbolTableParser'
6
+
7
+ for file in ARGV # each command-line argument is used as an input file path
8
+ input = ANTLR3::FileStream.new( file )
9
+ lexer = SymbolTable::Lexer.new( input )
10
+ parser = SymbolTable::Parser.new( lexer )
11
+ parser.prog # run the parser's prog rule; its return value is discarded
12
+ end
@@ -0,0 +1,113 @@
1
+ grammar SimpleC;
2
+ options {
3
+ language = Ruby;
4
+ output = AST;
5
+ }
6
+
7
+ tokens {
8
+ VAR_DEF;
9
+ ARG_DEF;
10
+ FUNC_HDR;
11
+ FUNC_DECL;
12
+ FUNC_DEF;
13
+ BLOCK;
14
+ }
15
+
16
+ program
17
+ : declaration+
18
+ ;
19
+
20
+ declaration
21
+ : variable
22
+ | functionHeader ';' -> ^(FUNC_DECL functionHeader)
23
+ | functionHeader block -> ^(FUNC_DEF functionHeader block)
24
+ ;
25
+
26
+ variable
27
+ : type declarator ';' -> ^(VAR_DEF type declarator)
28
+ ;
29
+
30
+ declarator
31
+ : ID
32
+ ;
33
+
34
+ functionHeader // return type, name, and an optional comma-separated parameter list
35
+ : type ID '(' ( formalParameter ( ',' formalParameter )* )? ')'
36
+ -> ^(FUNC_HDR type ID formalParameter*) // '*' rather than '+': the parameter list above is optional, so the rewrite must tolerate zero formalParameter subtrees
37
+ ;
38
+
39
+ formalParameter
40
+ : type declarator -> ^(ARG_DEF type declarator)
41
+ ;
42
+
43
+ type
44
+ : 'int'
45
+ | 'char'
46
+ | 'void'
47
+ | ID
48
+ ;
49
+
50
+ block
51
+ : lc='{'
52
+ variable*
53
+ stat*
54
+ '}'
55
+ -> ^(BLOCK[$lc,"BLOCK"] variable* stat*)
56
+ ;
57
+
58
+ stat: forStat
59
+ | expr ';'!
60
+ | block
61
+ | assignStat ';'!
62
+ | ';'!
63
+ ;
64
+
65
+ forStat
66
+ : 'for' '(' start=assignStat ';' expr ';' next=assignStat ')' block
67
+ -> ^('for' $start expr $next block)
68
+ ;
69
+
70
+ assignStat
71
+ : ID EQ expr -> ^(EQ ID expr)
72
+ ;
73
+
74
+ expr: condExpr
75
+ ;
76
+
77
+ condExpr
78
+ : aexpr ( ('=='^ | '<'^) aexpr )?
79
+ ;
80
+
81
+ aexpr
82
+ : atom ( '+'^ atom )*
83
+ ;
84
+
85
+ atom
86
+ : ID
87
+ | INT
88
+ | '(' expr ')' -> expr
89
+ ;
90
+
91
+ FOR : 'for' ;
92
+ INT_TYPE : 'int' ;
93
+ CHAR: 'char';
94
+ VOID: 'void';
95
+
96
+ ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')*
97
+ ;
98
+
99
+ INT : ('0'..'9')+
100
+ ;
101
+
102
+ EQ : '=' ;
103
+ EQEQ : '==' ;
104
+ LT : '<' ;
105
+ PLUS : '+' ;
106
+
107
+ WS : ( ' '
108
+ | '\t'
109
+ | '\r'
110
+ | '\n'
111
+ )+
112
+ { $channel=HIDDEN }
113
+ ;
@@ -0,0 +1,64 @@
1
+ tree grammar SimpleCWalker;
2
+ options {
3
+ language = Ruby;
4
+ tokenVocab = SimpleC;
5
+ ASTLabelType = CommonTree;
6
+ }
7
+
8
+ program
9
+ : declaration+
10
+ ;
11
+
12
+ declaration
13
+ : variable
14
+ | ^(FUNC_DECL functionHeader)
15
+ | ^(FUNC_DEF functionHeader block)
16
+ ;
17
+
18
+ variable
19
+ : ^(VAR_DEF type declarator)
20
+ ;
21
+
22
+ declarator
23
+ : ID
24
+ ;
25
+
26
+ functionHeader // walks a FUNC_HDR subtree: type, name, then any parameter subtrees
27
+ : ^(FUNC_HDR type ID formalParameter*) // '*' so headers with no parameters, which the parser grammar's optional parameter list permits, also match
28
+ ;
29
+
30
+ formalParameter
31
+ : ^(ARG_DEF type declarator)
32
+ ;
33
+
34
+ type
35
+ : 'int'
36
+ | 'char'
37
+ | 'void'
38
+ | ID
39
+ ;
40
+
41
+ block
42
+ : ^(BLOCK variable* stat*)
43
+ ;
44
+
45
+ stat: forStat
46
+ | expr
47
+ | block
48
+ ;
49
+
50
+ forStat
51
+ : ^('for' expr expr expr block)
52
+ ;
53
+
54
+ expr: ^(EQEQ expr expr)
55
+ | ^(LT expr expr)
56
+ | ^(PLUS expr expr)
57
+ | ^(EQ ID expr)
58
+ | atom
59
+ ;
60
+
61
+ atom
62
+ : ID
63
+ | INT
64
+ ;
@@ -0,0 +1,12 @@
1
+ char c;
2
+ int x;
3
+
4
+ void bar(int x);
5
+
6
+ int foo(int y, char d) {
7
+ int i;
8
+ for (i=0; i<3; i=i+1) {
9
+ x=3;
10
+ y=5;
11
+ }
12
+ }
@@ -0,0 +1 @@
1
+ tree: (VAR_DEF char c) (VAR_DEF int x) (FUNC_DECL (FUNC_HDR void bar (ARG_DEF int x))) (FUNC_DEF (FUNC_HDR int foo (ARG_DEF int y) (ARG_DEF char d)) (BLOCK (VAR_DEF int i) (for (= i 0) (< i 3) (= i (+ i 1)) (BLOCK (= x 3) (= y 5)))))
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/ruby
2
+ # encoding: utf-8
3
+
4
+ $:.unshift( File.dirname( __FILE__ ) ) # search this script's own directory first when resolving the requires below
5
+ require 'SimpleCLexer'
6
+ require 'SimpleCParser'
7
+ require 'SimpleCWalker'
8
+
9
+ for file in ARGV # each command-line argument is used as an input file path
10
+ input = ANTLR3::FileStream.new( file )
11
+ parser = SimpleC::Parser.new( input ) # no explicit lexer here - NOTE(review): presumably the runtime wraps the stream in SimpleC::Lexer; confirm
12
+ tree = parser.program.tree # run the program rule and keep the tree from its return value
13
+ puts( "tree: #{ tree.inspect }" )
14
+ nodes = ANTLR3::AST::CommonTreeNodeStream.new( # node stream over the tree for the tree parser below
15
+ tree, :token_stream => parser.input
16
+ )
17
+ SimpleCWalker::TreeParser.new( nodes ).program # walk the tree with the tree grammar's program rule; result discarded
18
+ end
@@ -0,0 +1,24 @@
1
+ grammar Lang;
2
+ options {
3
+ language = Ruby;
4
+ output = AST;
5
+ }
6
+
7
+ tokens {DECL;} // an imaginary node
8
+
9
+ start : decl ;
10
+
11
+ decl
12
+ : type ID ';' -> ^(DECL type ID)
13
+ ;
14
+
15
+ type
16
+ : INT_TYPE // automatic tree construction builds a node for this rule
17
+ | FLOAT_TYPE
18
+ ;
19
+
20
+ INT_TYPE : 'int' ;
21
+ FLOAT_TYPE : 'float' ;
22
+ ID : 'a'..'z'+ ;
23
+ INT : '0'..'9'+ ;
24
+ WS : (' '|'\n') {$channel=HIDDEN;} ;
@@ -0,0 +1,17 @@
1
+ tree grammar LangDumpDecl;
2
+
3
+ options {
4
+ language = Ruby;
5
+ tokenVocab = Lang;
6
+ }
7
+
8
+ decl : ^(DECL type declarator)
9
+ // label.start, label.start, label.text
10
+ { puts( "int #{ $declarator.text }" ) }
11
+ ;
12
+
13
+ type : INT_TYPE ;
14
+
15
+ declarator
16
+ : ID
17
+ ;
@@ -0,0 +1 @@
1
+ int a;
@@ -0,0 +1,2 @@
1
+ tree: (DECL int a)
2
+ int a
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/ruby
2
+ # encoding: utf-8
3
+
4
+ $:.unshift( File.dirname( __FILE__ ) ) # search this script's own directory first when resolving the requires below
5
+ require 'LangLexer'
6
+ require 'LangParser'
7
+ require 'LangDumpDecl'
8
+
9
+ for file in ARGV # each command-line argument is used as an input file path
10
+ input = ANTLR3::FileStream.new( file )
11
+ parser = Lang::Parser.new( input ) # no explicit lexer here - NOTE(review): presumably the runtime wraps the stream in Lang::Lexer; confirm
12
+ tree = parser.start.tree # run the start rule and keep the tree from its return value
13
+ puts( "tree: #{ tree.inspect }" )
14
+ nodes = ANTLR3::AST::CommonTreeNodeStream.new( # node stream over the tree for the tree parser below
15
+ tree, :token_stream => parser.input
16
+ )
17
+ LangDumpDecl::TreeParser.new( nodes ).decl # walk the tree with the tree grammar's decl rule; result discarded
18
+ end
@@ -0,0 +1,68 @@
1
+ /** Convert the simple input to be java code; wrap in a class,
2
+ * convert method with "public void", add decls. This shows how to insert
3
+ * extra text into a stream of tokens and how to replace a token
4
+ * with some text. Calling render on the TokenRewriteStream
5
+ * in the driver script will print out the rewritten input stream.
6
+ *
7
+ * Note that you can do the instructions in any order as the
8
+ * rewrite instructions just get queued up and executed upon toString().
9
+ */
10
+ grammar Tweak;
11
+ options { language = Ruby; }
12
+
13
+ @init {
14
+ @input = ANTLR3::TokenRewriteStream.new( @input )
15
+ }
16
+
17
+ program
18
+ @init { start = @input.look }
19
+ : method+
20
+ {
21
+ @input.insert_before( start,"public class Wrapper {\n" )
22
+ # note the reference to the last token matched for method:
23
+ @input.insert_after( $method.stop, "\n}\n" )
24
+ }
25
+ ;
26
+
27
+ method
28
+ : m='method' ID '(' ')' body
29
+ { @input.replace( $m, "public void" ) }
30
+ ;
31
+
32
+ body
33
+ // decls is on body's local variable stack but is visible to
34
+ // any rule that body calls such as stat. From other rules
35
+ // it is referenced as $body::decls
36
+ // From within rule body, you can use $decls shorthand
37
+ scope {
38
+ decls
39
+ }
40
+ @init {
41
+ $body::decls = Set.new
42
+ }
43
+ : lcurly='{' stat* '}'
44
+ {
45
+ # dump declarations for all identifiers seen in statement list
46
+ $body::decls.each { | i | @input.insert_after( $lcurly, "\nint #{ i };" ) }
47
+ }
48
+ ;
49
+
50
+ stat: ID '=' expr ';' { $body::decls.add( $ID.text ) } // track left-hand sides
51
+ ;
52
+
53
+ expr: mul ('+' mul)*
54
+ ;
55
+
56
+ mul : atom ('*' atom)*
57
+ ;
58
+
59
+ atom: ID
60
+ | INT
61
+ ;
62
+
63
+ ID : ('a'..'z'|'A'..'Z')+ ;
64
+
65
+ INT : ('0'..'9')+ ;
66
+
67
+ WS : (' '|'\t'|'\n')+ {$channel=HIDDEN;}
68
+ ;
@@ -0,0 +1,9 @@
1
+ method foo() {
2
+ i = 3;
3
+ k = i;
4
+ i = k*4;
5
+ }
6
+
7
+ method bar() {
8
+ j = i*2;
9
+ }
@@ -0,0 +1,16 @@
1
+ public class Wrapper {
2
+ public void foo() {
3
+ int k;
4
+ int i;
5
+ i = 3;
6
+ k = i;
7
+ i = k*4;
8
+ }
9
+
10
+ public void bar() {
11
+ int j;
12
+ j = i*2;
13
+ }
14
+ }
15
+
16
+
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/ruby
2
+ # encoding: utf-8
3
+ $:.unshift( File.dirname( __FILE__ ) ) # search this script's own directory first when resolving the requires below
4
+ require 'TweakLexer'
5
+ require 'TweakParser'
6
+
7
+ for file in ARGV # each command-line argument is used as an input file path
8
+ input = ANTLR3::FileStream.new( file )
9
+ lexer = Tweak::Lexer.new( input )
10
+ parser = Tweak::Parser.new( lexer )
11
+ parser.program # run the program rule; its return value is discarded
12
+ puts( parser.input.render ) # print what the parser's input stream renders - NOTE(review): presumably the TokenRewriteStream installed by the grammar's @init action; confirm
13
+ end
@@ -0,0 +1,16 @@
1
+ The example input file has been slightly modified from the analogous
2
+ Java example: the non-ASCII characters have been removed and replaced
3
+ by some ASCII data.
4
+
5
+ The lexer would fail if it tried to compare a byte >0x7f from the input
6
+ to a Unicode constant.
7
+ It would work fine if you fed Unicode into the lexer, but then
8
+ sys.stdout.write() would probably fail when it tried to print non-ASCII
9
+ data.
10
+
11
+ It will also work with non-ASCII input (the lexer operates on unicode()
12
+ strings), but then you'll have to do some more work (which I omitted for
13
+ simplicity):
14
+ - decode the input using the appropriate encoding into a Unicode string.
15
+ - encode the data as you send it to the console or store it in a file.
16
+
@@ -0,0 +1,123 @@
1
+ lexer grammar XML;
2
+ options { language = Ruby; }
3
+
4
+ @members {
5
+ def quote(text) # returns text wrapped in double quotes; called from the puts actions in ELEMENT
6
+ text = text.gsub(/\"/, '\\"') # prefix every double quote in text with a backslash
7
+ \%("#{ text }") # NOTE(review): the leading backslash presumably escapes the percent sign for the ANTLR action translator - confirm
8
+ end
9
+ }
10
+
11
+ DOCUMENT
12
+ : XMLDECL? WS? DOCTYPE? WS? ELEMENT WS?
13
+ ;
14
+
15
+ fragment DOCTYPE
16
+ :
17
+ '<!DOCTYPE' WS rootElementName=GENERIC_ID
18
+ {puts("ROOTELEMENT: " + $rootElementName.text)}
19
+ WS
20
+ (
21
+ ( 'SYSTEM' WS sys1=VALUE
22
+ {puts("SYSTEM: " + $sys1.text)}
23
+
24
+ | 'PUBLIC' WS pub=VALUE WS sys2=VALUE
25
+ {puts("PUBLIC: " + $pub.text)}
26
+ {puts("SYSTEM: " + $sys2.text)}
27
+ )
28
+ ( WS )?
29
+ )?
30
+ ( dtd=INTERNAL_DTD
31
+ {puts("INTERNAL DTD: " + $dtd.text)}
32
+ )?
33
+ '>'
34
+ ;
35
+
36
+ fragment INTERNAL_DTD : '[' (options {greedy=false;} : .)* ']' ;
37
+
38
+ fragment PI :
39
+ '<?' target=GENERIC_ID WS?
40
+ {puts("PI: " + $target.text)}
41
+ ( ATTRIBUTE WS? )* '?>'
42
+ ;
43
+
44
+ fragment XMLDECL :
45
+ '<?' ('x'|'X') ('m'|'M') ('l'|'L') WS?
46
+ {puts("XML declaration")}
47
+ ( ATTRIBUTE WS? )* '?>'
48
+ ;
49
+
50
+
51
+ fragment ELEMENT
52
+ : ( START_TAG
53
+ (ELEMENT
54
+ | t=PCDATA
55
+ {puts("PCDATA: " << quote($t.text))}
56
+ | t=CDATA
57
+ {puts("CDATA: " << quote($t.text))}
58
+ | t=COMMENT
59
+ {puts("Comment: " << quote($t.text))}
60
+ | pi=PI
61
+ )*
62
+ END_TAG
63
+ | EMPTY_ELEMENT
64
+ )
65
+ ;
66
+
67
+ fragment START_TAG
68
+ : '<' WS? name=GENERIC_ID WS?
69
+ {puts("Start Tag: " + $name.text)}
70
+ ( ATTRIBUTE WS? )* '>'
71
+ ;
72
+
73
+ fragment EMPTY_ELEMENT
74
+ : '<' WS? name=GENERIC_ID WS?
75
+ {puts("Empty Element: " + $name.text)}
76
+ ( ATTRIBUTE WS? )* '/>'
77
+ ;
78
+
79
+ fragment ATTRIBUTE
80
+ : name=GENERIC_ID WS? '=' WS? value=VALUE
81
+ {puts("Attr: " + $name.text + " = "+ $value.text)}
82
+ ;
83
+
84
+ fragment END_TAG
85
+ : '</' WS? name=GENERIC_ID WS? '>'
86
+ {puts("End Tag: " + $name.text)}
87
+ ;
88
+
89
+ fragment COMMENT
90
+ : '<!--' (options {greedy=false;} : .)* '-->'
91
+ ;
92
+
93
+ fragment CDATA
94
+ : '<![CDATA[' (options {greedy=false;} : .)* ']]>'
95
+ ;
96
+
97
+ fragment PCDATA : (~'<')+ ;
98
+
99
+ fragment VALUE :
100
+ ( '\"' (~'\"')* '\"'
101
+ | '\'' (~'\'')* '\''
102
+ )
103
+ ;
104
+
105
+ fragment GENERIC_ID
106
+ : ( LETTER | '_' | ':')
107
+ ( options {greedy=true;} : LETTER | '0'..'9' | '.' | '-' | '_' | ':' )*
108
+ ;
109
+
110
+ fragment LETTER
111
+ : 'a'..'z'
112
+ | 'A'..'Z'
113
+ ;
114
+
115
+ fragment WS :
116
+ ( ' '
117
+ | '\t'
118
+ | ( '\n'
119
+ | '\r\n'
120
+ | '\r'
121
+ )
122
+ )+
123
+ ;