antlr3 1.8.0 → 1.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. data/History.txt +35 -0
  2. data/Manifest.txt +73 -0
  3. data/README.txt +6 -13
  4. data/java/RubyTarget.java +43 -19
  5. data/java/antlr-full-3.2.1.jar +0 -0
  6. data/lib/antlr3/debug.rb +2 -0
  7. data/lib/antlr3/debug/event-hub.rb +55 -55
  8. data/lib/antlr3/debug/record-event-listener.rb +2 -2
  9. data/lib/antlr3/debug/rule-tracer.rb +14 -14
  10. data/lib/antlr3/debug/socket.rb +47 -47
  11. data/lib/antlr3/debug/trace-event-listener.rb +8 -8
  12. data/lib/antlr3/main.rb +29 -9
  13. data/lib/antlr3/modes/ast-builder.rb +7 -7
  14. data/lib/antlr3/modes/filter.rb +19 -17
  15. data/lib/antlr3/profile.rb +34 -6
  16. data/lib/antlr3/recognizers.rb +50 -1
  17. data/lib/antlr3/streams.rb +19 -15
  18. data/lib/antlr3/streams/rewrite.rb +241 -229
  19. data/lib/antlr3/template/group-file-lexer.rb +6 -8
  20. data/lib/antlr3/template/group-file-parser.rb +16 -16
  21. data/lib/antlr3/template/group-file.rb +1 -1
  22. data/lib/antlr3/test/call-stack.rb +13 -13
  23. data/lib/antlr3/test/core-extensions.rb +69 -69
  24. data/lib/antlr3/test/functional.rb +0 -4
  25. data/lib/antlr3/test/grammar.rb +70 -70
  26. data/lib/antlr3/token.rb +41 -17
  27. data/lib/antlr3/tree.rb +11 -14
  28. data/lib/antlr3/tree/debug.rb +53 -53
  29. data/lib/antlr3/tree/visitor.rb +11 -11
  30. data/lib/antlr3/tree/wizard.rb +35 -35
  31. data/lib/antlr3/util.rb +18 -0
  32. data/lib/antlr3/version.rb +1 -1
  33. data/rakefile +1 -0
  34. data/samples/ANTLRv3Grammar.g +3 -3
  35. data/samples/JavaScript.g +702 -0
  36. data/samples/standard/C/C.g +543 -0
  37. data/samples/standard/C/C.tokens +175 -0
  38. data/samples/standard/C/C__testrig.st +0 -0
  39. data/samples/standard/C/c.rb +12 -0
  40. data/samples/standard/C/input +3479 -0
  41. data/samples/standard/C/output +171 -0
  42. data/samples/standard/LL-star/LLStar.g +101 -0
  43. data/samples/standard/LL-star/input +12 -0
  44. data/samples/standard/LL-star/ll-star.rb +12 -0
  45. data/samples/standard/LL-star/output +2 -0
  46. data/samples/standard/calc/Calculator.g +47 -0
  47. data/samples/standard/calc/Calculator.py +16 -0
  48. data/samples/standard/calc/Calculator.rb +28 -0
  49. data/samples/standard/cminus/CMinus.g +141 -0
  50. data/samples/standard/cminus/bytecode.group +80 -0
  51. data/samples/standard/cminus/cminus.rb +16 -0
  52. data/samples/standard/cminus/input +9 -0
  53. data/samples/standard/cminus/java.group +91 -0
  54. data/samples/standard/cminus/output +11 -0
  55. data/samples/standard/cminus/python.group +48 -0
  56. data/samples/standard/dynamic-scope/DynamicScopes.g +50 -0
  57. data/samples/standard/dynamic-scope/dynamic-scopes.rb +12 -0
  58. data/samples/standard/dynamic-scope/input +7 -0
  59. data/samples/standard/dynamic-scope/output +4 -0
  60. data/samples/standard/fuzzy/FuzzyJava.g +89 -0
  61. data/samples/standard/fuzzy/fuzzy.py +11 -0
  62. data/samples/standard/fuzzy/fuzzy.rb +9 -0
  63. data/samples/standard/fuzzy/input +13 -0
  64. data/samples/standard/fuzzy/output +12 -0
  65. data/samples/standard/hoisted-predicates/HoistedPredicates.g +40 -0
  66. data/samples/standard/hoisted-predicates/hoisted-predicates.rb +13 -0
  67. data/samples/standard/hoisted-predicates/input +1 -0
  68. data/samples/standard/hoisted-predicates/output +1 -0
  69. data/samples/standard/island-grammar/Javadoc.g +46 -0
  70. data/samples/standard/island-grammar/Simple.g +104 -0
  71. data/samples/standard/island-grammar/input +11 -0
  72. data/samples/standard/island-grammar/island.rb +12 -0
  73. data/samples/standard/island-grammar/output +16 -0
  74. data/samples/standard/java/Java.g +827 -0
  75. data/samples/standard/java/input +80 -0
  76. data/samples/standard/java/java.rb +13 -0
  77. data/samples/standard/java/output +1 -0
  78. data/samples/standard/python/Python.g +718 -0
  79. data/samples/standard/python/PythonTokenSource.rb +107 -0
  80. data/samples/standard/python/input +210 -0
  81. data/samples/standard/python/output +24 -0
  82. data/samples/standard/python/python.rb +14 -0
  83. data/samples/standard/rakefile +18 -0
  84. data/samples/standard/scopes/SymbolTable.g +66 -0
  85. data/samples/standard/scopes/input +12 -0
  86. data/samples/standard/scopes/output +3 -0
  87. data/samples/standard/scopes/scopes.rb +12 -0
  88. data/samples/standard/simplecTreeParser/SimpleC.g +113 -0
  89. data/samples/standard/simplecTreeParser/SimpleCWalker.g +64 -0
  90. data/samples/standard/simplecTreeParser/input +12 -0
  91. data/samples/standard/simplecTreeParser/output +1 -0
  92. data/samples/standard/simplecTreeParser/simplec.rb +18 -0
  93. data/samples/standard/treeparser/Lang.g +24 -0
  94. data/samples/standard/treeparser/LangDumpDecl.g +17 -0
  95. data/samples/standard/treeparser/input +1 -0
  96. data/samples/standard/treeparser/output +2 -0
  97. data/samples/standard/treeparser/treeparser.rb +18 -0
  98. data/samples/standard/tweak/Tweak.g +68 -0
  99. data/samples/standard/tweak/input +9 -0
  100. data/samples/standard/tweak/output +16 -0
  101. data/samples/standard/tweak/tweak.rb +13 -0
  102. data/samples/standard/xml/README +16 -0
  103. data/samples/standard/xml/XML.g +123 -0
  104. data/samples/standard/xml/input +21 -0
  105. data/samples/standard/xml/output +39 -0
  106. data/samples/standard/xml/xml.rb +9 -0
  107. data/templates/Ruby.stg +4 -4
  108. data/test/functional/ast-output/auto-ast.rb +0 -5
  109. data/test/functional/ast-output/rewrites.rb +4 -4
  110. data/test/unit/test-scope.rb +45 -0
  111. metadata +96 -8
@@ -0,0 +1,3 @@
1
+ globals: [i, j]
2
+ level 2 symbols: [j, k]
3
+ level 1 symbols: [i, k]
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/ruby
2
+ # encoding: utf-8
3
+ $:.unshift( File.dirname( __FILE__ ) )
4
+ require 'SymbolTableLexer'
5
+ require 'SymbolTableParser'
6
+
7
+ for file in ARGV
8
+ input = ANTLR3::FileStream.new( file )
9
+ lexer = SymbolTable::Lexer.new( input )
10
+ parser = SymbolTable::Parser.new( lexer )
11
+ parser.prog
12
+ end
@@ -0,0 +1,113 @@
1
+ grammar SimpleC;
2
+ options {
3
+ language = Ruby;
4
+ output = AST;
5
+ }
6
+
7
+ tokens {
8
+ VAR_DEF;
9
+ ARG_DEF;
10
+ FUNC_HDR;
11
+ FUNC_DECL;
12
+ FUNC_DEF;
13
+ BLOCK;
14
+ }
15
+
16
+ program
17
+ : declaration+
18
+ ;
19
+
20
+ declaration
21
+ : variable
22
+ | functionHeader ';' -> ^(FUNC_DECL functionHeader)
23
+ | functionHeader block -> ^(FUNC_DEF functionHeader block)
24
+ ;
25
+
26
+ variable
27
+ : type declarator ';' -> ^(VAR_DEF type declarator)
28
+ ;
29
+
30
+ declarator
31
+ : ID
32
+ ;
33
+
34
+ functionHeader // header of a function declaration or definition: return type, name, optional parameters
35
+ : type ID '(' ( formalParameter ( ',' formalParameter )* )? ')'
36
+ -> ^(FUNC_HDR type ID formalParameter*) // '*' rather than '+': the parameter list above is optional, so zero ARG_DEF children must be allowed
37
+ ;
38
+
39
+ formalParameter
40
+ : type declarator -> ^(ARG_DEF type declarator)
41
+ ;
42
+
43
+ type
44
+ : 'int'
45
+ | 'char'
46
+ | 'void'
47
+ | ID
48
+ ;
49
+
50
+ block
51
+ : lc='{'
52
+ variable*
53
+ stat*
54
+ '}'
55
+ -> ^(BLOCK[$lc,"BLOCK"] variable* stat*)
56
+ ;
57
+
58
+ stat: forStat
59
+ | expr ';'!
60
+ | block
61
+ | assignStat ';'!
62
+ | ';'!
63
+ ;
64
+
65
+ forStat
66
+ : 'for' '(' start=assignStat ';' expr ';' next=assignStat ')' block
67
+ -> ^('for' $start expr $next block)
68
+ ;
69
+
70
+ assignStat
71
+ : ID EQ expr -> ^(EQ ID expr)
72
+ ;
73
+
74
+ expr: condExpr
75
+ ;
76
+
77
+ condExpr
78
+ : aexpr ( ('=='^ | '<'^) aexpr )?
79
+ ;
80
+
81
+ aexpr
82
+ : atom ( '+'^ atom )*
83
+ ;
84
+
85
+ atom
86
+ : ID
87
+ | INT
88
+ | '(' expr ')' -> expr
89
+ ;
90
+
91
+ FOR : 'for' ;
92
+ INT_TYPE : 'int' ;
93
+ CHAR: 'char';
94
+ VOID: 'void';
95
+
96
+ ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')*
97
+ ;
98
+
99
+ INT : ('0'..'9')+
100
+ ;
101
+
102
+ EQ : '=' ;
103
+ EQEQ : '==' ;
104
+ LT : '<' ;
105
+ PLUS : '+' ;
106
+
107
+ WS : ( ' '
108
+ | '\t'
109
+ | '\r'
110
+ | '\n'
111
+ )+
112
+ { $channel=HIDDEN }
113
+ ;
@@ -0,0 +1,64 @@
1
+ tree grammar SimpleCWalker;
2
+ options {
3
+ language = Ruby;
4
+ tokenVocab = SimpleC;
5
+ ASTLabelType = CommonTree;
6
+ }
7
+
8
+ program
9
+ : declaration+
10
+ ;
11
+
12
+ declaration
13
+ : variable
14
+ | ^(FUNC_DECL functionHeader)
15
+ | ^(FUNC_DEF functionHeader block)
16
+ ;
17
+
18
+ variable
19
+ : ^(VAR_DEF type declarator)
20
+ ;
21
+
22
+ declarator
23
+ : ID
24
+ ;
25
+
26
+ functionHeader // matches the FUNC_HDR subtree: return type, name, then zero or more ARG_DEF parameters
27
+ : ^(FUNC_HDR type ID formalParameter*) // '*' rather than '+': SimpleC.g makes the parameter list optional
28
+ ;
29
+
30
+ formalParameter
31
+ : ^(ARG_DEF type declarator)
32
+ ;
33
+
34
+ type
35
+ : 'int'
36
+ | 'char'
37
+ | 'void'
38
+ | ID
39
+ ;
40
+
41
+ block
42
+ : ^(BLOCK variable* stat*)
43
+ ;
44
+
45
+ stat: forStat
46
+ | expr
47
+ | block
48
+ ;
49
+
50
+ forStat
51
+ : ^('for' expr expr expr block)
52
+ ;
53
+
54
+ expr: ^(EQEQ expr expr)
55
+ | ^(LT expr expr)
56
+ | ^(PLUS expr expr)
57
+ | ^(EQ ID expr)
58
+ | atom
59
+ ;
60
+
61
+ atom
62
+ : ID
63
+ | INT
64
+ ;
@@ -0,0 +1,12 @@
1
+ char c;
2
+ int x;
3
+
4
+ void bar(int x);
5
+
6
+ int foo(int y, char d) {
7
+ int i;
8
+ for (i=0; i<3; i=i+1) {
9
+ x=3;
10
+ y=5;
11
+ }
12
+ }
@@ -0,0 +1 @@
1
+ tree: (VAR_DEF char c) (VAR_DEF int x) (FUNC_DECL (FUNC_HDR void bar (ARG_DEF int x))) (FUNC_DEF (FUNC_HDR int foo (ARG_DEF int y) (ARG_DEF char d)) (BLOCK (VAR_DEF int i) (for (= i 0) (< i 3) (= i (+ i 1)) (BLOCK (= x 3) (= y 5)))))
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/ruby
2
+ # encoding: utf-8
3
+
4
+ $:.unshift( File.dirname( __FILE__ ) )
5
+ require 'SimpleCLexer'
6
+ require 'SimpleCParser'
7
+ require 'SimpleCWalker'
8
+
9
+ for file in ARGV
10
+ input = ANTLR3::FileStream.new( file )
11
+ parser = SimpleC::Parser.new( input )
12
+ tree = parser.program.tree
13
+ puts( "tree: #{ tree.inspect }" )
14
+ nodes = ANTLR3::AST::CommonTreeNodeStream.new(
15
+ tree, :token_stream => parser.input
16
+ )
17
+ SimpleCWalker::TreeParser.new( nodes ).program
18
+ end
@@ -0,0 +1,24 @@
1
+ grammar Lang;
2
+ options {
3
+ language = Ruby;
4
+ output = AST;
5
+ }
6
+
7
+ tokens {DECL;} // an imaginary node
8
+ // Entry rule: exactly one declaration.
9
+ start : decl ;
10
+ // Rewrites `type ID ';'` into ^(DECL type ID); the ';' is left out of the tree.
11
+ decl
12
+ : type ID ';' -> ^(DECL type ID)
13
+ ;
14
+
15
+ type
16
+ : INT_TYPE // automatic tree construction builds a node for this rule
17
+ | FLOAT_TYPE
18
+ ;
19
+ // Lexer rules. INT is defined but not referenced by any parser rule above.
20
+ INT_TYPE : 'int' ;
21
+ FLOAT_TYPE : 'float' ;
22
+ ID : 'a'..'z'+ ;
23
+ INT : '0'..'9'+ ;
24
+ WS : (' '|'\n') {$channel=HIDDEN;} ; // spaces and newlines go to the hidden channel
@@ -0,0 +1,17 @@
1
+ tree grammar LangDumpDecl;
2
+ // Walks the ^(DECL type ID) trees produced by Lang.g and prints each declaration as "<type> <name>".
3
+ options {
4
+ language = Ruby;
5
+ tokenVocab = Lang;
6
+ }
7
+ // Print the declared type from the matched subtree instead of a hard-coded "int".
8
+ decl : ^(DECL type declarator)
9
+ // label.start, label.start, label.text
10
+ { puts( "#{ $type.text } #{ $declarator.text }" ) }
11
+ ;
12
+
13
+ type : INT_TYPE | FLOAT_TYPE ; // accept both alternatives of the type rule in Lang.g
14
+
15
+ declarator
16
+ : ID
17
+ ;
@@ -0,0 +1 @@
1
+ int a;
@@ -0,0 +1,2 @@
1
+ tree: (DECL int a)
2
+ int a
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/ruby
2
+ # encoding: utf-8
3
+
4
+ $:.unshift( File.dirname( __FILE__ ) )
5
+ require 'LangLexer'
6
+ require 'LangParser'
7
+ require 'LangDumpDecl'
8
+
9
+ for file in ARGV
10
+ input = ANTLR3::FileStream.new( file )
11
+ parser = Lang::Parser.new( input )
12
+ tree = parser.start.tree
13
+ puts( "tree: #{ tree.inspect }" )
14
+ nodes = ANTLR3::AST::CommonTreeNodeStream.new(
15
+ tree, :token_stream => parser.input
16
+ )
17
+ LangDumpDecl::TreeParser.new( nodes ).decl
18
+ end
@@ -0,0 +1,68 @@
1
+ /** Convert the simple input to Java code: wrap it in a class,
2
+ * convert each method to "public void", and add declarations. This shows how to insert
3
+ * extra text into a stream of tokens and how to replace a token
4
+ * with some text. Calling render on the TokenRewriteStream
5
+ * (as tweak.rb does) prints the rewritten text rather than the original input.
6
+ *
7
+ * Note that you can issue the instructions in any order, as the
8
+ * rewrite instructions just get queued up and executed upon render.
9
+ */
10
+ grammar Tweak;
11
+ options { language = Ruby; }
12
+
13
+ @init {
14
+ @input = ANTLR3::TokenRewriteStream.new( @input )
15
+ }
16
+
17
+ program
18
+ @init { start = @input.look }
19
+ : method+
20
+ {
21
+ @input.insert_before( start,"public class Wrapper {\n" )
22
+ # note the reference to the last token matched for method:
23
+ @input.insert_after( $method.stop, "\n}\n" )
24
+ }
25
+ ;
26
+
27
+ method
28
+ : m='method' ID '(' ')' body
29
+ { @input.replace( $m, "public void" ) }
30
+ ;
31
+
32
+ body
33
+ // decls is on body's local variable stack but is visible to
34
+ // any rule that body calls such as stat. From other rules
35
+ // it is referenced as $body::decls
36
+ // From within rule body, you can use $decls shorthand
37
+ scope {
38
+ decls
39
+ }
40
+ @init {
41
+ $body::decls = Set.new
42
+ }
43
+ : lcurly='{' stat* '}'
44
+ {
45
+ # dump declarations for all identifiers seen in statement list
46
+ $body::decls.each { | i | @input.insert_after( $lcurly, "\nint #{ i };" ) }
47
+ }
48
+ ;
49
+
50
+ stat: ID '=' expr ';' { $body::decls.add( $ID.text ) } // track left-hand sides
51
+ ;
52
+
53
+ expr: mul ('+' mul)*
54
+ ;
55
+
56
+ mul : atom ('*' atom)*
57
+ ;
58
+
59
+ atom: ID
60
+ | INT
61
+ ;
62
+
63
+ ID : ('a'..'z'|'A'..'Z')+ ;
64
+
65
+ INT : ('0'..'9')+ ;
66
+
67
+ WS : (' '|'\t'|'\n')+ {$channel=HIDDEN;}
68
+ ;
@@ -0,0 +1,9 @@
1
+ method foo() {
2
+ i = 3;
3
+ k = i;
4
+ i = k*4;
5
+ }
6
+
7
+ method bar() {
8
+ j = i*2;
9
+ }
@@ -0,0 +1,16 @@
1
+ public class Wrapper {
2
+ public void foo() {
3
+ int k;
4
+ int i;
5
+ i = 3;
6
+ k = i;
7
+ i = k*4;
8
+ }
9
+
10
+ public void bar() {
11
+ int j;
12
+ j = i*2;
13
+ }
14
+ }
15
+
16
+
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/ruby
2
+ # encoding: utf-8
3
+ $:.unshift( File.dirname( __FILE__ ) )
4
+ require 'TweakLexer'
5
+ require 'TweakParser'
6
+
7
+ for file in ARGV
8
+ input = ANTLR3::FileStream.new( file )
9
+ lexer = Tweak::Lexer.new( input )
10
+ parser = Tweak::Parser.new( lexer )
11
+ parser.program
12
+ puts( parser.input.render )
13
+ end
@@ -0,0 +1,16 @@
1
+ The example input file has been slightly modified from the analogous
2
+ Java example: the non-ASCII characters have been removed and replaced
3
+ by ASCII data.
4
+
5
+ The lexer would fail if it tried to compare a byte > 0x7f from the input
6
+ to a Unicode constant.
7
+ It would work fine if you fed Unicode text into the lexer, but then
8
+ sys.stdout.write() would probably fail when it tried to print non-ASCII
9
+ data. (sys.stdout.write() is a Python call; this note appears to be carried over from the Python version of the example.)
10
+
11
+ It will also work with non-ASCII input (the lexer operates on unicode()
12
+ strings), but then you'll have to do some more work (which I omitted for
13
+ simplicity):
14
+ - decode the input into a Unicode string using the appropriate encoding.
15
+ - encode the data as you send it to the console or store it in a file.
16
+
@@ -0,0 +1,123 @@
1
+ lexer grammar XML;
2
+ options { language = Ruby; }
3
+
4
+ @members {
5
+ def quote(text) # returns text wrapped in double quotes, with embedded double quotes backslash-escaped
6
+ text = text.gsub(/\"/, '\\"')
7
+ \%("#{ text }") # NOTE(review): the leading backslash presumably keeps ANTLR from treating the percent sign as template syntax -- confirm
8
+ end
9
+ }
10
+
11
+ DOCUMENT
12
+ : XMLDECL? WS? DOCTYPE? WS? ELEMENT WS?
13
+ ;
14
+
15
+ fragment DOCTYPE
16
+ :
17
+ '<!DOCTYPE' WS rootElementName=GENERIC_ID
18
+ {puts("ROOTELEMENT: " + $rootElementName.text)}
19
+ WS
20
+ (
21
+ ( 'SYSTEM' WS sys1=VALUE
22
+ {puts("SYSTEM: " + $sys1.text)}
23
+
24
+ | 'PUBLIC' WS pub=VALUE WS sys2=VALUE
25
+ {puts("PUBLIC: " + $pub.text)}
26
+ {puts("SYSTEM: " + $sys2.text)}
27
+ )
28
+ ( WS )?
29
+ )?
30
+ ( dtd=INTERNAL_DTD
31
+ {puts("INTERNAL DTD: " + $dtd.text)}
32
+ )?
33
+ '>'
34
+ ;
35
+
36
+ fragment INTERNAL_DTD : '[' (options {greedy=false;} : .)* ']' ;
37
+
38
+ fragment PI :
39
+ '<?' target=GENERIC_ID WS?
40
+ {puts("PI: " + $target.text)}
41
+ ( ATTRIBUTE WS? )* '?>'
42
+ ;
43
+
44
+ fragment XMLDECL :
45
+ '<?' ('x'|'X') ('m'|'M') ('l'|'L') WS?
46
+ {puts("XML declaration")}
47
+ ( ATTRIBUTE WS? )* '?>'
48
+ ;
49
+
50
+
51
+ fragment ELEMENT
52
+ : ( START_TAG
53
+ (ELEMENT
54
+ | t=PCDATA
55
+ {puts("PCDATA: " << quote($t.text))}
56
+ | t=CDATA
57
+ {puts("CDATA: " << quote($t.text))}
58
+ | t=COMMENT
59
+ {puts("Comment: " << quote($t.text))}
60
+ | pi=PI
61
+ )*
62
+ END_TAG
63
+ | EMPTY_ELEMENT
64
+ )
65
+ ;
66
+
67
+ fragment START_TAG
68
+ : '<' WS? name=GENERIC_ID WS?
69
+ {puts("Start Tag: " + $name.text)}
70
+ ( ATTRIBUTE WS? )* '>'
71
+ ;
72
+
73
+ fragment EMPTY_ELEMENT
74
+ : '<' WS? name=GENERIC_ID WS?
75
+ {puts("Empty Element: " + $name.text)}
76
+ ( ATTRIBUTE WS? )* '/>'
77
+ ;
78
+
79
+ fragment ATTRIBUTE
80
+ : name=GENERIC_ID WS? '=' WS? value=VALUE
81
+ {puts("Attr: " + $name.text + " = "+ $value.text)}
82
+ ;
83
+
84
+ fragment END_TAG
85
+ : '</' WS? name=GENERIC_ID WS? '>'
86
+ {puts("End Tag: " + $name.text)}
87
+ ;
88
+
89
+ fragment COMMENT
90
+ : '<!--' (options {greedy=false;} : .)* '-->'
91
+ ;
92
+
93
+ fragment CDATA
94
+ : '<![CDATA[' (options {greedy=false;} : .)* ']]>'
95
+ ;
96
+
97
+ fragment PCDATA : (~'<')+ ;
98
+
99
+ fragment VALUE :
100
+ ( '\"' (~'\"')* '\"'
101
+ | '\'' (~'\'')* '\''
102
+ )
103
+ ;
104
+
105
+ fragment GENERIC_ID
106
+ : ( LETTER | '_' | ':')
107
+ ( options {greedy=true;} : LETTER | '0'..'9' | '.' | '-' | '_' | ':' )*
108
+ ;
109
+
110
+ fragment LETTER
111
+ : 'a'..'z'
112
+ | 'A'..'Z'
113
+ ;
114
+
115
+ fragment WS :
116
+ ( ' '
117
+ | '\t'
118
+ | ( '\n'
119
+ | '\r\n'
120
+ | '\r'
121
+ )
122
+ )+
123
+ ;