antlr3 1.8.0 → 1.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +35 -0
- data/Manifest.txt +73 -0
- data/README.txt +6 -13
- data/java/RubyTarget.java +43 -19
- data/java/antlr-full-3.2.1.jar +0 -0
- data/lib/antlr3/debug.rb +2 -0
- data/lib/antlr3/debug/event-hub.rb +55 -55
- data/lib/antlr3/debug/record-event-listener.rb +2 -2
- data/lib/antlr3/debug/rule-tracer.rb +14 -14
- data/lib/antlr3/debug/socket.rb +47 -47
- data/lib/antlr3/debug/trace-event-listener.rb +8 -8
- data/lib/antlr3/main.rb +29 -9
- data/lib/antlr3/modes/ast-builder.rb +7 -7
- data/lib/antlr3/modes/filter.rb +19 -17
- data/lib/antlr3/profile.rb +34 -6
- data/lib/antlr3/recognizers.rb +50 -1
- data/lib/antlr3/streams.rb +19 -15
- data/lib/antlr3/streams/rewrite.rb +241 -229
- data/lib/antlr3/template/group-file-lexer.rb +6 -8
- data/lib/antlr3/template/group-file-parser.rb +16 -16
- data/lib/antlr3/template/group-file.rb +1 -1
- data/lib/antlr3/test/call-stack.rb +13 -13
- data/lib/antlr3/test/core-extensions.rb +69 -69
- data/lib/antlr3/test/functional.rb +0 -4
- data/lib/antlr3/test/grammar.rb +70 -70
- data/lib/antlr3/token.rb +41 -17
- data/lib/antlr3/tree.rb +11 -14
- data/lib/antlr3/tree/debug.rb +53 -53
- data/lib/antlr3/tree/visitor.rb +11 -11
- data/lib/antlr3/tree/wizard.rb +35 -35
- data/lib/antlr3/util.rb +18 -0
- data/lib/antlr3/version.rb +1 -1
- data/rakefile +1 -0
- data/samples/ANTLRv3Grammar.g +3 -3
- data/samples/JavaScript.g +702 -0
- data/samples/standard/C/C.g +543 -0
- data/samples/standard/C/C.tokens +175 -0
- data/samples/standard/C/C__testrig.st +0 -0
- data/samples/standard/C/c.rb +12 -0
- data/samples/standard/C/input +3479 -0
- data/samples/standard/C/output +171 -0
- data/samples/standard/LL-star/LLStar.g +101 -0
- data/samples/standard/LL-star/input +12 -0
- data/samples/standard/LL-star/ll-star.rb +12 -0
- data/samples/standard/LL-star/output +2 -0
- data/samples/standard/calc/Calculator.g +47 -0
- data/samples/standard/calc/Calculator.py +16 -0
- data/samples/standard/calc/Calculator.rb +28 -0
- data/samples/standard/cminus/CMinus.g +141 -0
- data/samples/standard/cminus/bytecode.group +80 -0
- data/samples/standard/cminus/cminus.rb +16 -0
- data/samples/standard/cminus/input +9 -0
- data/samples/standard/cminus/java.group +91 -0
- data/samples/standard/cminus/output +11 -0
- data/samples/standard/cminus/python.group +48 -0
- data/samples/standard/dynamic-scope/DynamicScopes.g +50 -0
- data/samples/standard/dynamic-scope/dynamic-scopes.rb +12 -0
- data/samples/standard/dynamic-scope/input +7 -0
- data/samples/standard/dynamic-scope/output +4 -0
- data/samples/standard/fuzzy/FuzzyJava.g +89 -0
- data/samples/standard/fuzzy/fuzzy.py +11 -0
- data/samples/standard/fuzzy/fuzzy.rb +9 -0
- data/samples/standard/fuzzy/input +13 -0
- data/samples/standard/fuzzy/output +12 -0
- data/samples/standard/hoisted-predicates/HoistedPredicates.g +40 -0
- data/samples/standard/hoisted-predicates/hoisted-predicates.rb +13 -0
- data/samples/standard/hoisted-predicates/input +1 -0
- data/samples/standard/hoisted-predicates/output +1 -0
- data/samples/standard/island-grammar/Javadoc.g +46 -0
- data/samples/standard/island-grammar/Simple.g +104 -0
- data/samples/standard/island-grammar/input +11 -0
- data/samples/standard/island-grammar/island.rb +12 -0
- data/samples/standard/island-grammar/output +16 -0
- data/samples/standard/java/Java.g +827 -0
- data/samples/standard/java/input +80 -0
- data/samples/standard/java/java.rb +13 -0
- data/samples/standard/java/output +1 -0
- data/samples/standard/python/Python.g +718 -0
- data/samples/standard/python/PythonTokenSource.rb +107 -0
- data/samples/standard/python/input +210 -0
- data/samples/standard/python/output +24 -0
- data/samples/standard/python/python.rb +14 -0
- data/samples/standard/rakefile +18 -0
- data/samples/standard/scopes/SymbolTable.g +66 -0
- data/samples/standard/scopes/input +12 -0
- data/samples/standard/scopes/output +3 -0
- data/samples/standard/scopes/scopes.rb +12 -0
- data/samples/standard/simplecTreeParser/SimpleC.g +113 -0
- data/samples/standard/simplecTreeParser/SimpleCWalker.g +64 -0
- data/samples/standard/simplecTreeParser/input +12 -0
- data/samples/standard/simplecTreeParser/output +1 -0
- data/samples/standard/simplecTreeParser/simplec.rb +18 -0
- data/samples/standard/treeparser/Lang.g +24 -0
- data/samples/standard/treeparser/LangDumpDecl.g +17 -0
- data/samples/standard/treeparser/input +1 -0
- data/samples/standard/treeparser/output +2 -0
- data/samples/standard/treeparser/treeparser.rb +18 -0
- data/samples/standard/tweak/Tweak.g +68 -0
- data/samples/standard/tweak/input +9 -0
- data/samples/standard/tweak/output +16 -0
- data/samples/standard/tweak/tweak.rb +13 -0
- data/samples/standard/xml/README +16 -0
- data/samples/standard/xml/XML.g +123 -0
- data/samples/standard/xml/input +21 -0
- data/samples/standard/xml/output +39 -0
- data/samples/standard/xml/xml.rb +9 -0
- data/templates/Ruby.stg +4 -4
- data/test/functional/ast-output/auto-ast.rb +0 -5
- data/test/functional/ast-output/rewrites.rb +4 -4
- data/test/unit/test-scope.rb +45 -0
- metadata +96 -8
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
#!/usr/bin/ruby
|
|
2
|
+
# encoding: utf-8
|
|
3
|
+
$:.unshift( File.dirname( __FILE__ ) )
|
|
4
|
+
require 'SymbolTableLexer'
|
|
5
|
+
require 'SymbolTableParser'
|
|
6
|
+
|
|
7
|
+
for file in ARGV
|
|
8
|
+
input = ANTLR3::FileStream.new( file )
|
|
9
|
+
lexer = SymbolTable::Lexer.new( input )
|
|
10
|
+
parser = SymbolTable::Parser.new( lexer )
|
|
11
|
+
parser.prog
|
|
12
|
+
end
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
grammar SimpleC;
|
|
2
|
+
options {
|
|
3
|
+
language = Ruby;
|
|
4
|
+
output = AST;
|
|
5
|
+
}
|
|
6
|
+
|
|
7
|
+
tokens {
|
|
8
|
+
VAR_DEF;
|
|
9
|
+
ARG_DEF;
|
|
10
|
+
FUNC_HDR;
|
|
11
|
+
FUNC_DECL;
|
|
12
|
+
FUNC_DEF;
|
|
13
|
+
BLOCK;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
program
|
|
17
|
+
: declaration+
|
|
18
|
+
;
|
|
19
|
+
|
|
20
|
+
declaration
|
|
21
|
+
: variable
|
|
22
|
+
| functionHeader ';' -> ^(FUNC_DECL functionHeader)
|
|
23
|
+
| functionHeader block -> ^(FUNC_DEF functionHeader block)
|
|
24
|
+
;
|
|
25
|
+
|
|
26
|
+
variable
|
|
27
|
+
: type declarator ';' -> ^(VAR_DEF type declarator)
|
|
28
|
+
;
|
|
29
|
+
|
|
30
|
+
declarator
|
|
31
|
+
: ID
|
|
32
|
+
;
|
|
33
|
+
|
|
34
|
+
functionHeader
|
|
35
|
+
: type ID '(' ( formalParameter ( ',' formalParameter )* )? ')'
|
|
36
|
+
-> ^(FUNC_HDR type ID formalParameter+)
|
|
37
|
+
;
|
|
38
|
+
|
|
39
|
+
formalParameter
|
|
40
|
+
: type declarator -> ^(ARG_DEF type declarator)
|
|
41
|
+
;
|
|
42
|
+
|
|
43
|
+
type
|
|
44
|
+
: 'int'
|
|
45
|
+
| 'char'
|
|
46
|
+
| 'void'
|
|
47
|
+
| ID
|
|
48
|
+
;
|
|
49
|
+
|
|
50
|
+
block
|
|
51
|
+
: lc='{'
|
|
52
|
+
variable*
|
|
53
|
+
stat*
|
|
54
|
+
'}'
|
|
55
|
+
-> ^(BLOCK[$lc,"BLOCK"] variable* stat*)
|
|
56
|
+
;
|
|
57
|
+
|
|
58
|
+
stat: forStat
|
|
59
|
+
| expr ';'!
|
|
60
|
+
| block
|
|
61
|
+
| assignStat ';'!
|
|
62
|
+
| ';'!
|
|
63
|
+
;
|
|
64
|
+
|
|
65
|
+
forStat
|
|
66
|
+
: 'for' '(' start=assignStat ';' expr ';' next=assignStat ')' block
|
|
67
|
+
-> ^('for' $start expr $next block)
|
|
68
|
+
;
|
|
69
|
+
|
|
70
|
+
assignStat
|
|
71
|
+
: ID EQ expr -> ^(EQ ID expr)
|
|
72
|
+
;
|
|
73
|
+
|
|
74
|
+
expr: condExpr
|
|
75
|
+
;
|
|
76
|
+
|
|
77
|
+
condExpr
|
|
78
|
+
: aexpr ( ('=='^ | '<'^) aexpr )?
|
|
79
|
+
;
|
|
80
|
+
|
|
81
|
+
aexpr
|
|
82
|
+
: atom ( '+'^ atom )*
|
|
83
|
+
;
|
|
84
|
+
|
|
85
|
+
atom
|
|
86
|
+
: ID
|
|
87
|
+
| INT
|
|
88
|
+
| '(' expr ')' -> expr
|
|
89
|
+
;
|
|
90
|
+
|
|
91
|
+
FOR : 'for' ;
|
|
92
|
+
INT_TYPE : 'int' ;
|
|
93
|
+
CHAR: 'char';
|
|
94
|
+
VOID: 'void';
|
|
95
|
+
|
|
96
|
+
ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')*
|
|
97
|
+
;
|
|
98
|
+
|
|
99
|
+
INT : ('0'..'9')+
|
|
100
|
+
;
|
|
101
|
+
|
|
102
|
+
EQ : '=' ;
|
|
103
|
+
EQEQ : '==' ;
|
|
104
|
+
LT : '<' ;
|
|
105
|
+
PLUS : '+' ;
|
|
106
|
+
|
|
107
|
+
WS : ( ' '
|
|
108
|
+
| '\t'
|
|
109
|
+
| '\r'
|
|
110
|
+
| '\n'
|
|
111
|
+
)+
|
|
112
|
+
{ $channel=HIDDEN }
|
|
113
|
+
;
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
tree grammar SimpleCWalker;
|
|
2
|
+
options {
|
|
3
|
+
language = Ruby;
|
|
4
|
+
tokenVocab = SimpleC;
|
|
5
|
+
ASTLabelType = CommonTree;
|
|
6
|
+
}
|
|
7
|
+
|
|
8
|
+
program
|
|
9
|
+
: declaration+
|
|
10
|
+
;
|
|
11
|
+
|
|
12
|
+
declaration
|
|
13
|
+
: variable
|
|
14
|
+
| ^(FUNC_DECL functionHeader)
|
|
15
|
+
| ^(FUNC_DEF functionHeader block)
|
|
16
|
+
;
|
|
17
|
+
|
|
18
|
+
variable
|
|
19
|
+
: ^(VAR_DEF type declarator)
|
|
20
|
+
;
|
|
21
|
+
|
|
22
|
+
declarator
|
|
23
|
+
: ID
|
|
24
|
+
;
|
|
25
|
+
|
|
26
|
+
functionHeader
|
|
27
|
+
: ^(FUNC_HDR type ID formalParameter+)
|
|
28
|
+
;
|
|
29
|
+
|
|
30
|
+
formalParameter
|
|
31
|
+
: ^(ARG_DEF type declarator)
|
|
32
|
+
;
|
|
33
|
+
|
|
34
|
+
type
|
|
35
|
+
: 'int'
|
|
36
|
+
| 'char'
|
|
37
|
+
| 'void'
|
|
38
|
+
| ID
|
|
39
|
+
;
|
|
40
|
+
|
|
41
|
+
block
|
|
42
|
+
: ^(BLOCK variable* stat*)
|
|
43
|
+
;
|
|
44
|
+
|
|
45
|
+
stat: forStat
|
|
46
|
+
| expr
|
|
47
|
+
| block
|
|
48
|
+
;
|
|
49
|
+
|
|
50
|
+
forStat
|
|
51
|
+
: ^('for' expr expr expr block)
|
|
52
|
+
;
|
|
53
|
+
|
|
54
|
+
expr: ^(EQEQ expr expr)
|
|
55
|
+
| ^(LT expr expr)
|
|
56
|
+
| ^(PLUS expr expr)
|
|
57
|
+
| ^(EQ ID expr)
|
|
58
|
+
| atom
|
|
59
|
+
;
|
|
60
|
+
|
|
61
|
+
atom
|
|
62
|
+
: ID
|
|
63
|
+
| INT
|
|
64
|
+
;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
tree=(VAR_DEF char c) (VAR_DEF int x) (FUNC_DECL (FUNC_HDR void bar (ARG_DEF int x))) (FUNC_DEF (FUNC_HDR int foo (ARG_DEF int y) (ARG_DEF char d)) (BLOCK (VAR_DEF int i) (for (= i 0) (< i 3) (= i (+ i 1)) (BLOCK (= x 3) (= y 5)))))
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
#!/usr/bin/ruby
|
|
2
|
+
# encoding: utf-8
|
|
3
|
+
|
|
4
|
+
$:.unshift( File.dirname( __FILE__ ) )
|
|
5
|
+
require 'SimpleCLexer'
|
|
6
|
+
require 'SimpleCParser'
|
|
7
|
+
require 'SimpleCWalker'
|
|
8
|
+
|
|
9
|
+
for file in ARGV
|
|
10
|
+
input = ANTLR3::FileStream.new( file )
|
|
11
|
+
parser = SimpleC::Parser.new( input )
|
|
12
|
+
tree = parser.program.tree
|
|
13
|
+
puts( "tree: #{ tree.inspect }" )
|
|
14
|
+
nodes = ANTLR3::AST::CommonTreeNodeStream.new(
|
|
15
|
+
tree, :token_stream => parser.input
|
|
16
|
+
)
|
|
17
|
+
SimpleCWalker::TreeParser.new( nodes ).program
|
|
18
|
+
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
grammar Lang;
|
|
2
|
+
options {
|
|
3
|
+
language = Ruby;
|
|
4
|
+
output = AST;
|
|
5
|
+
}
|
|
6
|
+
|
|
7
|
+
tokens {DECL;} // an imaginary node
|
|
8
|
+
|
|
9
|
+
start : decl ;
|
|
10
|
+
|
|
11
|
+
decl
|
|
12
|
+
: type ID ';' -> ^(DECL type ID)
|
|
13
|
+
;
|
|
14
|
+
|
|
15
|
+
type
|
|
16
|
+
: INT_TYPE // automatic tree construction builds a node for this rule
|
|
17
|
+
| FLOAT_TYPE
|
|
18
|
+
;
|
|
19
|
+
|
|
20
|
+
INT_TYPE : 'int' ;
|
|
21
|
+
FLOAT_TYPE : 'float' ;
|
|
22
|
+
ID : 'a'..'z'+ ;
|
|
23
|
+
INT : '0'..'9'+ ;
|
|
24
|
+
WS : (' '|'\n') {$channel=HIDDEN;} ;
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
tree grammar LangDumpDecl;
|
|
2
|
+
|
|
3
|
+
options {
|
|
4
|
+
language = Ruby;
|
|
5
|
+
tokenVocab = Lang;
|
|
6
|
+
}
|
|
7
|
+
|
|
8
|
+
decl : ^(DECL type declarator)
|
|
9
|
+
// label.start, label.start, label.text
|
|
10
|
+
{ puts( "int #{ $declarator.text }" ) }
|
|
11
|
+
;
|
|
12
|
+
|
|
13
|
+
type : INT_TYPE ;
|
|
14
|
+
|
|
15
|
+
declarator
|
|
16
|
+
: ID
|
|
17
|
+
;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
int a;
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
#!/usr/bin/ruby
|
|
2
|
+
# encoding: utf-8
|
|
3
|
+
|
|
4
|
+
$:.unshift( File.dirname( __FILE__ ) )
|
|
5
|
+
require 'LangLexer'
|
|
6
|
+
require 'LangParser'
|
|
7
|
+
require 'LangDumpDecl'
|
|
8
|
+
|
|
9
|
+
for file in ARGV
|
|
10
|
+
input = ANTLR3::FileStream.new( file )
|
|
11
|
+
parser = Lang::Parser.new( input )
|
|
12
|
+
tree = parser.start.tree
|
|
13
|
+
puts( "tree: #{ tree.inspect }" )
|
|
14
|
+
nodes = ANTLR3::AST::CommonTreeNodeStream.new(
|
|
15
|
+
tree, :token_stream => parser.input
|
|
16
|
+
)
|
|
17
|
+
LangDumpDecl::TreeParser.new( nodes ).decl
|
|
18
|
+
end
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/** Convert the simple input to be java code; wrap in a class,
|
|
2
|
+
* convert method with "public void", add decls. This shows how to insert
|
|
3
|
+
* extra text into a stream of tokens and how to replace a token
|
|
4
|
+
* with some text. Calling toString() on the TokenRewriteStream
|
|
5
|
+
* in Main will print out the original input stream.
|
|
6
|
+
*
|
|
7
|
+
* Note that you can do the instructions in any order as the
|
|
8
|
+
* rewrite instructions just get queued up and executed upon toString().
|
|
9
|
+
*/
|
|
10
|
+
grammar Tweak;
|
|
11
|
+
options { language = Ruby; }
|
|
12
|
+
|
|
13
|
+
@init {
|
|
14
|
+
@input = ANTLR3::TokenRewriteStream.new( @input )
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
program
|
|
18
|
+
@init { start = @input.look }
|
|
19
|
+
: method+
|
|
20
|
+
{
|
|
21
|
+
@input.insert_before( start,"public class Wrapper {\n" )
|
|
22
|
+
# note the reference to the last token matched for method:
|
|
23
|
+
@input.insert_after( $method.stop, "\n}\n" )
|
|
24
|
+
}
|
|
25
|
+
;
|
|
26
|
+
|
|
27
|
+
method
|
|
28
|
+
: m='method' ID '(' ')' body
|
|
29
|
+
{ @input.replace( $m, "public void" ) }
|
|
30
|
+
;
|
|
31
|
+
|
|
32
|
+
body
|
|
33
|
+
// decls is on body's local variable stack but is visible to
|
|
34
|
+
// any rule that body calls such as stat. From other rules
|
|
35
|
+
// it is referenced as $body::decls
|
|
36
|
+
// From within rule body, you can use $decls shorthand
|
|
37
|
+
scope {
|
|
38
|
+
decls
|
|
39
|
+
}
|
|
40
|
+
@init {
|
|
41
|
+
$body::decls = Set.new
|
|
42
|
+
}
|
|
43
|
+
: lcurly='{' stat* '}'
|
|
44
|
+
{
|
|
45
|
+
# dump declarations for all identifiers seen in statement list
|
|
46
|
+
$body::decls.each { | i | @input.insert_after( $lcurly, "\nint #{ i };" ) }
|
|
47
|
+
}
|
|
48
|
+
;
|
|
49
|
+
|
|
50
|
+
stat: ID '=' expr ';' { $body::decls.add( $ID.text ) } // track left-hand sides
|
|
51
|
+
;
|
|
52
|
+
|
|
53
|
+
expr: mul ('+' mul)*
|
|
54
|
+
;
|
|
55
|
+
|
|
56
|
+
mul : atom ('*' atom)*
|
|
57
|
+
;
|
|
58
|
+
|
|
59
|
+
atom: ID
|
|
60
|
+
| INT
|
|
61
|
+
;
|
|
62
|
+
|
|
63
|
+
ID : ('a'..'z'|'A'..'Z')+ ;
|
|
64
|
+
|
|
65
|
+
INT : ('0'..'9')+ ;
|
|
66
|
+
|
|
67
|
+
WS : (' '|'\t'|'\n')+ {$channel=HIDDEN;}
|
|
68
|
+
;
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
#!/usr/bin/ruby
|
|
2
|
+
# encoding: utf-8
|
|
3
|
+
$:.unshift( File.dirname( __FILE__ ) )
|
|
4
|
+
require 'TweakLexer'
|
|
5
|
+
require 'TweakParser'
|
|
6
|
+
|
|
7
|
+
for file in ARGV
|
|
8
|
+
input = ANTLR3::FileStream.new( file )
|
|
9
|
+
lexer = Tweak::Lexer.new( input )
|
|
10
|
+
parser = Tweak::Parser.new( lexer )
|
|
11
|
+
parser.program
|
|
12
|
+
puts( parser.input.render )
|
|
13
|
+
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
The example input file has been slightly modified from the analogous
|
|
2
|
+
Java example: the non-ascii characters have been removed and replaced
|
|
3
|
+
by some ascii data.
|
|
4
|
+
|
|
5
|
+
The lexer would fail if it tried to compare a byte >0x7f from the input
|
|
6
|
+
to a unicode constant.
|
|
7
|
+
It would work fine if you fed unicode into the lexer, but then
|
|
8
|
+
sys.stdout.write() would probably fail when trying to print non-ascii
|
|
9
|
+
data.
|
|
10
|
+
|
|
11
|
+
It will also work with non-ascii input (the lexer operates on unicode()
|
|
12
|
+
strings), but then you'll have to do some more work (which I omitted for
|
|
13
|
+
simplicity):
|
|
14
|
+
- decode the input using the appropriate encoding into a unicode string.
|
|
15
|
+
- encode the data as you send it to the console or store it in a file.
|
|
16
|
+
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
lexer grammar XML;
|
|
2
|
+
options { language = Ruby; }
|
|
3
|
+
|
|
4
|
+
@members {
|
|
5
|
+
def quote(text)
|
|
6
|
+
text = text.gsub(/\"/, '\\"')
|
|
7
|
+
\%("#{ text }")
|
|
8
|
+
end
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
DOCUMENT
|
|
12
|
+
: XMLDECL? WS? DOCTYPE? WS? ELEMENT WS?
|
|
13
|
+
;
|
|
14
|
+
|
|
15
|
+
fragment DOCTYPE
|
|
16
|
+
:
|
|
17
|
+
'<!DOCTYPE' WS rootElementName=GENERIC_ID
|
|
18
|
+
{puts("ROOTELEMENT: " + $rootElementName.text)}
|
|
19
|
+
WS
|
|
20
|
+
(
|
|
21
|
+
( 'SYSTEM' WS sys1=VALUE
|
|
22
|
+
{puts("SYSTEM: " + $sys1.text)}
|
|
23
|
+
|
|
24
|
+
| 'PUBLIC' WS pub=VALUE WS sys2=VALUE
|
|
25
|
+
{puts("PUBLIC: " + $pub.text)}
|
|
26
|
+
{puts("SYSTEM: " + $sys2.text)}
|
|
27
|
+
)
|
|
28
|
+
( WS )?
|
|
29
|
+
)?
|
|
30
|
+
( dtd=INTERNAL_DTD
|
|
31
|
+
{puts("INTERNAL DTD: " + $dtd.text)}
|
|
32
|
+
)?
|
|
33
|
+
'>'
|
|
34
|
+
;
|
|
35
|
+
|
|
36
|
+
fragment INTERNAL_DTD : '[' (options {greedy=false;} : .)* ']' ;
|
|
37
|
+
|
|
38
|
+
fragment PI :
|
|
39
|
+
'<?' target=GENERIC_ID WS?
|
|
40
|
+
{puts("PI: " + $target.text)}
|
|
41
|
+
( ATTRIBUTE WS? )* '?>'
|
|
42
|
+
;
|
|
43
|
+
|
|
44
|
+
fragment XMLDECL :
|
|
45
|
+
'<?' ('x'|'X') ('m'|'M') ('l'|'L') WS?
|
|
46
|
+
{puts("XML declaration")}
|
|
47
|
+
( ATTRIBUTE WS? )* '?>'
|
|
48
|
+
;
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
fragment ELEMENT
|
|
52
|
+
: ( START_TAG
|
|
53
|
+
(ELEMENT
|
|
54
|
+
| t=PCDATA
|
|
55
|
+
{puts("PCDATA: " << quote($t.text))}
|
|
56
|
+
| t=CDATA
|
|
57
|
+
{puts("CDATA: " << quote($t.text))}
|
|
58
|
+
| t=COMMENT
|
|
59
|
+
{puts("Comment: " << quote($t.text))}
|
|
60
|
+
| pi=PI
|
|
61
|
+
)*
|
|
62
|
+
END_TAG
|
|
63
|
+
| EMPTY_ELEMENT
|
|
64
|
+
)
|
|
65
|
+
;
|
|
66
|
+
|
|
67
|
+
fragment START_TAG
|
|
68
|
+
: '<' WS? name=GENERIC_ID WS?
|
|
69
|
+
{puts("Start Tag: " + $name.text)}
|
|
70
|
+
( ATTRIBUTE WS? )* '>'
|
|
71
|
+
;
|
|
72
|
+
|
|
73
|
+
fragment EMPTY_ELEMENT
|
|
74
|
+
: '<' WS? name=GENERIC_ID WS?
|
|
75
|
+
{puts("Empty Element: " + $name.text)}
|
|
76
|
+
( ATTRIBUTE WS? )* '/>'
|
|
77
|
+
;
|
|
78
|
+
|
|
79
|
+
fragment ATTRIBUTE
|
|
80
|
+
: name=GENERIC_ID WS? '=' WS? value=VALUE
|
|
81
|
+
{puts("Attr: " + $name.text + " = "+ $value.text)}
|
|
82
|
+
;
|
|
83
|
+
|
|
84
|
+
fragment END_TAG
|
|
85
|
+
: '</' WS? name=GENERIC_ID WS? '>'
|
|
86
|
+
{puts("End Tag: " + $name.text)}
|
|
87
|
+
;
|
|
88
|
+
|
|
89
|
+
fragment COMMENT
|
|
90
|
+
: '<!--' (options {greedy=false;} : .)* '-->'
|
|
91
|
+
;
|
|
92
|
+
|
|
93
|
+
fragment CDATA
|
|
94
|
+
: '<![CDATA[' (options {greedy=false;} : .)* ']]>'
|
|
95
|
+
;
|
|
96
|
+
|
|
97
|
+
fragment PCDATA : (~'<')+ ;
|
|
98
|
+
|
|
99
|
+
fragment VALUE :
|
|
100
|
+
( '\"' (~'\"')* '\"'
|
|
101
|
+
| '\'' (~'\'')* '\''
|
|
102
|
+
)
|
|
103
|
+
;
|
|
104
|
+
|
|
105
|
+
fragment GENERIC_ID
|
|
106
|
+
: ( LETTER | '_' | ':')
|
|
107
|
+
( options {greedy=true;} : LETTER | '0'..'9' | '.' | '-' | '_' | ':' )*
|
|
108
|
+
;
|
|
109
|
+
|
|
110
|
+
fragment LETTER
|
|
111
|
+
: 'a'..'z'
|
|
112
|
+
| 'A'..'Z'
|
|
113
|
+
;
|
|
114
|
+
|
|
115
|
+
fragment WS :
|
|
116
|
+
( ' '
|
|
117
|
+
| '\t'
|
|
118
|
+
| ( '\n'
|
|
119
|
+
| '\r\n'
|
|
120
|
+
| '\r'
|
|
121
|
+
)
|
|
122
|
+
)+
|
|
123
|
+
;
|