antlr3 1.2.3
- data/ANTLR-LICENSE.txt +26 -0
- data/History.txt +66 -0
- data/README.txt +139 -0
- data/bin/antlr4ruby +33 -0
- data/java/RubyTarget.java +524 -0
- data/java/antlr-full-3.2.1.jar +0 -0
- data/lib/antlr3.rb +176 -0
- data/lib/antlr3/constants.rb +88 -0
- data/lib/antlr3/debug.rb +701 -0
- data/lib/antlr3/debug/event-hub.rb +210 -0
- data/lib/antlr3/debug/record-event-listener.rb +25 -0
- data/lib/antlr3/debug/rule-tracer.rb +55 -0
- data/lib/antlr3/debug/socket.rb +360 -0
- data/lib/antlr3/debug/trace-event-listener.rb +92 -0
- data/lib/antlr3/dfa.rb +247 -0
- data/lib/antlr3/dot.rb +174 -0
- data/lib/antlr3/error.rb +657 -0
- data/lib/antlr3/main.rb +561 -0
- data/lib/antlr3/modes/ast-builder.rb +41 -0
- data/lib/antlr3/modes/filter.rb +56 -0
- data/lib/antlr3/profile.rb +322 -0
- data/lib/antlr3/recognizers.rb +1280 -0
- data/lib/antlr3/streams.rb +985 -0
- data/lib/antlr3/streams/interactive.rb +91 -0
- data/lib/antlr3/streams/rewrite.rb +412 -0
- data/lib/antlr3/test/call-stack.rb +57 -0
- data/lib/antlr3/test/config.rb +23 -0
- data/lib/antlr3/test/core-extensions.rb +269 -0
- data/lib/antlr3/test/diff.rb +165 -0
- data/lib/antlr3/test/functional.rb +207 -0
- data/lib/antlr3/test/grammar.rb +371 -0
- data/lib/antlr3/token.rb +592 -0
- data/lib/antlr3/tree.rb +1415 -0
- data/lib/antlr3/tree/debug.rb +163 -0
- data/lib/antlr3/tree/visitor.rb +84 -0
- data/lib/antlr3/tree/wizard.rb +481 -0
- data/lib/antlr3/util.rb +149 -0
- data/lib/antlr3/version.rb +27 -0
- data/samples/ANTLRv3Grammar.g +621 -0
- data/samples/Cpp.g +749 -0
- data/templates/AST.stg +335 -0
- data/templates/ASTDbg.stg +40 -0
- data/templates/ASTParser.stg +153 -0
- data/templates/ASTTreeParser.stg +272 -0
- data/templates/Dbg.stg +192 -0
- data/templates/Ruby.stg +1514 -0
- data/test/functional/ast-output/auto-ast.rb +797 -0
- data/test/functional/ast-output/construction.rb +555 -0
- data/test/functional/ast-output/hetero-nodes.rb +753 -0
- data/test/functional/ast-output/rewrites.rb +1327 -0
- data/test/functional/ast-output/tree-rewrite.rb +1662 -0
- data/test/functional/debugging/debug-mode.rb +689 -0
- data/test/functional/debugging/profile-mode.rb +165 -0
- data/test/functional/debugging/rule-tracing.rb +74 -0
- data/test/functional/delegation/import.rb +379 -0
- data/test/functional/lexer/basic.rb +559 -0
- data/test/functional/lexer/filter-mode.rb +245 -0
- data/test/functional/lexer/nuances.rb +47 -0
- data/test/functional/lexer/properties.rb +104 -0
- data/test/functional/lexer/syn-pred.rb +32 -0
- data/test/functional/lexer/xml.rb +206 -0
- data/test/functional/main/main-scripts.rb +245 -0
- data/test/functional/parser/actions.rb +224 -0
- data/test/functional/parser/backtracking.rb +244 -0
- data/test/functional/parser/basic.rb +282 -0
- data/test/functional/parser/calc.rb +98 -0
- data/test/functional/parser/ll-star.rb +143 -0
- data/test/functional/parser/nuances.rb +165 -0
- data/test/functional/parser/predicates.rb +103 -0
- data/test/functional/parser/properties.rb +242 -0
- data/test/functional/parser/rule-methods.rb +132 -0
- data/test/functional/parser/scopes.rb +274 -0
- data/test/functional/token-rewrite/basic.rb +318 -0
- data/test/functional/token-rewrite/via-parser.rb +100 -0
- data/test/functional/tree-parser/basic.rb +750 -0
- data/test/unit/sample-input/file-stream-1 +2 -0
- data/test/unit/sample-input/teststreams.input2 +2 -0
- data/test/unit/test-dfa.rb +52 -0
- data/test/unit/test-exceptions.rb +44 -0
- data/test/unit/test-recognizers.rb +55 -0
- data/test/unit/test-scheme.rb +62 -0
- data/test/unit/test-streams.rb +459 -0
- data/test/unit/test-tree-wizard.rb +535 -0
- data/test/unit/test-trees.rb +854 -0
- metadata +205 -0
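The files above are the complete ANTLR 3 runtime for Ruby plus the Ruby code-generation target and its test suite. For orientation only, here is a minimal sketch of how a generated recognizer is normally driven with this runtime; the `Hello` grammar, its `greeting` start rule, and the generated file names are hypothetical placeholders, not files shipped in this gem:

    # Sketch only -- the grammar, rule, and file names below are made-up placeholders.
    #
    #   $ antlr4ruby Hello.g   # Hello.g must declare: options { language = Ruby; }
    #
    require 'antlr3'
    require 'HelloLexer'                             # generated by antlr4ruby (name assumed)
    require 'HelloParser'

    lexer  = Hello::Lexer.new( "hello world" )       # lexers accept plain strings
    tokens = ANTLR3::CommonTokenStream.new( lexer )  # buffer tokens for the parser
    parser = Hello::Parser.new( tokens )
    parser.greeting                                  # invoke the (assumed) start rule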
data/test/functional/lexer/filter-mode.rb
@@ -0,0 +1,245 @@
#!/usr/bin/ruby
# encoding: utf-8

require 'antlr3/test/functional'

class TestFilterMode < ANTLR3::Test::Functional

  inline_grammar(<<-'END')
    lexer grammar Filter;
    options {
      language = Ruby;
      filter=true;
    }

    IMPORT
      : 'import' WS QIDStar WS? ';'
      ;

    RETURN
      : 'return' .* ';'
      ;

    CLASS
      : 'class' WS ID WS? ('extends' WS QID WS?)?
        ('implements' WS QID WS? (',' WS? QID WS?)*)? '{'
      ;

    COMMENT
      : '/*' .* '*/'
      ;

    STRING
      : '"' (options {greedy=false;}: ESC | .)* '"'
      ;

    CHAR
      : '\'' (options {greedy=false;}: ESC | .)* '\''
      ;

    WS  : (' '|'\t'|'\n')+
        ;

    fragment
    QID : ID ('.' ID)*
        ;

    /** QID cannot see beyond end of token so using QID '.*'? somewhere won't
     *  ever match since k=1 look in the QID loop of '.' will make it loop.
     *  I made this rule to compensate.
     */
    fragment
    QIDStar
      : ID ('.' ID)* '.*'?
      ;

    fragment
    TYPE: QID '[]'?
        ;

    fragment
    ARG : TYPE WS ID
        ;

    fragment
    ID  : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
        ;

    fragment
    ESC : '\\' ('"'|'\''|'\\')
        ;
  END

  example "skipping tokens that aren't important with filter mode" do
    input = <<-END.fixed_indent(0)
      import org.antlr.runtime.*;

      public class Main {
        public static void main(String[] args) throws Exception {
          for (int i=0; i<args.length; i++) {
            CharStream input = new ANTLRFileStream(args[i]);
            FuzzyJava lex = new FuzzyJava(input);
            TokenStream tokens = new CommonTokenStream(lex);
            tokens.toString();
            //System.out.println(tokens);
          }
        }
      }
    END

    lexer = Filter::Lexer.new( input )
    tokens = lexer.map { |tk| tk }
  end

end


class TestFuzzy < ANTLR3::Test::Functional

  inline_grammar(<<-'END')
    lexer grammar Fuzzy;
    options {
      language = Ruby;
      filter=true;
    }

    @members {
      include ANTLR3::Test::CaptureOutput
    }

    IMPORT
      : 'import' WS name=QIDStar WS? ';'
      ;

    /** Avoids having "return foo;" match as a field */
    RETURN
      : 'return' (options {greedy=false;}:.)* ';'
      ;

    CLASS
      : 'class' WS name=ID WS? ('extends' WS QID WS?)?
        ('implements' WS QID WS? (',' WS? QID WS?)*)? '{'
        {
          say("found class " << $name.text)
        }
      ;

    METHOD
      : TYPE WS name=ID WS? '(' ( ARG WS? (',' WS? ARG WS?)* )? ')' WS?
        ('throws' WS QID WS? (',' WS? QID WS?)*)? '{'
        {
          say("found method " << $name.text)
        }
      ;

    FIELD
      : TYPE WS name=ID '[]'? WS? (';'|'=')
        {
          say("found var " << $name.text)
        }
      ;

    STAT: ('if'|'while'|'switch'|'for') WS? '(' ;

    CALL
      : name=QID WS? '('
        {
          say("found call " << $name.text)
        }
      ;

    COMMENT
      : '/*' (options {greedy=false;} : . )* '*/'
        {
          say("found comment " << self.text)
        }
      ;

    SL_COMMENT
      : '//' (options {greedy=false;} : . )* '\n'
        {
          say("found // comment " << self.text)
        }
      ;

    STRING
      : '"' (options {greedy=false;}: ESC | .)* '"'
      ;

    CHAR
      : '\'' (options {greedy=false;}: ESC | .)* '\''
      ;

    WS  : (' '|'\t'|'\n')+
        ;

    fragment
    QID : ID ('.' ID)*
        ;

    /** QID cannot see beyond end of token so using QID '.*'? somewhere won't
     *  ever match since k=1 look in the QID loop of '.' will make it loop.
     *  I made this rule to compensate.
     */
    fragment
    QIDStar
      : ID ('.' ID)* '.*'?
      ;

    fragment
    TYPE: QID '[]'?
        ;

    fragment
    ARG : TYPE WS ID
        ;

    fragment
    ID  : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
        ;

    fragment
    ESC : '\\' ('"'|'\''|'\\')
        ;
  END

  example "fuzzy lexing with the filter mode option" do
    input = <<-END.fixed_indent(0)
      import org.antlr.runtime.*;

      public class Main {
        public static void main(String[] args) throws Exception {
          for (int i=0; i<args.length; i++) {
            CharStream input = new ANTLRFileStream(args[i]);
            FuzzyJava lex = new FuzzyJava(input);
            TokenStream tokens = new CommonTokenStream(lex);
            tokens.toString();
            //System.out.println(tokens);
          }
        }
      }
    END

    expected_output = <<-END.fixed_indent(0)
      found class Main
      found method main
      found var i
      found var input
      found call ANTLRFileStream
      found var lex
      found call FuzzyJava
      found var tokens
      found call CommonTokenStream
      found call tokens.toString
      found // comment //System.out.println(tokens);
    END

    lexer = Fuzzy::Lexer.new(input)
    lexer.each { |tk| tk }
    lexer.output.should == expected_output
  end

end
data/test/functional/lexer/nuances.rb
@@ -0,0 +1,47 @@
#!/usr/bin/ruby
# encoding: utf-8

require 'antlr3/test/functional'

class TestBug80 < ANTLR3::Test::Functional
  inline_grammar(<<-'END')
    lexer grammar Bug80;
    options { language = Ruby; }

    ID_LIKE
      : 'defined'
      | {false}? Identifier
      | Identifier
      ;

    fragment
    // with just 'a', output compiles
    Identifier: 'a'..'z'+ ;
  END

  example "um... something" do
    lexer = Bug80::Lexer.new( 'defined' )
    tokens = lexer.each { |tk| tk }
  end
end


class TestEOF < ANTLR3::Test::Functional

  inline_grammar(<<-'END')
    lexer grammar EndOfFile;

    options {
      language = Ruby;
    }

    KEND: EOF;
    SPACE: ' ';
  END

  example 'referencing EOF in a rule' do
    lexer = EndOfFile::Lexer.new( " " )
    tks = lexer.map { |tk| tk }
  end
end
data/test/functional/lexer/properties.rb
@@ -0,0 +1,104 @@
#!/usr/bin/ruby
# encoding: utf-8

require 'antlr3/test/functional'

class TestLexerRuleReference < ANTLR3::Test::Functional

  inline_grammar(<<-'END')
    lexer grammar RuleProperty;
    options {
      language = Ruby;
    }

    @lexer::init {
      @properties = []
    }
    @lexer::members {
      attr_reader :properties
    }

    IDENTIFIER:
        ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')*
        {
          @properties << [$text, $type, $line, $pos, $index, $channel, $start, $stop]
        }
      ;
    WS: (' ' | '\n')+;
  END

  example "referencing lexer rule properties" do
    lexer = RuleProperty::Lexer.new( "foobar _ab98 \n A12sdf" )
    tokens = lexer.map { |tk| tk }

    lexer.properties.should have(3).things
    text, type, line, pos, index, channel, start, stop = lexer.properties[0]
    text.should == 'foobar'
    type.should == RuleProperty::TokenData::IDENTIFIER
    line.should == 1
    pos.should == 0
    index.should == -1
    channel.should == ANTLR3::DEFAULT_CHANNEL
    start.should == 0
    stop.should == 5

    text, type, line, pos, index, channel, start, stop = lexer.properties[1]
    text.should == '_ab98'
    type.should == RuleProperty::TokenData::IDENTIFIER
    line.should == 1
    pos.should == 7
    index.should == -1
    channel.should == ANTLR3::DEFAULT_CHANNEL
    start.should == 7
    stop.should == 11

    lexer.properties.should have(3).things
    text, type, line, pos, index, channel, start, stop = lexer.properties[2]
    text.should == 'A12sdf'
    type.should == RuleProperty::TokenData::IDENTIFIER
    line.should == 2
    pos.should == 1
    index.should == -1
    channel.should == ANTLR3::DEFAULT_CHANNEL
    start.should == 15
    stop.should == 20
  end

end

class TestLexerRuleLabel < ANTLR3::Test::Functional
  inline_grammar(<<-'END')
    lexer grammar LexerRuleLabel;
    options {
      language = Ruby;
    }

    @members { attr_reader :token_text }

    A: 'a'..'z' WS '0'..'9'
       {
         @token_text = $WS.text
       }
     ;

    fragment WS :
        ( ' '
        | '\t'
        | ( '\n'
          | '\r\n'
          | '\r'
          )
        )+
        { $channel = HIDDEN }
        ;
  END

  example "referencing other token rule values with labels" do
    lexer = LexerRuleLabel::Lexer.new 'a 2'
    lexer.next_token
    lexer.token_text.should == ' '
  end

end
data/test/functional/lexer/syn-pred.rb
@@ -0,0 +1,32 @@
#!/usr/bin/ruby
# encoding: utf-8

require 'antlr3/test/functional'

class TestSyntacticPredicate < ANTLR3::Test::Functional
  inline_grammar(<<-'END')
    lexer grammar SyntacticPredicateGate;
    options {
      language = Ruby;
    }

    FOO
      : ('ab')=> A
      | ('ac')=> B
      ;

    fragment
    A: 'a';

    fragment
    B: 'a';
  END

  example 'gating syntactic predicate rule' do
    lexer = SyntacticPredicateGate::Lexer.new( 'ac' )
    token = lexer.next_token
  end

end
data/test/functional/lexer/xml.rb
@@ -0,0 +1,206 @@
#!/usr/bin/ruby
# encoding: utf-8

require 'antlr3/test/functional'

class XMLLexerTest < ANTLR3::Test::Functional
  inline_grammar(<<-'END')
    lexer grammar XML;
    options { language = Ruby; }

    @members {
      include ANTLR3::Test::CaptureOutput
      include ANTLR3::Test::RaiseErrors

      def quote(text)
        text = text.gsub(/\"/, '\\"')
        \%("#{ text }")
      end
    }

    DOCUMENT
      : XMLDECL? WS? DOCTYPE? WS? ELEMENT WS?
      ;

    fragment DOCTYPE
      :
        '<!DOCTYPE' WS rootElementName=GENERIC_ID
        {say("ROOTELEMENT: " + $rootElementName.text)}
        WS
        (
          ( 'SYSTEM' WS sys1=VALUE
            {say("SYSTEM: " + $sys1.text)}

          | 'PUBLIC' WS pub=VALUE WS sys2=VALUE
            {say("PUBLIC: " + $pub.text)}
            {say("SYSTEM: " + $sys2.text)}
          )
          ( WS )?
        )?
        ( dtd=INTERNAL_DTD
          {say("INTERNAL DTD: " + $dtd.text)}
        )?
        '>'
      ;

    fragment INTERNAL_DTD : '[' (options {greedy=false;} : .)* ']' ;

    fragment PI :
        '<?' target=GENERIC_ID WS?
        {say("PI: " + $target.text)}
        ( ATTRIBUTE WS? )* '?>'
      ;

    fragment XMLDECL :
        '<?' ('x'|'X') ('m'|'M') ('l'|'L') WS?
        {say("XML declaration")}
        ( ATTRIBUTE WS? )* '?>'
      ;


    fragment ELEMENT
      : ( START_TAG
          ( ELEMENT
          | t=PCDATA
            {say("PCDATA: " << quote($t.text))}
          | t=CDATA
            {say("CDATA: " << quote($t.text))}
          | t=COMMENT
            {say("Comment: " << quote($t.text))}
          | pi=PI
          )*
          END_TAG
        | EMPTY_ELEMENT
        )
      ;

    fragment START_TAG
      : '<' WS? name=GENERIC_ID WS?
        {say("Start Tag: " + $name.text)}
        ( ATTRIBUTE WS? )* '>'
      ;

    fragment EMPTY_ELEMENT
      : '<' WS? name=GENERIC_ID WS?
        {say("Empty Element: " + $name.text)}
        ( ATTRIBUTE WS? )* '/>'
      ;

    fragment ATTRIBUTE
      : name=GENERIC_ID WS? '=' WS? value=VALUE
        {say("Attr: " + $name.text + " = "+ $value.text)}
      ;

    fragment END_TAG
      : '</' WS? name=GENERIC_ID WS? '>'
        {say("End Tag: " + $name.text)}
      ;

    fragment COMMENT
      : '<!--' (options {greedy=false;} : .)* '-->'
      ;

    fragment CDATA
      : '<![CDATA[' (options {greedy=false;} : .)* ']]>'
      ;

    fragment PCDATA : (~'<')+ ;

    fragment VALUE :
        ( '\"' (~'\"')* '\"'
        | '\'' (~'\'')* '\''
        )
      ;

    fragment GENERIC_ID
      : ( LETTER | '_' | ':')
        ( options {greedy=true;} : LETTER | '0'..'9' | '.' | '-' | '_' | ':' )*
      ;

    fragment LETTER
      : 'a'..'z'
      | 'A'..'Z'
      ;

    fragment WS :
        ( ' '
        | '\t'
        | ( '\n'
          | '\r\n'
          | '\r'
          )
        )+
      ;
  END

  it "should be valid" do
    lexer = XML::Lexer.new(<<-'END'.fixed_indent(0))
      <?xml version='1.0'?>
      <!DOCTYPE component [
      <!ELEMENT component (PCDATA|sub)*>
      <!ATTLIST component
      attr CDATA #IMPLIED
      attr2 CDATA #IMPLIED
      >
      <!ELMENT sub EMPTY>

      ]>
      <component attr="val'ue" attr2='val"ue'>
      <!-- This is a comment -->
      Text
      <![CDATA[huhu]]>
      öäüß
      &amp;
      &lt;
      <?xtal cursor='11'?>
      <sub/>
      <sub></sub>
      </component>
    END

    lexer.map { |tk| tk }

    lexer.output.should == <<-'END'.fixed_indent(0)
      XML declaration
      Attr: version = '1.0'
      ROOTELEMENT: component
      INTERNAL DTD: [
      <!ELEMENT component (PCDATA|sub)*>
      <!ATTLIST component
      attr CDATA #IMPLIED
      attr2 CDATA #IMPLIED
      >
      <!ELMENT sub EMPTY>

      ]
      Start Tag: component
      Attr: attr = "val'ue"
      Attr: attr2 = 'val"ue'
      PCDATA: "
      "
      Comment: "<!-- This is a comment -->"
      PCDATA: "
      Text
      "
      CDATA: "<![CDATA[huhu]]>"
      PCDATA: "
      öäüß
      &amp;
      &lt;
      "
      PI: xtal
      Attr: cursor = '11'
      PCDATA: "
      "
      Empty Element: sub
      PCDATA: "
      "
      Start Tag: sub
      End Tag: sub
      PCDATA: "
      "
      End Tag: component
    END
  end

end