antlr3 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ANTLR-LICENSE.txt +26 -0
- data/History.txt +66 -0
- data/README.txt +139 -0
- data/bin/antlr4ruby +33 -0
- data/java/RubyTarget.java +524 -0
- data/java/antlr-full-3.2.1.jar +0 -0
- data/lib/antlr3.rb +176 -0
- data/lib/antlr3/constants.rb +88 -0
- data/lib/antlr3/debug.rb +701 -0
- data/lib/antlr3/debug/event-hub.rb +210 -0
- data/lib/antlr3/debug/record-event-listener.rb +25 -0
- data/lib/antlr3/debug/rule-tracer.rb +55 -0
- data/lib/antlr3/debug/socket.rb +360 -0
- data/lib/antlr3/debug/trace-event-listener.rb +92 -0
- data/lib/antlr3/dfa.rb +247 -0
- data/lib/antlr3/dot.rb +174 -0
- data/lib/antlr3/error.rb +657 -0
- data/lib/antlr3/main.rb +561 -0
- data/lib/antlr3/modes/ast-builder.rb +41 -0
- data/lib/antlr3/modes/filter.rb +56 -0
- data/lib/antlr3/profile.rb +322 -0
- data/lib/antlr3/recognizers.rb +1280 -0
- data/lib/antlr3/streams.rb +985 -0
- data/lib/antlr3/streams/interactive.rb +91 -0
- data/lib/antlr3/streams/rewrite.rb +412 -0
- data/lib/antlr3/test/call-stack.rb +57 -0
- data/lib/antlr3/test/config.rb +23 -0
- data/lib/antlr3/test/core-extensions.rb +269 -0
- data/lib/antlr3/test/diff.rb +165 -0
- data/lib/antlr3/test/functional.rb +207 -0
- data/lib/antlr3/test/grammar.rb +371 -0
- data/lib/antlr3/token.rb +592 -0
- data/lib/antlr3/tree.rb +1415 -0
- data/lib/antlr3/tree/debug.rb +163 -0
- data/lib/antlr3/tree/visitor.rb +84 -0
- data/lib/antlr3/tree/wizard.rb +481 -0
- data/lib/antlr3/util.rb +149 -0
- data/lib/antlr3/version.rb +27 -0
- data/samples/ANTLRv3Grammar.g +621 -0
- data/samples/Cpp.g +749 -0
- data/templates/AST.stg +335 -0
- data/templates/ASTDbg.stg +40 -0
- data/templates/ASTParser.stg +153 -0
- data/templates/ASTTreeParser.stg +272 -0
- data/templates/Dbg.stg +192 -0
- data/templates/Ruby.stg +1514 -0
- data/test/functional/ast-output/auto-ast.rb +797 -0
- data/test/functional/ast-output/construction.rb +555 -0
- data/test/functional/ast-output/hetero-nodes.rb +753 -0
- data/test/functional/ast-output/rewrites.rb +1327 -0
- data/test/functional/ast-output/tree-rewrite.rb +1662 -0
- data/test/functional/debugging/debug-mode.rb +689 -0
- data/test/functional/debugging/profile-mode.rb +165 -0
- data/test/functional/debugging/rule-tracing.rb +74 -0
- data/test/functional/delegation/import.rb +379 -0
- data/test/functional/lexer/basic.rb +559 -0
- data/test/functional/lexer/filter-mode.rb +245 -0
- data/test/functional/lexer/nuances.rb +47 -0
- data/test/functional/lexer/properties.rb +104 -0
- data/test/functional/lexer/syn-pred.rb +32 -0
- data/test/functional/lexer/xml.rb +206 -0
- data/test/functional/main/main-scripts.rb +245 -0
- data/test/functional/parser/actions.rb +224 -0
- data/test/functional/parser/backtracking.rb +244 -0
- data/test/functional/parser/basic.rb +282 -0
- data/test/functional/parser/calc.rb +98 -0
- data/test/functional/parser/ll-star.rb +143 -0
- data/test/functional/parser/nuances.rb +165 -0
- data/test/functional/parser/predicates.rb +103 -0
- data/test/functional/parser/properties.rb +242 -0
- data/test/functional/parser/rule-methods.rb +132 -0
- data/test/functional/parser/scopes.rb +274 -0
- data/test/functional/token-rewrite/basic.rb +318 -0
- data/test/functional/token-rewrite/via-parser.rb +100 -0
- data/test/functional/tree-parser/basic.rb +750 -0
- data/test/unit/sample-input/file-stream-1 +2 -0
- data/test/unit/sample-input/teststreams.input2 +2 -0
- data/test/unit/test-dfa.rb +52 -0
- data/test/unit/test-exceptions.rb +44 -0
- data/test/unit/test-recognizers.rb +55 -0
- data/test/unit/test-scheme.rb +62 -0
- data/test/unit/test-streams.rb +459 -0
- data/test/unit/test-tree-wizard.rb +535 -0
- data/test/unit/test-trees.rb +854 -0
- metadata +205 -0
@@ -0,0 +1,245 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
require 'antlr3/test/functional'
|
5
|
+
|
6
|
+
# Functional spec for a lexer generated from a grammar that sets
# +filter=true+. The single example drains the lexer over a small Java
# snippet and makes no assertions about the tokens it produces.
#
# NOTE(review): +inline_grammar+, +example+ and String#fixed_indent are
# supplied by the antlr3 test helpers, which are not visible in this file.
class TestFilterMode < ANTLR3::Test::Functional

  # Heredoc bodies below are kept flush-left so that the grammar text and the
  # lexer input remain exactly as written.
  inline_grammar(<<-'END')
lexer grammar Filter;
options {
language = Ruby;
filter=true;
}

IMPORT
: 'import' WS QIDStar WS? ';'
;

RETURN
: 'return' .* ';'
;

CLASS
: 'class' WS ID WS? ('extends' WS QID WS?)?
('implements' WS QID WS? (',' WS? QID WS?)*)? '{'
;

COMMENT
: '/*' .* '*/'
;

STRING
: '"' (options {greedy=false;}: ESC | .)* '"'
;

CHAR
: '\'' (options {greedy=false;}: ESC | .)* '\''
;

WS : (' '|'\t'|'\n')+
;

fragment
QID : ID ('.' ID)*
;

/** QID cannot see beyond end of token so using QID '.*'? somewhere won't
* ever match since k=1 look in the QID loop of '.' will make it loop.
* I made this rule to compensate.
*/
fragment
QIDStar
: ID ('.' ID)* '.*'?
;

fragment
TYPE: QID '[]'?
;

fragment
ARG : TYPE WS ID
;

fragment
ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
;

fragment
ESC : '\\' ('"'|'\''|'\\')
;
  END

  example "skipping tokens that aren't important with filter mode" do
    input = <<-END.fixed_indent(0)
import org.antlr.runtime.*;

public class Main {
public static void main(String[] args) throws Exception {
for (int i=0; i<args.length; i++) {
CharStream input = new ANTLRFileStream(args[i]);
FuzzyJava lex = new FuzzyJava(input);
TokenStream tokens = new CommonTokenStream(lex);
tokens.toString();
//System.out.println(tokens);
}
}
}
    END

    lexer = Filter::Lexer.new( input )
    # Walk the whole token stream; the tokens themselves are not inspected.
    lexer.map { |tk| tk }
  end

end
|
96
|
+
|
97
|
+
|
98
|
+
# Functional spec for "fuzzy" lexing: a +filter=true+ lexer whose rule actions
# report the Java constructs they recognise (classes, methods, fields, calls,
# comments). The example compares the collected report with the expected text.
#
# NOTE(review): the grammar mixes in ANTLR3::Test::CaptureOutput; presumably
# that module provides the +say+ used in the actions and the +output+ read by
# the example. Confirm against the test helpers, which are not visible here.
class TestFuzzy < ANTLR3::Test::Functional

  # Heredoc bodies below are kept flush-left so that the grammar text, the
  # lexer input and the expected report remain exactly as written.
  inline_grammar(<<-'END')
lexer grammar Fuzzy;
options {
language = Ruby;
filter=true;
}

@members {
include ANTLR3::Test::CaptureOutput
}

IMPORT
: 'import' WS name=QIDStar WS? ';'
;

/** Avoids having "return foo;" match as a field */
RETURN
: 'return' (options {greedy=false;}:.)* ';'
;

CLASS
: 'class' WS name=ID WS? ('extends' WS QID WS?)?
('implements' WS QID WS? (',' WS? QID WS?)*)? '{'
{
say("found class " << $name.text)
}
;

METHOD
: TYPE WS name=ID WS? '(' ( ARG WS? (',' WS? ARG WS?)* )? ')' WS?
('throws' WS QID WS? (',' WS? QID WS?)*)? '{'
{
say("found method " << $name.text)
}
;

FIELD
: TYPE WS name=ID '[]'? WS? (';'|'=')
{
say("found var " << $name.text)
}
;

STAT: ('if'|'while'|'switch'|'for') WS? '(' ;

CALL
: name=QID WS? '('
{
say("found call " << $name.text)
}
;

COMMENT
: '/*' (options {greedy=false;} : . )* '*/'
{
say("found comment " << self.text)
}
;

SL_COMMENT
: '//' (options {greedy=false;} : . )* '\n'
{
say("found // comment " << self.text)
}
;

STRING
: '"' (options {greedy=false;}: ESC | .)* '"'
;

CHAR
: '\'' (options {greedy=false;}: ESC | .)* '\''
;

WS : (' '|'\t'|'\n')+
;

fragment
QID : ID ('.' ID)*
;

/** QID cannot see beyond end of token so using QID '.*'? somewhere won't
* ever match since k=1 look in the QID loop of '.' will make it loop.
* I made this rule to compensate.
*/
fragment
QIDStar
: ID ('.' ID)* '.*'?
;

fragment
TYPE: QID '[]'?
;

fragment
ARG : TYPE WS ID
;

fragment
ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
;

fragment
ESC : '\\' ('"'|'\''|'\\')
;
  END

  example "fuzzy lexing with the filter mode option" do
    input = <<-END.fixed_indent(0)
import org.antlr.runtime.*;

public class Main {
public static void main(String[] args) throws Exception {
for (int i=0; i<args.length; i++) {
CharStream input = new ANTLRFileStream(args[i]);
FuzzyJava lex = new FuzzyJava(input);
TokenStream tokens = new CommonTokenStream(lex);
tokens.toString();
//System.out.println(tokens);
}
}
}
    END

    expected_output = <<-END.fixed_indent(0)
found class Main
found method main
found var i
found var input
found call ANTLRFileStream
found var lex
found call FuzzyJava
found var tokens
found call CommonTokenStream
found call tokens.toString
found // comment //System.out.println(tokens);
    END

    lexer = Fuzzy::Lexer.new(input)
    # Lex everything first so that every rule action has run before the
    # captured report is compared.
    lexer.each { |tk| tk }
    lexer.output.should == expected_output
  end

end
|
245
|
+
|
@@ -0,0 +1,47 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
require 'antlr3/test/functional'
|
5
|
+
|
6
|
+
# Spec for the grammar shape tracked as "Bug80": a token rule whose
# alternatives are a literal, a fragment reference gated by an always-false
# semantic predicate, and the same fragment reference ungated. The example
# only lexes the input 'defined' and makes no assertions.
#
# NOTE(review): the grammar comment ("with just 'a', output compiles")
# suggests the original bug was in the generated code -- confirm.
class TestBug80 < ANTLR3::Test::Functional
  # The heredoc body is kept flush-left so the grammar text stays exactly as
  # written.
  inline_grammar(<<-'END')
lexer grammar Bug80;
options { language = Ruby; }

ID_LIKE
: 'defined'
| {false}? Identifier
| Identifier
;

fragment
// with just 'a', output compiles
Identifier: 'a'..'z'+ ;
  END

  example "um... something" do
    lexer = Bug80::Lexer.new( 'defined' )
    # Walk the token stream; the tokens are not inspected.
    lexer.each { |tk| tk }
  end
end
|
27
|
+
|
28
|
+
|
29
|
+
# Spec for a lexer grammar in which a token rule (KEND) refers to EOF
# directly. The example lexes a single space and makes no assertions about
# the resulting tokens.
class TestEOF < ANTLR3::Test::Functional

  # The heredoc body is kept flush-left so the grammar text stays exactly as
  # written.
  inline_grammar(<<-'END')
lexer grammar EndOfFile;

options {
language = Ruby;
}

KEND: EOF;
SPACE: ' ';
  END

  example 'referencing EOF in a rule' do
    lexer = EndOfFile::Lexer.new( " " )
    # Walk the token stream; the tokens are not inspected.
    lexer.map { |tk| tk }
  end
end
|
47
|
+
|
@@ -0,0 +1,104 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
require 'antlr3/test/functional'
|
5
|
+
|
6
|
+
# Spec for the rule-property references ($text, $type, $line, $pos, $index,
# $channel, $start, $stop) inside a lexer rule action. The IDENTIFIER action
# appends one array of these values to @properties per match, and the example
# checks the recorded values for each identifier in the input.
class TestLexerRuleReference < ANTLR3::Test::Functional

  # The heredoc body is kept flush-left so the grammar text stays exactly as
  # written.
  inline_grammar(<<-'END')
lexer grammar RuleProperty;
options {
language = Ruby;
}

@lexer::init {
@properties = []
}
@lexer::members {
attr_reader :properties
}

IDENTIFIER:
('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')*
{
@properties << [$text, $type, $line, $pos, $index, $channel, $start, $stop]
}
;
WS: (' ' | '\n')+;
  END

  example "referencing lexer rule properties" do
    lexer = RuleProperty::Lexer.new( "foobar _ab98 \n A12sdf" )
    # Lex everything so the IDENTIFIER action has run for every match.
    lexer.map { |tk| tk }

    # Per-identifier expectations, in input order. Type, index and channel
    # are the same for every identifier and are checked inside the loop.
    #   text      line  pos  start  stop
    expected = [
      [ 'foobar', 1,    0,   0,     5  ],
      [ '_ab98',  1,    7,   7,     11 ],
      [ 'A12sdf', 2,    1,   15,    20 ]
    ]

    lexer.properties.should have(3).things

    expected.each_with_index do |(want_text, want_line, want_pos, want_start, want_stop), i|
      text, type, line, pos, index, channel, start, stop = lexer.properties[i]
      text.should == want_text
      type.should == RuleProperty::TokenData::IDENTIFIER
      line.should == want_line
      pos.should == want_pos
      index.should == -1
      channel.should == ANTLR3::DEFAULT_CHANNEL
      start.should == want_start
      stop.should == want_stop
    end
  end

end
|
69
|
+
|
70
|
+
# Spec for referring to another token rule's value from within a lexer rule
# action: rule A stores $WS.text in @token_text, and the example checks that
# value after a single call to next_token.
class TestLexerRuleLabel < ANTLR3::Test::Functional
  # The heredoc body is kept flush-left so the grammar text stays exactly as
  # written.
  inline_grammar(<<-'END')
lexer grammar LexerRuleLabel;
options {
language = Ruby;
}

@members { attr_reader :token_text }

A: 'a'..'z' WS '0'..'9'
{
@token_text = $WS.text
}
;

fragment WS :
( ' '
| '\t'
| ( '\n'
| '\r\n'
| '\r'
)
)+
{ $channel = HIDDEN }
;
  END

  example "referencing other token rule values with labels" do
    lexer = LexerRuleLabel::Lexer.new 'a 2'
    # One token ('a 2') is enough to run rule A's action.
    lexer.next_token
    lexer.token_text.should == ' '
  end

end
|
104
|
+
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
require 'antlr3/test/functional'
|
5
|
+
|
6
|
+
# Spec for syntactic predicates in a lexer rule: FOO chooses between two
# fragments that both match 'a', with the choice gated by the lookahead
# predicates ('ab')=> and ('ac')=>. The example requests one token from the
# input 'ac' and makes no assertions about it.
class TestSyntacticPredicate < ANTLR3::Test::Functional
  # The heredoc body is kept flush-left so the grammar text stays exactly as
  # written.
  inline_grammar(<<-'END')
lexer grammar SyntacticPredicateGate;
options {
language = Ruby;
}

FOO
: ('ab')=> A
| ('ac')=> B
;

fragment
A: 'a';

fragment
B: 'a';
  END

  example 'gating syntactic predicate rule' do
    lexer = SyntacticPredicateGate::Lexer.new( 'ac' )
    # The returned token is not inspected.
    lexer.next_token
  end

end
|
32
|
+
|
@@ -0,0 +1,206 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
# encoding: utf-8
|
3
|
+
|
4
|
+
require 'antlr3/test/functional'
|
5
|
+
|
6
|
+
# Functional spec for an XML lexer written as a single DOCUMENT token rule
# built from fragment rules. The rule actions report what they recognise via
# +say+, and the example compares the collected report with the expected text.
#
# The character-data lines "&amp;" and "&lt;" are written as entities in both
# the input and the expected report: the grammar's PCDATA rule is (~'<')+, so
# a literal '<' cannot appear inside character data.
#
# NOTE(review): the grammar mixes in ANTLR3::Test::CaptureOutput and
# ANTLR3::Test::RaiseErrors; presumably the former provides +say+/+output+.
# Confirm against the test helpers, which are not visible here.
class XMLLexerTest < ANTLR3::Test::Functional
  # Heredoc bodies below are kept flush-left so that the grammar text, the
  # XML input and the expected report remain exactly as written.
  inline_grammar(<<-'END')
lexer grammar XML;
options { language = Ruby; }

@members {
include ANTLR3::Test::CaptureOutput
include ANTLR3::Test::RaiseErrors

def quote(text)
text = text.gsub(/\"/, '\\"')
\%("#{ text }")
end
}

DOCUMENT
: XMLDECL? WS? DOCTYPE? WS? ELEMENT WS?
;

fragment DOCTYPE
:
'<!DOCTYPE' WS rootElementName=GENERIC_ID
{say("ROOTELEMENT: " + $rootElementName.text)}
WS
(
( 'SYSTEM' WS sys1=VALUE
{say("SYSTEM: " + $sys1.text)}

| 'PUBLIC' WS pub=VALUE WS sys2=VALUE
{say("PUBLIC: " + $pub.text)}
{say("SYSTEM: " + $sys2.text)}
)
( WS )?
)?
( dtd=INTERNAL_DTD
{say("INTERNAL DTD: " + $dtd.text)}
)?
'>'
;

fragment INTERNAL_DTD : '[' (options {greedy=false;} : .)* ']' ;

fragment PI :
'<?' target=GENERIC_ID WS?
{say("PI: " + $target.text)}
( ATTRIBUTE WS? )* '?>'
;

fragment XMLDECL :
'<?' ('x'|'X') ('m'|'M') ('l'|'L') WS?
{say("XML declaration")}
( ATTRIBUTE WS? )* '?>'
;


fragment ELEMENT
: ( START_TAG
(ELEMENT
| t=PCDATA
{say("PCDATA: " << quote($t.text))}
| t=CDATA
{say("CDATA: " << quote($t.text))}
| t=COMMENT
{say("Comment: " << quote($t.text))}
| pi=PI
)*
END_TAG
| EMPTY_ELEMENT
)
;

fragment START_TAG
: '<' WS? name=GENERIC_ID WS?
{say("Start Tag: " + $name.text)}
( ATTRIBUTE WS? )* '>'
;

fragment EMPTY_ELEMENT
: '<' WS? name=GENERIC_ID WS?
{say("Empty Element: " + $name.text)}
( ATTRIBUTE WS? )* '/>'
;

fragment ATTRIBUTE
: name=GENERIC_ID WS? '=' WS? value=VALUE
{say("Attr: " + $name.text + " = "+ $value.text)}
;

fragment END_TAG
: '</' WS? name=GENERIC_ID WS? '>'
{say("End Tag: " + $name.text)}
;

fragment COMMENT
: '<!--' (options {greedy=false;} : .)* '-->'
;

fragment CDATA
: '<![CDATA[' (options {greedy=false;} : .)* ']]>'
;

fragment PCDATA : (~'<')+ ;

fragment VALUE :
( '\"' (~'\"')* '\"'
| '\'' (~'\'')* '\''
)
;

fragment GENERIC_ID
: ( LETTER | '_' | ':')
( options {greedy=true;} : LETTER | '0'..'9' | '.' | '-' | '_' | ':' )*
;

fragment LETTER
: 'a'..'z'
| 'A'..'Z'
;

fragment WS :
( ' '
| '\t'
| ( '\n'
| '\r\n'
| '\r'
)
)+
;
  END

  it "should be valid" do
    lexer = XML::Lexer.new(<<-'END'.fixed_indent(0))
<?xml version='1.0'?>
<!DOCTYPE component [
<!ELEMENT component (PCDATA|sub)*>
<!ATTLIST component
attr CDATA #IMPLIED
attr2 CDATA #IMPLIED
>
<!ELMENT sub EMPTY>

]>
<component attr="val'ue" attr2='val"ue'>
<!-- This is a comment -->
Text
<![CDATA[huhu]]>
öäüß
&amp;
&lt;
<?xtal cursor='11'?>
<sub/>
<sub></sub>
</component>
    END

    # Lex the whole document so that every rule action has reported before
    # the captured output is compared.
    lexer.map { |tk| tk }

    lexer.output.should == <<-'END'.fixed_indent(0)
XML declaration
Attr: version = '1.0'
ROOTELEMENT: component
INTERNAL DTD: [
<!ELEMENT component (PCDATA|sub)*>
<!ATTLIST component
attr CDATA #IMPLIED
attr2 CDATA #IMPLIED
>
<!ELMENT sub EMPTY>

]
Start Tag: component
Attr: attr = "val'ue"
Attr: attr2 = 'val"ue'
PCDATA: "
"
Comment: "<!-- This is a comment -->"
PCDATA: "
Text
"
CDATA: "<![CDATA[huhu]]>"
PCDATA: "
öäüß
&amp;
&lt;
"
PI: xtal
Attr: cursor = '11'
PCDATA: "
"
Empty Element: sub
PCDATA: "
"
Start Tag: sub
End Tag: sub
PCDATA: "
"
End Tag: component
    END
  end

end
|