antlr3 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ANTLR-LICENSE.txt +26 -0
- data/History.txt +66 -0
- data/README.txt +139 -0
- data/bin/antlr4ruby +33 -0
- data/java/RubyTarget.java +524 -0
- data/java/antlr-full-3.2.1.jar +0 -0
- data/lib/antlr3.rb +176 -0
- data/lib/antlr3/constants.rb +88 -0
- data/lib/antlr3/debug.rb +701 -0
- data/lib/antlr3/debug/event-hub.rb +210 -0
- data/lib/antlr3/debug/record-event-listener.rb +25 -0
- data/lib/antlr3/debug/rule-tracer.rb +55 -0
- data/lib/antlr3/debug/socket.rb +360 -0
- data/lib/antlr3/debug/trace-event-listener.rb +92 -0
- data/lib/antlr3/dfa.rb +247 -0
- data/lib/antlr3/dot.rb +174 -0
- data/lib/antlr3/error.rb +657 -0
- data/lib/antlr3/main.rb +561 -0
- data/lib/antlr3/modes/ast-builder.rb +41 -0
- data/lib/antlr3/modes/filter.rb +56 -0
- data/lib/antlr3/profile.rb +322 -0
- data/lib/antlr3/recognizers.rb +1280 -0
- data/lib/antlr3/streams.rb +985 -0
- data/lib/antlr3/streams/interactive.rb +91 -0
- data/lib/antlr3/streams/rewrite.rb +412 -0
- data/lib/antlr3/test/call-stack.rb +57 -0
- data/lib/antlr3/test/config.rb +23 -0
- data/lib/antlr3/test/core-extensions.rb +269 -0
- data/lib/antlr3/test/diff.rb +165 -0
- data/lib/antlr3/test/functional.rb +207 -0
- data/lib/antlr3/test/grammar.rb +371 -0
- data/lib/antlr3/token.rb +592 -0
- data/lib/antlr3/tree.rb +1415 -0
- data/lib/antlr3/tree/debug.rb +163 -0
- data/lib/antlr3/tree/visitor.rb +84 -0
- data/lib/antlr3/tree/wizard.rb +481 -0
- data/lib/antlr3/util.rb +149 -0
- data/lib/antlr3/version.rb +27 -0
- data/samples/ANTLRv3Grammar.g +621 -0
- data/samples/Cpp.g +749 -0
- data/templates/AST.stg +335 -0
- data/templates/ASTDbg.stg +40 -0
- data/templates/ASTParser.stg +153 -0
- data/templates/ASTTreeParser.stg +272 -0
- data/templates/Dbg.stg +192 -0
- data/templates/Ruby.stg +1514 -0
- data/test/functional/ast-output/auto-ast.rb +797 -0
- data/test/functional/ast-output/construction.rb +555 -0
- data/test/functional/ast-output/hetero-nodes.rb +753 -0
- data/test/functional/ast-output/rewrites.rb +1327 -0
- data/test/functional/ast-output/tree-rewrite.rb +1662 -0
- data/test/functional/debugging/debug-mode.rb +689 -0
- data/test/functional/debugging/profile-mode.rb +165 -0
- data/test/functional/debugging/rule-tracing.rb +74 -0
- data/test/functional/delegation/import.rb +379 -0
- data/test/functional/lexer/basic.rb +559 -0
- data/test/functional/lexer/filter-mode.rb +245 -0
- data/test/functional/lexer/nuances.rb +47 -0
- data/test/functional/lexer/properties.rb +104 -0
- data/test/functional/lexer/syn-pred.rb +32 -0
- data/test/functional/lexer/xml.rb +206 -0
- data/test/functional/main/main-scripts.rb +245 -0
- data/test/functional/parser/actions.rb +224 -0
- data/test/functional/parser/backtracking.rb +244 -0
- data/test/functional/parser/basic.rb +282 -0
- data/test/functional/parser/calc.rb +98 -0
- data/test/functional/parser/ll-star.rb +143 -0
- data/test/functional/parser/nuances.rb +165 -0
- data/test/functional/parser/predicates.rb +103 -0
- data/test/functional/parser/properties.rb +242 -0
- data/test/functional/parser/rule-methods.rb +132 -0
- data/test/functional/parser/scopes.rb +274 -0
- data/test/functional/token-rewrite/basic.rb +318 -0
- data/test/functional/token-rewrite/via-parser.rb +100 -0
- data/test/functional/tree-parser/basic.rb +750 -0
- data/test/unit/sample-input/file-stream-1 +2 -0
- data/test/unit/sample-input/teststreams.input2 +2 -0
- data/test/unit/test-dfa.rb +52 -0
- data/test/unit/test-exceptions.rb +44 -0
- data/test/unit/test-recognizers.rb +55 -0
- data/test/unit/test-scheme.rb +62 -0
- data/test/unit/test-streams.rb +459 -0
- data/test/unit/test-tree-wizard.rb +535 -0
- data/test/unit/test-trees.rb +854 -0
- metadata +205 -0
| @@ -0,0 +1,245 @@ | |
| 1 | 
            +
            #!/usr/bin/ruby
         | 
| 2 | 
            +
            # encoding: utf-8
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            require 'antlr3/test/functional'
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            class TestFilterMode < ANTLR3::Test::Functional
         | 
| 7 | 
            +
             | 
| 8 | 
            +
              inline_grammar(<<-'END')
         | 
| 9 | 
            +
                lexer grammar Filter;
         | 
| 10 | 
            +
                options {
         | 
| 11 | 
            +
                    language = Ruby;
         | 
| 12 | 
            +
                    filter=true;
         | 
| 13 | 
            +
                }
         | 
| 14 | 
            +
                
         | 
| 15 | 
            +
                IMPORT
         | 
| 16 | 
            +
                  :  'import' WS QIDStar WS? ';'
         | 
| 17 | 
            +
                  ;
         | 
| 18 | 
            +
                  
         | 
| 19 | 
            +
                RETURN
         | 
| 20 | 
            +
                  :  'return' .* ';'
         | 
| 21 | 
            +
                  ;
         | 
| 22 | 
            +
                
         | 
| 23 | 
            +
                CLASS
         | 
| 24 | 
            +
                  :  'class' WS ID WS? ('extends' WS QID WS?)?
         | 
| 25 | 
            +
                    ('implements' WS QID WS? (',' WS? QID WS?)*)? '{'
         | 
| 26 | 
            +
                  ;
         | 
| 27 | 
            +
                  
         | 
| 28 | 
            +
                COMMENT
         | 
| 29 | 
            +
                    :   '/*' .* '*/'
         | 
| 30 | 
            +
                    ;
         | 
| 31 | 
            +
                
         | 
| 32 | 
            +
                STRING
         | 
| 33 | 
            +
                    :  '"' (options {greedy=false;}: ESC | .)* '"'
         | 
| 34 | 
            +
                  ;
         | 
| 35 | 
            +
                
         | 
| 36 | 
            +
                CHAR
         | 
| 37 | 
            +
                  :  '\'' (options {greedy=false;}: ESC | .)* '\''
         | 
| 38 | 
            +
                  ;
         | 
| 39 | 
            +
                
         | 
| 40 | 
            +
                WS  :   (' '|'\t'|'\n')+
         | 
| 41 | 
            +
                    ;
         | 
| 42 | 
            +
                
         | 
| 43 | 
            +
                fragment
         | 
| 44 | 
            +
                QID :  ID ('.' ID)*
         | 
| 45 | 
            +
                  ;
         | 
| 46 | 
            +
                  
         | 
| 47 | 
            +
                /** QID cannot see beyond end of token so using QID '.*'? somewhere won't
         | 
| 48 | 
            +
                 *  ever match since k=1 look in the QID loop of '.' will make it loop.
         | 
| 49 | 
            +
                 *  I made this rule to compensate.
         | 
| 50 | 
            +
                 */
         | 
| 51 | 
            +
                fragment
         | 
| 52 | 
            +
                QIDStar
         | 
| 53 | 
            +
                  :  ID ('.' ID)* '.*'?
         | 
| 54 | 
            +
                  ;
         | 
| 55 | 
            +
                
         | 
| 56 | 
            +
                fragment
         | 
| 57 | 
            +
                TYPE:   QID '[]'?
         | 
| 58 | 
            +
                    ;
         | 
| 59 | 
            +
                    
         | 
| 60 | 
            +
                fragment
         | 
| 61 | 
            +
                ARG :   TYPE WS ID
         | 
| 62 | 
            +
                    ;
         | 
| 63 | 
            +
                
         | 
| 64 | 
            +
                fragment
         | 
| 65 | 
            +
                ID  :   ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
         | 
| 66 | 
            +
                    ;
         | 
| 67 | 
            +
                
         | 
| 68 | 
            +
                fragment
         | 
| 69 | 
            +
                ESC  :  '\\' ('"'|'\''|'\\')
         | 
| 70 | 
            +
                  ;
         | 
| 71 | 
            +
              END
         | 
| 72 | 
            +
             | 
| 73 | 
            +
              example "skipping tokens that aren't important with filter mode" do
         | 
| 74 | 
            +
                input = <<-END.fixed_indent(0)
         | 
| 75 | 
            +
                  import org.antlr.runtime.*;
         | 
| 76 | 
            +
                  
         | 
| 77 | 
            +
                  public class Main {
         | 
| 78 | 
            +
                    public static void main(String[] args) throws Exception {
         | 
| 79 | 
            +
                        for (int i=0; i<args.length; i++) {
         | 
| 80 | 
            +
                      CharStream input = new ANTLRFileStream(args[i]);
         | 
| 81 | 
            +
                      FuzzyJava lex = new FuzzyJava(input);
         | 
| 82 | 
            +
                      TokenStream tokens = new CommonTokenStream(lex);
         | 
| 83 | 
            +
                      tokens.toString();
         | 
| 84 | 
            +
                      //System.out.println(tokens);
         | 
| 85 | 
            +
                        }
         | 
| 86 | 
            +
                    }
         | 
| 87 | 
            +
                  }
         | 
| 88 | 
            +
                END
         | 
| 89 | 
            +
                
         | 
| 90 | 
            +
                lexer = Filter::Lexer.new( input )
         | 
| 91 | 
            +
                tokens = lexer.map { |tk| tk }
         | 
| 92 | 
            +
              end
         | 
| 93 | 
            +
              
         | 
| 94 | 
            +
             | 
| 95 | 
            +
            end
         | 
| 96 | 
            +
             | 
| 97 | 
            +
             | 
| 98 | 
            +
            class TestFuzzy < ANTLR3::Test::Functional
         | 
| 99 | 
            +
             | 
| 100 | 
            +
              inline_grammar(<<-'END')
         | 
| 101 | 
            +
                lexer grammar Fuzzy;
         | 
| 102 | 
            +
                options {
         | 
| 103 | 
            +
                    language = Ruby;
         | 
| 104 | 
            +
                    filter=true;
         | 
| 105 | 
            +
                }
         | 
| 106 | 
            +
                
         | 
| 107 | 
            +
                @members {
         | 
| 108 | 
            +
                  include ANTLR3::Test::CaptureOutput
         | 
| 109 | 
            +
                }
         | 
| 110 | 
            +
                
         | 
| 111 | 
            +
                IMPORT
         | 
| 112 | 
            +
                  :  'import' WS name=QIDStar WS? ';'
         | 
| 113 | 
            +
                  ;
         | 
| 114 | 
            +
                  
         | 
| 115 | 
            +
                /** Avoids having "return foo;" match as a field */
         | 
| 116 | 
            +
                RETURN
         | 
| 117 | 
            +
                  :  'return' (options {greedy=false;}:.)* ';'
         | 
| 118 | 
            +
                  ;
         | 
| 119 | 
            +
                
         | 
| 120 | 
            +
                CLASS
         | 
| 121 | 
            +
                  :  'class' WS name=ID WS? ('extends' WS QID WS?)?
         | 
| 122 | 
            +
                    ('implements' WS QID WS? (',' WS? QID WS?)*)? '{'
         | 
| 123 | 
            +
                    {  
         | 
| 124 | 
            +
                      say("found class " << $name.text)  
         | 
| 125 | 
            +
                    }
         | 
| 126 | 
            +
                  ;
         | 
| 127 | 
            +
                  
         | 
| 128 | 
            +
                METHOD
         | 
| 129 | 
            +
                    :   TYPE WS name=ID WS? '(' ( ARG WS? (',' WS? ARG WS?)* )? ')' WS? 
         | 
| 130 | 
            +
                       ('throws' WS QID WS? (',' WS? QID WS?)*)? '{'
         | 
| 131 | 
            +
                        {
         | 
| 132 | 
            +
                          say("found method " << $name.text)
         | 
| 133 | 
            +
                        }
         | 
| 134 | 
            +
                    ;
         | 
| 135 | 
            +
                
         | 
| 136 | 
            +
                FIELD
         | 
| 137 | 
            +
                    :   TYPE WS name=ID '[]'? WS? (';'|'=')
         | 
| 138 | 
            +
                        {
         | 
| 139 | 
            +
                          say("found var " << $name.text)
         | 
| 140 | 
            +
                        }
         | 
| 141 | 
            +
                    ;
         | 
| 142 | 
            +
                
         | 
| 143 | 
            +
                STAT:  ('if'|'while'|'switch'|'for') WS? '(' ;
         | 
| 144 | 
            +
                  
         | 
| 145 | 
            +
                CALL
         | 
| 146 | 
            +
                    :   name=QID WS? '('
         | 
| 147 | 
            +
                        {
         | 
| 148 | 
            +
                          say("found call " << $name.text)
         | 
| 149 | 
            +
                        }
         | 
| 150 | 
            +
                    ;
         | 
| 151 | 
            +
                
         | 
| 152 | 
            +
                COMMENT
         | 
| 153 | 
            +
                    :   '/*' (options {greedy=false;} : . )* '*/'
         | 
| 154 | 
            +
                        {
         | 
| 155 | 
            +
                          say("found comment " << self.text)
         | 
| 156 | 
            +
                        }
         | 
| 157 | 
            +
                    ;
         | 
| 158 | 
            +
                
         | 
| 159 | 
            +
                SL_COMMENT
         | 
| 160 | 
            +
                    :   '//' (options {greedy=false;} : . )* '\n'
         | 
| 161 | 
            +
                        {
         | 
| 162 | 
            +
                          say("found // comment " << self.text)
         | 
| 163 | 
            +
                        }
         | 
| 164 | 
            +
                    ;
         | 
| 165 | 
            +
                  
         | 
| 166 | 
            +
                STRING
         | 
| 167 | 
            +
                  :  '"' (options {greedy=false;}: ESC | .)* '"'
         | 
| 168 | 
            +
                  ;
         | 
| 169 | 
            +
                
         | 
| 170 | 
            +
                CHAR
         | 
| 171 | 
            +
                  :  '\'' (options {greedy=false;}: ESC | .)* '\''
         | 
| 172 | 
            +
                  ;
         | 
| 173 | 
            +
                
         | 
| 174 | 
            +
                WS  :   (' '|'\t'|'\n')+
         | 
| 175 | 
            +
                    ;
         | 
| 176 | 
            +
                
         | 
| 177 | 
            +
                fragment
         | 
| 178 | 
            +
                QID :  ID ('.' ID)*
         | 
| 179 | 
            +
                  ;
         | 
| 180 | 
            +
                  
         | 
| 181 | 
            +
                /** QID cannot see beyond end of token so using QID '.*'? somewhere won't
         | 
| 182 | 
            +
                 *  ever match since k=1 look in the QID loop of '.' will make it loop.
         | 
| 183 | 
            +
                 *  I made this rule to compensate.
         | 
| 184 | 
            +
                 */
         | 
| 185 | 
            +
                fragment
         | 
| 186 | 
            +
                QIDStar
         | 
| 187 | 
            +
                  :  ID ('.' ID)* '.*'?
         | 
| 188 | 
            +
                  ;
         | 
| 189 | 
            +
                
         | 
| 190 | 
            +
                fragment
         | 
| 191 | 
            +
                TYPE:   QID '[]'?
         | 
| 192 | 
            +
                    ;
         | 
| 193 | 
            +
                    
         | 
| 194 | 
            +
                fragment
         | 
| 195 | 
            +
                ARG :   TYPE WS ID
         | 
| 196 | 
            +
                    ;
         | 
| 197 | 
            +
                
         | 
| 198 | 
            +
                fragment
         | 
| 199 | 
            +
                ID  :   ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
         | 
| 200 | 
            +
                    ;
         | 
| 201 | 
            +
                
         | 
| 202 | 
            +
                fragment
         | 
| 203 | 
            +
                ESC  :  '\\' ('"'|'\''|'\\')
         | 
| 204 | 
            +
                  ;
         | 
| 205 | 
            +
              END
         | 
| 206 | 
            +
              
         | 
| 207 | 
            +
              example "fuzzy lexing with the filter mode option" do
         | 
| 208 | 
            +
                input = <<-END.fixed_indent(0)
         | 
| 209 | 
            +
                  import org.antlr.runtime.*;
         | 
| 210 | 
            +
                  
         | 
| 211 | 
            +
                  public class Main {
         | 
| 212 | 
            +
                    public static void main(String[] args) throws Exception {
         | 
| 213 | 
            +
                        for (int i=0; i<args.length; i++) {
         | 
| 214 | 
            +
                      CharStream input = new ANTLRFileStream(args[i]);
         | 
| 215 | 
            +
                      FuzzyJava lex = new FuzzyJava(input);
         | 
| 216 | 
            +
                      TokenStream tokens = new CommonTokenStream(lex);
         | 
| 217 | 
            +
                      tokens.toString();
         | 
| 218 | 
            +
                      //System.out.println(tokens);
         | 
| 219 | 
            +
                        }
         | 
| 220 | 
            +
                    }
         | 
| 221 | 
            +
                  }
         | 
| 222 | 
            +
                END
         | 
| 223 | 
            +
                
         | 
| 224 | 
            +
                expected_output = <<-END.fixed_indent(0)
         | 
| 225 | 
            +
                  found class Main
         | 
| 226 | 
            +
                  found method main
         | 
| 227 | 
            +
                  found var i
         | 
| 228 | 
            +
                  found var input
         | 
| 229 | 
            +
                  found call ANTLRFileStream
         | 
| 230 | 
            +
                  found var lex
         | 
| 231 | 
            +
                  found call FuzzyJava
         | 
| 232 | 
            +
                  found var tokens
         | 
| 233 | 
            +
                  found call CommonTokenStream
         | 
| 234 | 
            +
                  found call tokens.toString
         | 
| 235 | 
            +
                  found // comment //System.out.println(tokens);
         | 
| 236 | 
            +
                END
         | 
| 237 | 
            +
                
         | 
| 238 | 
            +
                lexer = Fuzzy::Lexer.new(input)
         | 
| 239 | 
            +
                lexer.each { |tk| tk }
         | 
| 240 | 
            +
                lexer.output.should == expected_output
         | 
| 241 | 
            +
              end
         | 
| 242 | 
            +
             | 
| 243 | 
            +
             | 
| 244 | 
            +
            end
         | 
| 245 | 
            +
             | 
| @@ -0,0 +1,47 @@ | |
| 1 | 
            +
            #!/usr/bin/ruby
         | 
| 2 | 
            +
            # encoding: utf-8
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            require 'antlr3/test/functional'
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            class TestBug80 < ANTLR3::Test::Functional
         | 
| 7 | 
            +
              inline_grammar(<<-'END')
         | 
| 8 | 
            +
                lexer grammar Bug80;
         | 
| 9 | 
            +
                options { language = Ruby; }
         | 
| 10 | 
            +
                 
         | 
| 11 | 
            +
                ID_LIKE
         | 
| 12 | 
            +
                    : 'defined' 
         | 
| 13 | 
            +
                    | {false}? Identifier 
         | 
| 14 | 
            +
                    | Identifier 
         | 
| 15 | 
            +
                    ; 
         | 
| 16 | 
            +
                 
         | 
| 17 | 
            +
                fragment
         | 
| 18 | 
            +
                // with just 'a', output compiles
         | 
| 19 | 
            +
                Identifier: 'a'..'z'+ ;
         | 
| 20 | 
            +
              END
         | 
| 21 | 
            +
              
         | 
| 22 | 
            +
              example "um... something" do
         | 
| 23 | 
            +
                lexer = Bug80::Lexer.new( 'defined' )
         | 
| 24 | 
            +
                tokens = lexer.each { |tk| tk }
         | 
| 25 | 
            +
              end
         | 
| 26 | 
            +
            end
         | 
| 27 | 
            +
             | 
| 28 | 
            +
             | 
| 29 | 
            +
            class TestEOF < ANTLR3::Test::Functional
         | 
| 30 | 
            +
             | 
| 31 | 
            +
              inline_grammar(<<-'END')
         | 
| 32 | 
            +
                lexer grammar EndOfFile;
         | 
| 33 | 
            +
                
         | 
| 34 | 
            +
                options {
         | 
| 35 | 
            +
                  language = Ruby;
         | 
| 36 | 
            +
                }
         | 
| 37 | 
            +
                
         | 
| 38 | 
            +
                KEND: EOF;
         | 
| 39 | 
            +
                SPACE: ' ';
         | 
| 40 | 
            +
              END
         | 
| 41 | 
            +
              
         | 
| 42 | 
            +
              example 'referencing EOF in a rule' do
         | 
| 43 | 
            +
                lexer = EndOfFile::Lexer.new( " " )
         | 
| 44 | 
            +
                tks = lexer.map { |tk| tk }
         | 
| 45 | 
            +
              end
         | 
| 46 | 
            +
            end
         | 
| 47 | 
            +
             | 
| @@ -0,0 +1,104 @@ | |
| 1 | 
            +
            #!/usr/bin/ruby
         | 
| 2 | 
            +
            # encoding: utf-8
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            require 'antlr3/test/functional'
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            class TestLexerRuleReference < ANTLR3::Test::Functional
         | 
| 7 | 
            +
             | 
| 8 | 
            +
              inline_grammar(<<-'END')
         | 
| 9 | 
            +
                lexer grammar RuleProperty;
         | 
| 10 | 
            +
                options {
         | 
| 11 | 
            +
                  language = Ruby;
         | 
| 12 | 
            +
                }
         | 
| 13 | 
            +
                
         | 
| 14 | 
            +
                @lexer::init {
         | 
| 15 | 
            +
                  @properties = []
         | 
| 16 | 
            +
                }
         | 
| 17 | 
            +
                @lexer::members {
         | 
| 18 | 
            +
                  attr_reader :properties
         | 
| 19 | 
            +
                }
         | 
| 20 | 
            +
                
         | 
| 21 | 
            +
                IDENTIFIER: 
         | 
| 22 | 
            +
                        ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')*
         | 
| 23 | 
            +
                        {
         | 
| 24 | 
            +
                          @properties << [$text, $type, $line, $pos, $index, $channel, $start, $stop]
         | 
| 25 | 
            +
                        }
         | 
| 26 | 
            +
                    ;
         | 
| 27 | 
            +
                WS: (' ' | '\n')+;
         | 
| 28 | 
            +
              END
         | 
| 29 | 
            +
             | 
| 30 | 
            +
              example "referencing lexer rule properties" do
         | 
| 31 | 
            +
                lexer = RuleProperty::Lexer.new( "foobar _ab98 \n A12sdf" )
         | 
| 32 | 
            +
                tokens = lexer.map { |tk| tk }
         | 
| 33 | 
            +
                
         | 
| 34 | 
            +
                lexer.properties.should have(3).things
         | 
| 35 | 
            +
                text, type, line, pos, index, channel, start, stop = lexer.properties[0]
         | 
| 36 | 
            +
                text.should == 'foobar'
         | 
| 37 | 
            +
                type.should == RuleProperty::TokenData::IDENTIFIER
         | 
| 38 | 
            +
                line.should == 1
         | 
| 39 | 
            +
                pos.should == 0
         | 
| 40 | 
            +
                index.should == -1
         | 
| 41 | 
            +
                channel.should == ANTLR3::DEFAULT_CHANNEL
         | 
| 42 | 
            +
                start.should == 0
         | 
| 43 | 
            +
                stop.should == 5
         | 
| 44 | 
            +
                
         | 
| 45 | 
            +
                text, type, line, pos, index, channel, start, stop = lexer.properties[1]
         | 
| 46 | 
            +
                text.should == '_ab98'
         | 
| 47 | 
            +
                type.should == RuleProperty::TokenData::IDENTIFIER
         | 
| 48 | 
            +
                line.should == 1
         | 
| 49 | 
            +
                pos.should == 7
         | 
| 50 | 
            +
                index.should == -1
         | 
| 51 | 
            +
                channel.should == ANTLR3::DEFAULT_CHANNEL
         | 
| 52 | 
            +
                start.should == 7
         | 
| 53 | 
            +
                stop.should == 11
         | 
| 54 | 
            +
                
         | 
| 55 | 
            +
                lexer.properties.should have(3).things
         | 
| 56 | 
            +
                text, type, line, pos, index, channel, start, stop = lexer.properties[2]
         | 
| 57 | 
            +
                text.should == 'A12sdf'
         | 
| 58 | 
            +
                type.should == RuleProperty::TokenData::IDENTIFIER
         | 
| 59 | 
            +
                line.should == 2
         | 
| 60 | 
            +
                pos.should == 1
         | 
| 61 | 
            +
                index.should == -1
         | 
| 62 | 
            +
                channel.should == ANTLR3::DEFAULT_CHANNEL
         | 
| 63 | 
            +
                start.should == 15
         | 
| 64 | 
            +
                stop.should == 20
         | 
| 65 | 
            +
              end
         | 
| 66 | 
            +
             | 
| 67 | 
            +
             | 
| 68 | 
            +
            end
         | 
| 69 | 
            +
             | 
| 70 | 
            +
            class TestLexerRuleLabel < ANTLR3::Test::Functional
         | 
| 71 | 
            +
              inline_grammar(<<-'END')
         | 
| 72 | 
            +
                lexer grammar LexerRuleLabel;
         | 
| 73 | 
            +
                options {
         | 
| 74 | 
            +
                  language = Ruby;
         | 
| 75 | 
            +
                }
         | 
| 76 | 
            +
                
         | 
| 77 | 
            +
                @members { attr_reader :token_text }
         | 
| 78 | 
            +
                
         | 
| 79 | 
            +
                A: 'a'..'z' WS '0'..'9'
         | 
| 80 | 
            +
                        {
         | 
| 81 | 
            +
                          @token_text = $WS.text
         | 
| 82 | 
            +
                        }
         | 
| 83 | 
            +
                    ;
         | 
| 84 | 
            +
                
         | 
| 85 | 
            +
                fragment WS  :
         | 
| 86 | 
            +
                        (   ' '
         | 
| 87 | 
            +
                        |   '\t'
         | 
| 88 | 
            +
                        |  ( '\n'
         | 
| 89 | 
            +
                            |	'\r\n'
         | 
| 90 | 
            +
                            |	'\r'
         | 
| 91 | 
            +
                            )
         | 
| 92 | 
            +
                        )+
         | 
| 93 | 
            +
                        { $channel = HIDDEN }
         | 
| 94 | 
            +
                    ;
         | 
| 95 | 
            +
              END
         | 
| 96 | 
            +
              
         | 
| 97 | 
            +
              example "referencing other token rule values with labels" do
         | 
| 98 | 
            +
                lexer = LexerRuleLabel::Lexer.new 'a  2'
         | 
| 99 | 
            +
                lexer.next_token
         | 
| 100 | 
            +
                lexer.token_text.should == '  '
         | 
| 101 | 
            +
              end
         | 
| 102 | 
            +
             | 
| 103 | 
            +
            end
         | 
| 104 | 
            +
             | 
| @@ -0,0 +1,32 @@ | |
| 1 | 
            +
            #!/usr/bin/ruby
         | 
| 2 | 
            +
            # encoding: utf-8
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            require 'antlr3/test/functional'
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            # Functional spec for syntactic predicates used as gates in a lexer grammar.
            #
            # The FOO rule chooses between the fragment rules A and B by way of the
            # predicates ('ab')=> and ('ac')=>. Both fragments match the single
            # character 'a', so only the predicates tell the alternatives apart.
            class TestSyntacticPredicate < ANTLR3::Test::Functional
              inline_grammar <<-'GRAMMAR'
                lexer grammar SyntacticPredicateGate;
                options {
                  language = Ruby;
                }
                
                FOO
                  : ('ab')=> A
                  | ('ac')=> B
                  ;
                
                fragment
                A: 'a';
                
                fragment
                B: 'a';
              GRAMMAR

              # Smoke test: requests one token from the input 'ac'. The token itself
              # is not inspected; the example only exercises the call.
              example 'gating syntactic predicate rule' do
                lexer = SyntacticPredicateGate::Lexer.new('ac')
                lexer.next_token
              end
            end
         | 
| 32 | 
            +
             | 
| @@ -0,0 +1,206 @@ | |
| 1 | 
            +
            #!/usr/bin/ruby
         | 
| 2 | 
            +
            # encoding: utf-8
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            require 'antlr3/test/functional'
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            # Functional spec for an XML lexer written as a single ANTLR lexer grammar.
            #
            # DOCUMENT is the only non-fragment rule; every other rule is a fragment
            # whose embedded action reports what it matched through +say+. The example
            # lexes a sample document and compares the text collected in
            # +lexer.output+ with the expected transcript.
            # NOTE(review): +say+ and +output+ are presumably supplied by
            # ANTLR3::Test::CaptureOutput, which the grammar mixes in -- confirm.
            class XMLLexerTest < ANTLR3::Test::Functional
              inline_grammar <<-'GRAMMAR'
                lexer grammar XML;
                options { language = Ruby; }
                
                @members {
                  include ANTLR3::Test::CaptureOutput
                  include ANTLR3::Test::RaiseErrors
                  
                  def quote(text)
                    text = text.gsub(/\"/, '\\"')
                    \%("#{ text }")
                  end
                }
                
                DOCUMENT
                    :  XMLDECL? WS? DOCTYPE? WS? ELEMENT WS? 
                    ;
                
                fragment DOCTYPE
                    :
                        '<!DOCTYPE' WS rootElementName=GENERIC_ID 
                        {say("ROOTELEMENT: " + $rootElementName.text)}
                        WS
                        ( 
                            ( 'SYSTEM' WS sys1=VALUE
                        {say("SYSTEM: " + $sys1.text)}
                                
                            | 'PUBLIC' WS pub=VALUE WS sys2=VALUE
                                {say("PUBLIC: " + $pub.text)}
                                {say("SYSTEM: " + $sys2.text)}   
                            )
                            ( WS )?
                        )?
                        ( dtd=INTERNAL_DTD
                            {say("INTERNAL DTD: " + $dtd.text)}
                        )?
                    '>'
                  ;
                
                fragment INTERNAL_DTD : '[' (options {greedy=false;} : .)* ']' ;
                
                fragment PI :
                        '<?' target=GENERIC_ID WS? 
                          {say("PI: " + $target.text)}
                        ( ATTRIBUTE WS? )*  '?>'
                  ;
                
                fragment XMLDECL :
                        '<?' ('x'|'X') ('m'|'M') ('l'|'L') WS? 
                          {say("XML declaration")}
                        ( ATTRIBUTE WS? )*  '?>'
                  ;
                
                
                fragment ELEMENT
                    : ( START_TAG
                            (ELEMENT
                            | t=PCDATA
                                {say("PCDATA: " << quote($t.text))}
                            | t=CDATA
                                {say("CDATA: " << quote($t.text))}
                            | t=COMMENT
                                {say("Comment: " << quote($t.text))}
                            | pi=PI
                            )*
                            END_TAG
                        | EMPTY_ELEMENT
                        )
                    ;
                
                fragment START_TAG 
                    : '<' WS? name=GENERIC_ID WS?
                          {say("Start Tag: " + $name.text)}
                        ( ATTRIBUTE WS? )* '>'
                    ;
                
                fragment EMPTY_ELEMENT 
                    : '<' WS? name=GENERIC_ID WS?
                          {say("Empty Element: " + $name.text)}
                        ( ATTRIBUTE WS? )* '/>'
                    ;
                
                fragment ATTRIBUTE 
                    : name=GENERIC_ID WS? '=' WS? value=VALUE
                        {say("Attr: " + $name.text + " = "+ $value.text)}
                    ;
                
                fragment END_TAG 
                    : '</' WS? name=GENERIC_ID WS? '>'
                        {say("End Tag: " + $name.text)}
                    ;
                
                fragment COMMENT
                  :	'<!--' (options {greedy=false;} : .)* '-->'
                  ;
                
                fragment CDATA
                  :	'<![CDATA[' (options {greedy=false;} : .)* ']]>'
                  ;
                
                fragment PCDATA : (~'<')+ ; 
                
                fragment VALUE : 
                        ( '\"' (~'\"')* '\"'
                        | '\'' (~'\'')* '\''
                        )
                  ;
                
                fragment GENERIC_ID 
                    : ( LETTER | '_' | ':') 
                        ( options {greedy=true;} : LETTER | '0'..'9' | '.' | '-' | '_' | ':' )*
                  ;
                
                fragment LETTER
                  : 'a'..'z' 
                  | 'A'..'Z'
                  ;
                
                fragment WS  :
                        (   ' '
                        |   '\t'
                        |  ( '\n'
                            |	'\r\n'
                            |	'\r'
                            )
                        )+
                    ;    
              GRAMMAR
              
              # Lexes a document containing an XML declaration, a DOCTYPE with an
              # internal DTD, attributes in both quote styles, a comment, a CDATA
              # section, a processing instruction, an empty element and a nested
              # element, then checks the transcript produced by the grammar actions.
              #
              # The character data spells '&' and '<' as the entity references
              # '&amp;' and '&lt;'. PCDATA is defined as (~'<')+, so a literal '<'
              # can never be part of a PCDATA token, and a lone '<' followed by a
              # newline does not start a valid tag either. The entity text is echoed
              # unchanged, so it appears identically in the expected transcript.
              #
              # The internal DTD text (including its '<!ELMENT' spelling) is expected
              # to be reported verbatim, so the DTD lines of both heredocs must stay
              # in sync.
              it "should be valid" do
                lexer = XML::Lexer.new(<<-'INPUT'.fixed_indent(0))
                  <?xml version='1.0'?>
                  <!DOCTYPE component [
                  <!ELEMENT component (PCDATA|sub)*>
                  <!ATTLIST component
                            attr CDATA #IMPLIED
                            attr2 CDATA #IMPLIED
                  >
                  <!ELMENT sub EMPTY>
                  
                  ]>
                  <component attr="val'ue" attr2='val"ue'>
                  <!-- This is a comment -->
                  Text
                  <![CDATA[huhu]]>
                  öäüß
                  &amp;
                  &lt;
                  <?xtal cursor='11'?>
                  <sub/>
                  <sub></sub>
                  </component>
                INPUT
                
                # Pull every token so that all embedded grammar actions run.
                lexer.map { |token| token }
                
                lexer.output.should == <<-'EXPECTED'.fixed_indent(0)
                  XML declaration
                  Attr: version = '1.0'
                  ROOTELEMENT: component
                  INTERNAL DTD: [
                  <!ELEMENT component (PCDATA|sub)*>
                  <!ATTLIST component
                            attr CDATA #IMPLIED
                            attr2 CDATA #IMPLIED
                  >
                  <!ELMENT sub EMPTY>
                  
                  ]
                  Start Tag: component
                  Attr: attr = "val'ue"
                  Attr: attr2 = 'val"ue'
                  PCDATA: "
                  "
                  Comment: "<!-- This is a comment -->"
                  PCDATA: "
                  Text
                  "
                  CDATA: "<![CDATA[huhu]]>"
                  PCDATA: "
                  öäüß
                  &amp;
                  &lt;
                  "
                  PI: xtal
                  Attr: cursor = '11'
                  PCDATA: "
                  "
                  Empty Element: sub
                  PCDATA: "
                  "
                  Start Tag: sub
                  End Tag: sub
                  PCDATA: "
                  "
                  End Tag: component
                EXPECTED
              end
            end
         |