RubyGems - antlr3 - Versions diffs - 1.8.0 → 1.8.2 - Mend

antlr3 1.8.0 → 1.8.2

Files changed (111) hide show

data/History.txt +35 -0
data/Manifest.txt +73 -0
data/README.txt +6 -13
data/java/RubyTarget.java +43 -19
data/java/antlr-full-3.2.1.jar +0 -0
data/lib/antlr3/debug.rb +2 -0
data/lib/antlr3/debug/event-hub.rb +55 -55
data/lib/antlr3/debug/record-event-listener.rb +2 -2
data/lib/antlr3/debug/rule-tracer.rb +14 -14
data/lib/antlr3/debug/socket.rb +47 -47
data/lib/antlr3/debug/trace-event-listener.rb +8 -8
data/lib/antlr3/main.rb +29 -9
data/lib/antlr3/modes/ast-builder.rb +7 -7
data/lib/antlr3/modes/filter.rb +19 -17
data/lib/antlr3/profile.rb +34 -6
data/lib/antlr3/recognizers.rb +50 -1
data/lib/antlr3/streams.rb +19 -15
data/lib/antlr3/streams/rewrite.rb +241 -229
data/lib/antlr3/template/group-file-lexer.rb +6 -8
data/lib/antlr3/template/group-file-parser.rb +16 -16
data/lib/antlr3/template/group-file.rb +1 -1
data/lib/antlr3/test/call-stack.rb +13 -13
data/lib/antlr3/test/core-extensions.rb +69 -69
data/lib/antlr3/test/functional.rb +0 -4
data/lib/antlr3/test/grammar.rb +70 -70
data/lib/antlr3/token.rb +41 -17
data/lib/antlr3/tree.rb +11 -14
data/lib/antlr3/tree/debug.rb +53 -53
data/lib/antlr3/tree/visitor.rb +11 -11
data/lib/antlr3/tree/wizard.rb +35 -35
data/lib/antlr3/util.rb +18 -0
data/lib/antlr3/version.rb +1 -1
data/rakefile +1 -0
data/samples/ANTLRv3Grammar.g +3 -3
data/samples/JavaScript.g +702 -0
data/samples/standard/C/C.g +543 -0
data/samples/standard/C/C.tokens +175 -0
data/samples/standard/C/C__testrig.st +0 -0
data/samples/standard/C/c.rb +12 -0
data/samples/standard/C/input +3479 -0
data/samples/standard/C/output +171 -0
data/samples/standard/LL-star/LLStar.g +101 -0
data/samples/standard/LL-star/input +12 -0
data/samples/standard/LL-star/ll-star.rb +12 -0
data/samples/standard/LL-star/output +2 -0
data/samples/standard/calc/Calculator.g +47 -0
data/samples/standard/calc/Calculator.py +16 -0
data/samples/standard/calc/Calculator.rb +28 -0
data/samples/standard/cminus/CMinus.g +141 -0
data/samples/standard/cminus/bytecode.group +80 -0
data/samples/standard/cminus/cminus.rb +16 -0
data/samples/standard/cminus/input +9 -0
data/samples/standard/cminus/java.group +91 -0
data/samples/standard/cminus/output +11 -0
data/samples/standard/cminus/python.group +48 -0
data/samples/standard/dynamic-scope/DynamicScopes.g +50 -0
data/samples/standard/dynamic-scope/dynamic-scopes.rb +12 -0
data/samples/standard/dynamic-scope/input +7 -0
data/samples/standard/dynamic-scope/output +4 -0
data/samples/standard/fuzzy/FuzzyJava.g +89 -0
data/samples/standard/fuzzy/fuzzy.py +11 -0
data/samples/standard/fuzzy/fuzzy.rb +9 -0
data/samples/standard/fuzzy/input +13 -0
data/samples/standard/fuzzy/output +12 -0
data/samples/standard/hoisted-predicates/HoistedPredicates.g +40 -0
data/samples/standard/hoisted-predicates/hoisted-predicates.rb +13 -0
data/samples/standard/hoisted-predicates/input +1 -0
data/samples/standard/hoisted-predicates/output +1 -0
data/samples/standard/island-grammar/Javadoc.g +46 -0
data/samples/standard/island-grammar/Simple.g +104 -0
data/samples/standard/island-grammar/input +11 -0
data/samples/standard/island-grammar/island.rb +12 -0
data/samples/standard/island-grammar/output +16 -0
data/samples/standard/java/Java.g +827 -0
data/samples/standard/java/input +80 -0
data/samples/standard/java/java.rb +13 -0
data/samples/standard/java/output +1 -0
data/samples/standard/python/Python.g +718 -0
data/samples/standard/python/PythonTokenSource.rb +107 -0
data/samples/standard/python/input +210 -0
data/samples/standard/python/output +24 -0
data/samples/standard/python/python.rb +14 -0
data/samples/standard/rakefile +18 -0
data/samples/standard/scopes/SymbolTable.g +66 -0
data/samples/standard/scopes/input +12 -0
data/samples/standard/scopes/output +3 -0
data/samples/standard/scopes/scopes.rb +12 -0
data/samples/standard/simplecTreeParser/SimpleC.g +113 -0
data/samples/standard/simplecTreeParser/SimpleCWalker.g +64 -0
data/samples/standard/simplecTreeParser/input +12 -0
data/samples/standard/simplecTreeParser/output +1 -0
data/samples/standard/simplecTreeParser/simplec.rb +18 -0
data/samples/standard/treeparser/Lang.g +24 -0
data/samples/standard/treeparser/LangDumpDecl.g +17 -0
data/samples/standard/treeparser/input +1 -0
data/samples/standard/treeparser/output +2 -0
data/samples/standard/treeparser/treeparser.rb +18 -0
data/samples/standard/tweak/Tweak.g +68 -0
data/samples/standard/tweak/input +9 -0
data/samples/standard/tweak/output +16 -0
data/samples/standard/tweak/tweak.rb +13 -0
data/samples/standard/xml/README +16 -0
data/samples/standard/xml/XML.g +123 -0
data/samples/standard/xml/input +21 -0
data/samples/standard/xml/output +39 -0
data/samples/standard/xml/xml.rb +9 -0
data/templates/Ruby.stg +4 -4
data/test/functional/ast-output/auto-ast.rb +0 -5
data/test/functional/ast-output/rewrites.rb +4 -4
data/test/unit/test-scope.rb +45 -0
metadata +96 -8

data/samples/standard/scopes/output ADDED

@@ -0,0 +1,3 @@
+globals: [i, j]
+level 2 symbols: [j, k]
+level 1 symbols: [i, k]

data/samples/standard/scopes/scopes.rb ADDED

@@ -0,0 +1,12 @@
+#!/usr/bin/ruby
+# encoding: utf-8
+$:.unshift( File.dirname( __FILE__ ) )
+require 'SymbolTableLexer'
+require 'SymbolTableParser'
+for file in ARGV
+  input = ANTLR3::FileStream.new( file )
+  lexer = SymbolTable::Lexer.new( input )
+  parser = SymbolTable::Parser.new( lexer )
+  parser.prog
+end

data/samples/standard/simplecTreeParser/SimpleC.g ADDED

@@ -0,0 +1,113 @@
+grammar SimpleC;
+options {
+    language = Ruby;
+    output = AST;
+}
+tokens {
+    VAR_DEF;
+    ARG_DEF;
+    FUNC_HDR;
+    FUNC_DECL;
+    FUNC_DEF;
+    BLOCK;
+}
+program
+    :   declaration+
+    ;
+declaration
+    :   variable
+    |   functionHeader ';' -> ^(FUNC_DECL functionHeader)
+    |   functionHeader block -> ^(FUNC_DEF functionHeader block)
+    ;
+variable
+    :   type declarator ';' -> ^(VAR_DEF type declarator)
+    ;
+declarator
+    :   ID
+    ;
+functionHeader
+    :   type ID '(' ( formalParameter ( ',' formalParameter )* )? ')'
+        -> ^(FUNC_HDR type ID formalParameter+)
+    ;
+formalParameter
+    :   type declarator -> ^(ARG_DEF type declarator)
+    ;
+type
+    :   'int'
+    |   'char'
+    |   'void'
+    |   ID
+    ;
+block
+    :   lc='{'
+            variable*
+            stat*
+        '}'
+        -> ^(BLOCK[$lc,"BLOCK"] variable* stat*)
+    ;
+stat: forStat
+    | expr ';'!
+    | block
+    | assignStat ';'!
+    | ';'!
+    ;
+forStat
+    :   'for' '(' start=assignStat ';' expr ';' next=assignStat ')' block
+        -> ^('for' $start expr $next block)
+    ;
+assignStat
+    :   ID EQ expr -> ^(EQ ID expr)
+    ;
+expr:   condExpr
+    ;
+condExpr
+    :   aexpr ( ('=='^ | '<'^) aexpr )?
+    ;
+aexpr
+    :   atom ( '+'^ atom )*
+    ;
+atom
+    : ID
+    | INT
+    | '(' expr ')' -> expr
+    ;
+FOR : 'for' ;
+INT_TYPE : 'int' ;
+CHAR: 'char';
+VOID: 'void';
+ID  :   ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')*
+    ;
+INT :	('0'..'9')+
+    ;
+EQ   : '=' ;
+EQEQ : '==' ;
+LT   : '<' ;
+PLUS : '+' ;
+WS  :   (   ' '
+        |   '\t'
+        |   '\r'
+        |   '\n'
+        )+
+        { $channel=HIDDEN }
+    ;

data/samples/standard/simplecTreeParser/SimpleCWalker.g ADDED

@@ -0,0 +1,64 @@
+tree grammar SimpleCWalker;
+options {
+    language = Ruby;
+    tokenVocab = SimpleC;
+    ASTLabelType = CommonTree;
+}
+program
+    :   declaration+
+    ;
+declaration
+    :   variable
+    |   ^(FUNC_DECL functionHeader)
+    |   ^(FUNC_DEF functionHeader block)
+    ;
+variable
+    :   ^(VAR_DEF type declarator)
+    ;
+declarator
+    :   ID
+    ;
+functionHeader
+    :   ^(FUNC_HDR type ID formalParameter+)
+    ;
+formalParameter
+    :   ^(ARG_DEF type declarator)
+    ;
+type
+    :   'int'
+    |   'char'
+    |   'void'
+    |   ID
+    ;
+block
+    :   ^(BLOCK variable* stat*)
+    ;
+stat: forStat
+    | expr
+    | block
+    ;
+forStat
+    :   ^('for' expr expr expr block)
+    ;
+expr:   ^(EQEQ expr expr)
+    |   ^(LT expr expr)
+    |   ^(PLUS expr expr)
+    |   ^(EQ ID expr)
+    |   atom
+    ;
+atom
+    : ID
+    | INT
+    ;

data/samples/standard/simplecTreeParser/input ADDED

@@ -0,0 +1,12 @@
+char c;
+int x;
+void bar(int x);
+int foo(int y, char d) {
+  int i;
+  for (i=0; i<3; i=i+1) {
+    x=3;
+    y=5;
+  }
+}

data/samples/standard/simplecTreeParser/output ADDED

	@@ -0,0 +1 @@
1	+ tree=(VAR_DEF char c) (VAR_DEF int x) (FUNC_DECL (FUNC_HDR void bar (ARG_DEF int x))) (FUNC_DEF (FUNC_HDR int foo (ARG_DEF int y) (ARG_DEF char d)) (BLOCK (VAR_DEF int i) (for (= i 0) (< i 3) (= i (+ i 1)) (BLOCK (= x 3) (= y 5)))))

data/samples/standard/simplecTreeParser/simplec.rb ADDED

@@ -0,0 +1,18 @@
+#!/usr/bin/ruby
+# encoding: utf-8
+$:.unshift( File.dirname( __FILE__ ) )
+require 'SimpleCLexer'
+require 'SimpleCParser'
+require 'SimpleCWalker'
+for file in ARGV
+  input = ANTLR3::FileStream.new( file )
+  parser = SimpleC::Parser.new( input )
+  tree = parser.program.tree
+  puts( "tree: #{ tree.inspect }" )
+  nodes = ANTLR3::AST::CommonTreeNodeStream.new(
+    tree, :token_stream => parser.input
+  )
+  SimpleCWalker::TreeParser.new( nodes ).program
+end

data/samples/standard/treeparser/Lang.g ADDED

@@ -0,0 +1,24 @@
+grammar Lang;
+options {
+  language = Ruby;
+	output = AST;
+}
+tokens {DECL;} // an imaginary node
+start : decl ;
+decl
+  : type ID ';' -> ^(DECL type ID)
+  ;
+type
+  : INT_TYPE  // automatic tree construction builds a node for this rule
+  | FLOAT_TYPE
+  ;
+INT_TYPE : 'int' ;
+FLOAT_TYPE : 'float' ;
+ID : 'a'..'z'+ ;
+INT : '0'..'9'+ ;
+WS : (' '|'\n') {$channel=HIDDEN;} ;

data/samples/standard/treeparser/LangDumpDecl.g ADDED

@@ -0,0 +1,17 @@
+tree grammar LangDumpDecl;
+options {
+  language = Ruby;
+  tokenVocab = Lang;
+}
+decl : ^(DECL type declarator)
+       // label.start, label.start, label.text
+       { puts( "int #{ $declarator.text }" ) }
+     ;
+type : INT_TYPE ;
+declarator
+     : ID
+     ;

data/samples/standard/treeparser/input ADDED

	@@ -0,0 +1 @@
1	+ int a;

data/samples/standard/treeparser/output ADDED

	@@ -0,0 +1,2 @@
1	+ tree: (DECL int a)
2	+ int a

data/samples/standard/treeparser/treeparser.rb ADDED

@@ -0,0 +1,18 @@
+#!/usr/bin/ruby
+# encoding: utf-8
+$:.unshift( File.dirname( __FILE__ ) )
+require 'LangLexer'
+require 'LangParser'
+require 'LangDumpDecl'
+for file in ARGV
+  input = ANTLR3::FileStream.new( file )
+  parser = Lang::Parser.new( input )
+  tree = parser.start.tree
+  puts( "tree: #{ tree.inspect }" )
+  nodes = ANTLR3::AST::CommonTreeNodeStream.new(
+    tree, :token_stream => parser.input
+  )
+  LangDumpDecl::TreeParser.new( nodes ).decl
+end

data/samples/standard/tweak/Tweak.g ADDED

@@ -0,0 +1,68 @@
+/** Convert the simple input to be java code; wrap in a class,
+ *  replace "method" with "public void", add decls.  This shows how to insert
+ *  extra text into a stream of tokens and how to replace a token
+ *  with some text.  Calling render on the TokenRewriteStream
+ *  in tweak.rb will print out the rewritten input stream.
+ *
+ *  Note that you can do the instructions in any order as the
+ *  rewrite instructions just get queued up and executed upon render.
+ */
+grammar Tweak;
+options { language = Ruby; }
+@init {
+  @input = ANTLR3::TokenRewriteStream.new( @input )
+}
+program
+@init { start = @input.look }
+  :   method+
+    {
+    @input.insert_before( start,"public class Wrapper {\n" )
+    # note the reference to the last token matched for method:
+    @input.insert_after( $method.stop, "\n}\n" )
+    }
+  ;
+method
+    : m='method' ID '(' ')' body
+      { @input.replace( $m, "public void" ) }
+    ;
+body
+// decls is on body's local variable stack but is visible to
+// any rule that body calls such as stat.  From other rules
+// it is referenced as $body::decls
+// From within rule body, you can use $decls shorthand
+scope {
+    decls
+}
+@init {
+    $body::decls = Set.new
+}
+    :   lcurly='{' stat* '}'
+        {
+        # dump declarations for all identifiers seen in statement list
+        $body::decls.each { | i | @input.insert_after( $lcurly, "\nint #{ i };" ) }
+        }
+    ;
+stat:   ID '=' expr ';' { $body::decls.add( $ID.text ) } // track left-hand-sides/
+    ;
+expr:   mul ('+' mul)*
+    ;
+mul :   atom ('*' atom)*
+    ;
+atom:   ID
+    |   INT
+    ;
+ID  :   ('a'..'z'|'A'..'Z')+ ;
+INT :   ('0'..'9')+ ;
+WS  :   (' '|'\t'|'\n')+ {$channel=HIDDEN;}
+    ;

data/samples/standard/tweak/input ADDED

@@ -0,0 +1,9 @@
+method foo() {
+  i = 3;
+  k = i;
+  i = k*4;
+}
+method bar() {
+  j = i*2;
+}

data/samples/standard/tweak/output ADDED

@@ -0,0 +1,16 @@
+public class Wrapper {
+public void foo() {
+int k;
+int i;
+  i = 3;
+  k = i;
+  i = k*4;
+}
+public void bar() {
+int j;
+  j = i*2;
+}
+}

data/samples/standard/tweak/tweak.rb ADDED

@@ -0,0 +1,13 @@
+#!/usr/bin/ruby
+# encoding: utf-8
+$:.unshift( File.dirname( __FILE__ ) )
+require 'TweakLexer'
+require 'TweakParser'
+for file in ARGV
+  input = ANTLR3::FileStream.new( file )
+  lexer = Tweak::Lexer.new( input )
+  parser = Tweak::Parser.new( lexer )
+  parser.program
+  puts( parser.input.render )
+end

data/samples/standard/xml/README ADDED

@@ -0,0 +1,16 @@
+The example input file has been slightly modified from the analogous
+Java example: the non-ASCII characters have been removed and replaced
+with ASCII data.
+The lexer would fail if it tried to compare a byte > 0x7f from the input
+to a Unicode constant.
+It would work fine if you fed Unicode into the lexer, but then
+sys.stdout.write() would probably fail when it tried to print non-ASCII
+data.
+It will also work with non-ASCII input (the lexer operates on unicode()
+strings), but then you'll have to do some more work (which I omitted for
+simplicity):
+ - decode the input into a Unicode string using the appropriate encoding.
+ - encode the data as you send it to the console or store it in a file.

data/samples/standard/xml/XML.g ADDED

@@ -0,0 +1,123 @@
+lexer grammar XML;
+options { language = Ruby; }
+@members {
+  def quote(text)
+    text = text.gsub(/\"/, '\\"')
+    \%("#{ text }")
+  end
+}
+DOCUMENT
+    :  XMLDECL? WS? DOCTYPE? WS? ELEMENT WS?
+    ;
+fragment DOCTYPE
+    :
+        '<!DOCTYPE' WS rootElementName=GENERIC_ID
+        {puts("ROOTELEMENT: " + $rootElementName.text)}
+        WS
+        (
+            ( 'SYSTEM' WS sys1=VALUE
+        {puts("SYSTEM: " + $sys1.text)}
+            | 'PUBLIC' WS pub=VALUE WS sys2=VALUE
+                {puts("PUBLIC: " + $pub.text)}
+                {puts("SYSTEM: " + $sys2.text)}
+            )
+            ( WS )?
+        )?
+        ( dtd=INTERNAL_DTD
+            {puts("INTERNAL DTD: " + $dtd.text)}
+        )?
+    '>'
+  ;
+fragment INTERNAL_DTD : '[' (options {greedy=false;} : .)* ']' ;
+fragment PI :
+        '<?' target=GENERIC_ID WS?
+          {puts("PI: " + $target.text)}
+        ( ATTRIBUTE WS? )*  '?>'
+  ;
+fragment XMLDECL :
+        '<?' ('x'|'X') ('m'|'M') ('l'|'L') WS?
+          {puts("XML declaration")}
+        ( ATTRIBUTE WS? )*  '?>'
+  ;
+fragment ELEMENT
+    : ( START_TAG
+            (ELEMENT
+            | t=PCDATA
+                {puts("PCDATA: " << quote($t.text))}
+            | t=CDATA
+                {puts("CDATA: " << quote($t.text))}
+            | t=COMMENT
+                {puts("Comment: " << quote($t.text))}
+            | pi=PI
+            )*
+            END_TAG
+        | EMPTY_ELEMENT
+        )
+    ;
+fragment START_TAG
+    : '<' WS? name=GENERIC_ID WS?
+          {puts("Start Tag: " + $name.text)}
+        ( ATTRIBUTE WS? )* '>'
+    ;
+fragment EMPTY_ELEMENT
+    : '<' WS? name=GENERIC_ID WS?
+          {puts("Empty Element: " + $name.text)}
+        ( ATTRIBUTE WS? )* '/>'
+    ;
+fragment ATTRIBUTE
+    : name=GENERIC_ID WS? '=' WS? value=VALUE
+        {puts("Attr: " + $name.text + " = "+ $value.text)}
+    ;
+fragment END_TAG
+    : '</' WS? name=GENERIC_ID WS? '>'
+        {puts("End Tag: " + $name.text)}
+    ;
+fragment COMMENT
+  :	'<!--' (options {greedy=false;} : .)* '-->'
+  ;
+fragment CDATA
+  :	'<![CDATA[' (options {greedy=false;} : .)* ']]>'
+  ;
+fragment PCDATA : (~'<')+ ;
+fragment VALUE :
+        ( '\"' (~'\"')* '\"'
+        | '\'' (~'\'')* '\''
+        )
+  ;
+fragment GENERIC_ID
+    : ( LETTER | '_' | ':')
+        ( options {greedy=true;} : LETTER | '0'..'9' | '.' | '-' | '_' | ':' )*
+  ;
+fragment LETTER
+  : 'a'..'z'
+  | 'A'..'Z'
+  ;
+fragment WS  :
+        (   ' '
+        |   '\t'
+        |  ( '\n'
+            |	'\r\n'
+            |	'\r'
+            )
+        )+
+    ;