antlr3 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ANTLR-LICENSE.txt +26 -0
- data/History.txt +66 -0
- data/README.txt +139 -0
- data/bin/antlr4ruby +33 -0
- data/java/RubyTarget.java +524 -0
- data/java/antlr-full-3.2.1.jar +0 -0
- data/lib/antlr3.rb +176 -0
- data/lib/antlr3/constants.rb +88 -0
- data/lib/antlr3/debug.rb +701 -0
- data/lib/antlr3/debug/event-hub.rb +210 -0
- data/lib/antlr3/debug/record-event-listener.rb +25 -0
- data/lib/antlr3/debug/rule-tracer.rb +55 -0
- data/lib/antlr3/debug/socket.rb +360 -0
- data/lib/antlr3/debug/trace-event-listener.rb +92 -0
- data/lib/antlr3/dfa.rb +247 -0
- data/lib/antlr3/dot.rb +174 -0
- data/lib/antlr3/error.rb +657 -0
- data/lib/antlr3/main.rb +561 -0
- data/lib/antlr3/modes/ast-builder.rb +41 -0
- data/lib/antlr3/modes/filter.rb +56 -0
- data/lib/antlr3/profile.rb +322 -0
- data/lib/antlr3/recognizers.rb +1280 -0
- data/lib/antlr3/streams.rb +985 -0
- data/lib/antlr3/streams/interactive.rb +91 -0
- data/lib/antlr3/streams/rewrite.rb +412 -0
- data/lib/antlr3/test/call-stack.rb +57 -0
- data/lib/antlr3/test/config.rb +23 -0
- data/lib/antlr3/test/core-extensions.rb +269 -0
- data/lib/antlr3/test/diff.rb +165 -0
- data/lib/antlr3/test/functional.rb +207 -0
- data/lib/antlr3/test/grammar.rb +371 -0
- data/lib/antlr3/token.rb +592 -0
- data/lib/antlr3/tree.rb +1415 -0
- data/lib/antlr3/tree/debug.rb +163 -0
- data/lib/antlr3/tree/visitor.rb +84 -0
- data/lib/antlr3/tree/wizard.rb +481 -0
- data/lib/antlr3/util.rb +149 -0
- data/lib/antlr3/version.rb +27 -0
- data/samples/ANTLRv3Grammar.g +621 -0
- data/samples/Cpp.g +749 -0
- data/templates/AST.stg +335 -0
- data/templates/ASTDbg.stg +40 -0
- data/templates/ASTParser.stg +153 -0
- data/templates/ASTTreeParser.stg +272 -0
- data/templates/Dbg.stg +192 -0
- data/templates/Ruby.stg +1514 -0
- data/test/functional/ast-output/auto-ast.rb +797 -0
- data/test/functional/ast-output/construction.rb +555 -0
- data/test/functional/ast-output/hetero-nodes.rb +753 -0
- data/test/functional/ast-output/rewrites.rb +1327 -0
- data/test/functional/ast-output/tree-rewrite.rb +1662 -0
- data/test/functional/debugging/debug-mode.rb +689 -0
- data/test/functional/debugging/profile-mode.rb +165 -0
- data/test/functional/debugging/rule-tracing.rb +74 -0
- data/test/functional/delegation/import.rb +379 -0
- data/test/functional/lexer/basic.rb +559 -0
- data/test/functional/lexer/filter-mode.rb +245 -0
- data/test/functional/lexer/nuances.rb +47 -0
- data/test/functional/lexer/properties.rb +104 -0
- data/test/functional/lexer/syn-pred.rb +32 -0
- data/test/functional/lexer/xml.rb +206 -0
- data/test/functional/main/main-scripts.rb +245 -0
- data/test/functional/parser/actions.rb +224 -0
- data/test/functional/parser/backtracking.rb +244 -0
- data/test/functional/parser/basic.rb +282 -0
- data/test/functional/parser/calc.rb +98 -0
- data/test/functional/parser/ll-star.rb +143 -0
- data/test/functional/parser/nuances.rb +165 -0
- data/test/functional/parser/predicates.rb +103 -0
- data/test/functional/parser/properties.rb +242 -0
- data/test/functional/parser/rule-methods.rb +132 -0
- data/test/functional/parser/scopes.rb +274 -0
- data/test/functional/token-rewrite/basic.rb +318 -0
- data/test/functional/token-rewrite/via-parser.rb +100 -0
- data/test/functional/tree-parser/basic.rb +750 -0
- data/test/unit/sample-input/file-stream-1 +2 -0
- data/test/unit/sample-input/teststreams.input2 +2 -0
- data/test/unit/test-dfa.rb +52 -0
- data/test/unit/test-exceptions.rb +44 -0
- data/test/unit/test-recognizers.rb +55 -0
- data/test/unit/test-scheme.rb +62 -0
- data/test/unit/test-streams.rb +459 -0
- data/test/unit/test-tree-wizard.rb +535 -0
- data/test/unit/test-trees.rb +854 -0
- metadata +205 -0
data/ANTLR-LICENSE.txt
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
[The "BSD licence"]
|
2
|
+
Copyright (c) 2003-2008 Terence Parr
|
3
|
+
All rights reserved.
|
4
|
+
|
5
|
+
Redistribution and use in source and binary forms, with or without
|
6
|
+
modification, are permitted provided that the following conditions
|
7
|
+
are met:
|
8
|
+
|
9
|
+
1. Redistributions of source code must retain the above copyright
|
10
|
+
notice, this list of conditions and the following disclaimer.
|
11
|
+
2. Redistributions in binary form must reproduce the above copyright
|
12
|
+
notice, this list of conditions and the following disclaimer in the
|
13
|
+
documentation and/or other materials provided with the distribution.
|
14
|
+
3. The name of the author may not be used to endorse or promote products
|
15
|
+
derived from this software without specific prior written permission.
|
16
|
+
|
17
|
+
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
18
|
+
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
19
|
+
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
20
|
+
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
21
|
+
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
22
|
+
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
23
|
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
24
|
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
25
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
26
|
+
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/History.txt
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
h1(update). V1.1.1 2009-10-24 Kyle Yetter <kcy5b@yahoo.com>
|
2
|
+
|
3
|
+
h2(minor). minor alteration to TokenScheme
|
4
|
+
|
5
|
+
* TokenScheme#register_name would raise an error if the token type was anonymous (i.e. T__19 or such) and the new name didn't have the format "'a string'". That works well for ANTLR tokens, but it's generally pointless, so I got rid of those restrictions.
|
6
|
+
|
7
|
+
|\2. MODIFIED |
|
8
|
+
| lib/antlr3/token.rb | made minor changes to ANTLR3::TokenScheme#register_name |
|
9
|
+
|
10
|
+
h1(update). V1.1.0 2009-10-19 Kyle Yetter <kcy5b@yahoo.com>
|
11
|
+
|
12
|
+
h2(major). moved FilterMode
|
13
|
+
|
14
|
+
* since <tt>ANTLR3::Lexer::FilterMode</tt> is only necessary in lexers that specify filter=true, I moved it out of antlr3/recognizers.rb to its own file, antlr3/modes/filter.rb.
|
15
|
+
|
16
|
+
* the FilterMode module is no longer in the Lexer namespace, but in the broader ANTLR3 namespace ... i.e. module ANTLR3::Lexer::FilterMode ==> module ANTLR3::FilterMode
|
17
|
+
|
18
|
+
* I added an autoload entry to the ANTLR3 module in antlr3.rb for FilterMode, thus the module is autoloaded as needed whenever the constant ANTLR3::FilterMode is referenced
|
19
|
+
|
20
|
+
|\2. MODIFIED |
|
21
|
+
| lib/antlr3/recognizers.rb | removed Lexer::FilterMode definition |
|
22
|
+
| lib/antlr3.rb | added autoload :FilterMode entry |
|
23
|
+
|\2. CREATED |
|
24
|
+
| lib/antlr3/modes/filter.rb | contains FilterMode module extracted from recognizers.rb |
|
25
|
+
|
26
|
+
h2(major). new module ASTBuilder
|
27
|
+
|
28
|
+
* created a new module, ANTLR3::ASTBuilder
|
29
|
+
* all tree-building recognizers (parsers and tree parsers with the output=AST option) include this module
|
30
|
+
* provides a way to test a recognizer to see if it is an AST-building recognizer (rec.is_a?(ANTLR3::ASTBuilder)) and it also adds a number of methods to help clean up generated AST-parser code
|
31
|
+
|
32
|
+
|\2. MODIFIED |
|
33
|
+
| lib/antlr3.rb | added the usual autoload entry for ASTBuilder |
|
34
|
+
| templates/AST.stg | added ``include ANTLR3::ASTBuilder'' to automatically include the module on all AST-output recognizers |
|
35
|
+
|\2. CREATED |
|
36
|
+
| lib/antlr3/modes/ast-builder.rb | defined new module ANTLR3::ASTBuilder |
|
37
|
+
|
38
|
+
h2(major). refactored debug and profile modes
|
39
|
+
|
40
|
+
* began process of organizing/cleaning-up the ANTLR3::Debug module (in file lib/antlr3/debug.rb), which contains all runtime support code and features for --debug and --profile option code output.
|
41
|
+
* extracted profiling-oriented classes and modules from the Debug module and moved them to a new file, lib/antlr3/profile.rb.
|
42
|
+
* since usually you're only using one particular type of Debug::EventListener at a time, I moved the six or so different EventListener classes out of Debug and into their own respective files in the antlr3/debug directory
|
43
|
+
* since tree-oriented debugging hooks (Debug::TreeAdaptor and such) are only necessary for tree-oriented code, and thus are not part of the essential core, I moved the tree-related event modules out of Debug and into a separate file, lib/antlr3/tree/debug.rb
|
44
|
+
* debug.rb now only defines the core EventListener interface module and the event-hook modules ParserEvents, TokenStream, and the expanded RecognizerSharedState class
|
45
|
+
* as with most classes that aren't loaded by default in the runtime library, I strategically placed autoload statements in appropriate modules to autoload any of the Debug modules and classes I yanked out of the debug.rb, so there shouldn't be any need for extra 'require' statements to use the classes
|
46
|
+
|
47
|
+
|\2. MODIFIED |
|
48
|
+
| lib/antlr3/debug.rb | removed definitions: EventSocketProxy, RemoteEventSocketListener, TraceEventListener, RuleTracer, EventHub, TreeNodeStream, TreeAdaptor, ParserProfilingEvents, Profiler, Profiler::Profile, Profiler::DataSet |
|
49
|
+
|\2. CREATED |
|
50
|
+
| lib/antlr3/debug/event-hub.rb | new home for Debug::EventHub |
|
51
|
+
| lib/antlr3/debug/record-event-listener.rb | new home for Debug::RecordEventListener |
|
52
|
+
| lib/antlr3/debug/rule-tracer.rb | new home for Debug::RuleTracer |
|
53
|
+
| lib/antlr3/debug/socket.rb | new home for Debug::EventSocketProxy and Debug::RemoteEventSocketListener |
|
54
|
+
| lib/antlr3/debug/trace-event-listener.rb | new home for Debug::TraceEventListener |
|
55
|
+
| lib/antlr3/profile.rb | new profile-oriented module, Profile, contains former debug classes Profile, ProfileEvents, and Profiler |
|
56
|
+
| lib/antlr3/tree/debug.rb | new home for Debug::TreeAdaptor and Debug::TreeNodeStream |
|
57
|
+
|
58
|
+
h2(minor). moved most of the core-extension code to new Util module
|
59
|
+
|
60
|
+
* as handy as it is to add new functions to core classes, I moved most of the code in the core-extension directory to the new slightly-nebulous, catch-all dumping ground modules, ANTLR3::Util and ANTLR3::ClassMacros. String and Module methods defined in the extensions were only used in a small handful of places and thus it seemed intrusive to pollute the core classes with these utility methods for little payoff.
|
61
|
+
* ANTLR classes now extend ANTLR3::ClassMacros when necessary. It defines 'abstract' to easily create abstract methods. It also provides a new method, #shared_attribute, which defines an attribute accessor with a writer
|
62
|
+
|
63
|
+
h1(update). V1.0.0 2009-10-14 Kyle Yetter <kcy5b@yahoo.com>
|
64
|
+
|
65
|
+
h2(major). Initial Release: Birthday!
|
66
|
+
|
data/README.txt
ADDED
@@ -0,0 +1,139 @@
|
|
1
|
+
ANTLR 3 for Ruby
|
2
|
+
by Kyle Yetter (kcy5b@yahoo.com)
|
3
|
+
http://antlr3.rubyforge.org
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
Fully-featured ANTLR 3 parser generation for Ruby.
|
8
|
+
|
9
|
+
ANTLR (ANother Tool for Language Recognition) is a tool that is used to generate
|
10
|
+
code for performing a variety of language recognition tasks: lexing, parsing,
|
11
|
+
abstract syntax tree construction and manipulation, tree structure recognition,
|
12
|
+
and input translation. The tool operates similarly to other parser generators,
|
13
|
+
taking in a grammar specification written in the special ANTLR metalanguage and
|
14
|
+
producing source code that implements the recognition functionality.
|
15
|
+
|
16
|
+
While the tool itself is implemented in Java, it has an extensible design that
|
17
|
+
allows for code generation in other programming languages. To implement an
|
18
|
+
ANTLR language target, a developer may supply a set of templates written in the
|
19
|
+
StringTemplate (http://www.stringtemplate.org) language.
|
20
|
+
|
21
|
+
ANTLR is currently distributed with a fairly limited Ruby target implementation.
|
22
|
+
While it does provide implementation for basic lexer and parser classes, the
|
23
|
+
target does not provide any implementation for abstract syntax tree
|
24
|
+
construction, tree parser class generation, input translation, or a number of
|
25
|
+
the other ANTLR features that give the program an edge over traditional code
|
26
|
+
generators.
|
27
|
+
|
28
|
+
This gem packages together a complete implementation of the majority of features
|
29
|
+
ANTLR provides for other language targets, such as Java and Python. It contains:
|
30
|
+
|
31
|
+
* A customized version of the latest ANTLR program, bundling all necessary
|
32
|
+
java code and templates for producing fully featured language recognition
|
33
|
+
in ruby code
|
34
|
+
|
35
|
+
* a ruby runtime library that collects classes used throughout the code that
|
36
|
+
ANTLR generates
|
37
|
+
|
38
|
+
* a wrapper script, `antlr4ruby', which executes the ANTLR command line tool
|
39
|
+
after ensuring the ANTLR jar is on java's class path
|
40
|
+
|
41
|
+
== FEATURES
|
42
|
+
|
43
|
+
1. generates ruby code capable of:
|
44
|
+
* lexing text input
|
45
|
+
* parsing lexical output and responding with arbitrary actions
|
46
|
+
* constructing Abstract Syntax Trees (ASTs)
|
47
|
+
* parsing AST structure and responding with arbitrary actions
|
48
|
+
* translating input source to some desired output format
|
49
|
+
|
50
|
+
2. This package can serve as a powerful assistant when performing tasks
|
51
|
+
such as:
|
52
|
+
* code compilation
|
53
|
+
* source code highlighting and formatting
|
54
|
+
* domain-specific language implementation
|
55
|
+
* source code extraction and analysis
|
56
|
+
|
57
|
+
== USAGE
|
58
|
+
|
59
|
+
1. Write an ANTLR grammar specification for a language
|
60
|
+
|
61
|
+
grammar SomeLanguage;
|
62
|
+
|
63
|
+
options {
|
64
|
+
language = Ruby; // <- this option must be set to Ruby
|
65
|
+
output = AST;
|
66
|
+
}
|
67
|
+
|
68
|
+
top: expr ( ',' expr )*
|
69
|
+
;
|
70
|
+
|
71
|
+
and so on...
|
72
|
+
|
73
|
+
|
74
|
+
2. Run the ANTLR tool with the antlr4ruby command to generate output:
|
75
|
+
|
76
|
+
antlr4ruby SomeLanguage.g
|
77
|
+
# creates:
|
78
|
+
# SomeLanguageParser.rb
|
79
|
+
# SomeLanguageLexer.rb
|
80
|
+
# SomeLanguage.g
|
81
|
+
|
82
|
+
3. Try out the results directly, if you like:
|
83
|
+
|
84
|
+
# see how the lexer tokenizes some input
|
85
|
+
ruby SomeLanguageLexer.rb < path/to/source-code.xyz
|
86
|
+
|
87
|
+
# check whether the parser successfully matches some input
|
88
|
+
ruby SomeLanguageParser.rb --rule=top < path/to/source-code.xyz
|
89
|
+
|
90
|
+
-> Read up on the package documentation for more specific details
|
91
|
+
about loading the recognizers and using their class definitions
|
92
|
+
|
93
|
+
== ISSUES
|
94
|
+
|
95
|
+
* Currently, there are a few nuanced ways in which using the ruby output differs
|
96
|
+
from the conventions and examples covered in the ANTLR standard documentation.
|
97
|
+
I am still working on documenting these details.
|
98
|
+
|
99
|
+
* While the target is intended to be complete, I do not provide any
|
100
|
+
implementation of the template-construction mode available for Java and Python
|
101
|
+
targets. While I'm interested in having this capability, I have not implemented
|
102
|
+
it yet because ANTLR forces you to use its StringTemplate templating language.
|
103
|
+
Thus, I would have to port the StringTemplate library to Ruby and write a target
|
104
|
+
for ST parser generation. I would prefer to permit template generation that uses
|
105
|
+
ruby's standard ERB templating library.
|
106
|
+
|
107
|
+
* So far, this has only been tested on Linux with ruby 1.8.7 and ruby 1.9.1.
|
108
|
+
I'm currently working on verifying behavior on other systems and with
|
109
|
+
slightly older versions of ruby.
|
110
|
+
|
111
|
+
== LICENSE
|
112
|
+
|
113
|
+
[The "BSD licence"]
|
114
|
+
Copyright (c) 2009 Kyle Yetter
|
115
|
+
All rights reserved.
|
116
|
+
|
117
|
+
Redistribution and use in source and binary forms, with or without
|
118
|
+
modification, are permitted provided that the following conditions
|
119
|
+
are met:
|
120
|
+
|
121
|
+
1. Redistributions of source code must retain the above copyright
|
122
|
+
notice, this list of conditions and the following disclaimer.
|
123
|
+
2. Redistributions in binary form must reproduce the above copyright
|
124
|
+
notice, this list of conditions and the following disclaimer in the
|
125
|
+
documentation and/or other materials provided with the distribution.
|
126
|
+
3. The name of the author may not be used to endorse or promote products
|
127
|
+
derived from this software without specific prior written permission.
|
128
|
+
|
129
|
+
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
130
|
+
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
131
|
+
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
132
|
+
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
133
|
+
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
134
|
+
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
135
|
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
136
|
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
137
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
138
|
+
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
139
|
+
|
data/bin/antlr4ruby
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
#!/usr/bin/ruby
# encoding: utf-8

# Wrapper around the ANTLR command line tool: locates the ANTLR jar that is
# expected to sit in this package's java/ directory and runs it with `java`,
# forwarding every command line argument unchanged.

__DIR__ = File.expand_path( File.dirname __FILE__ )
project_top = File.dirname __DIR__

# version.rb is expected to define ANTLR_VERSION_STRING, which names the jar
load( File.join( project_top, 'lib', 'antlr3', 'version.rb' ) )

jar_name = "antlr-full-#{ ANTLR_VERSION_STRING }.jar"
jar_path = File.join( project_top, 'java', jar_name )

unless File.exist?( jar_path )
  # use the non-bang strip/gsub: the bang variants return nil when they make
  # no change, which would silently drop the message
  $stderr.puts( <<-END.strip.gsub( /\s+/, ' ' ) )
    the ANTLR #{ ANTLR_VERSION_STRING } jar is expected
    to be located at #{ jar_path }, but it does not
    appear to exist.
  END
  exit( 1 )
end

# Hand the arguments to exec as a list: with more than one argument no shell
# is involved, so paths and grammar arguments containing spaces or shell
# metacharacters need no manual escaping.
exec( 'java', '-jar', jar_path, *ARGV )
|
@@ -0,0 +1,524 @@
|
|
1
|
+
/*
|
2
|
+
[The "BSD licence"]
|
3
|
+
Copyright (c) 2005 Martin Traverso
|
4
|
+
All rights reserved.
|
5
|
+
|
6
|
+
Redistribution and use in source and binary forms, with or without
|
7
|
+
modification, are permitted provided that the following conditions
|
8
|
+
are met:
|
9
|
+
1. Redistributions of source code must retain the above copyright
|
10
|
+
notice, this list of conditions and the following disclaimer.
|
11
|
+
2. Redistributions in binary form must reproduce the above copyright
|
12
|
+
notice, this list of conditions and the following disclaimer in the
|
13
|
+
documentation and/or other materials provided with the distribution.
|
14
|
+
3. The name of the author may not be used to endorse or promote products
|
15
|
+
derived from this software without specific prior written permission.
|
16
|
+
|
17
|
+
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
18
|
+
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
19
|
+
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
20
|
+
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
21
|
+
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
22
|
+
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
23
|
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
24
|
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
25
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
26
|
+
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
27
|
+
*/
|
28
|
+
|
29
|
+
package org.antlr.codegen;
|
30
|
+
|
31
|
+
import java.io.IOException;
|
32
|
+
import java.util.*;
|
33
|
+
|
34
|
+
import org.antlr.Tool;
|
35
|
+
import org.antlr.stringtemplate.*;
|
36
|
+
import org.antlr.tool.Grammar;
|
37
|
+
|
38
|
+
public class RubyTarget
|
39
|
+
extends Target
|
40
|
+
{
|
41
|
+
public static final Set rubyKeywords =
|
42
|
+
new HashSet() {
|
43
|
+
{
|
44
|
+
add("alias"); add("end"); add("retry");
|
45
|
+
add("and"); add("ensure"); add("return");
|
46
|
+
add("BEGIN"); add("false"); add("self");
|
47
|
+
add("begin"); add("for"); add("super");
|
48
|
+
add("break"); add("if"); add("then");
|
49
|
+
add("case"); add("in"); add("true");
|
50
|
+
add("class"); add("module"); add("undef");
|
51
|
+
add("def"); add("next"); add("unless");
|
52
|
+
add("defined"); add("nil"); add("until");
|
53
|
+
add("do"); add("not"); add("when");
|
54
|
+
add("else"); add("or"); add("while");
|
55
|
+
add("elsif"); add("redo"); add("yield");
|
56
|
+
add("END"); add("rescue");
|
57
|
+
}
|
58
|
+
};
|
59
|
+
|
60
|
+
public class RubyRenderer
|
61
|
+
implements AttributeRenderer
|
62
|
+
{
|
63
|
+
public String toString(Object o) {
|
64
|
+
return o.toString();
|
65
|
+
}
|
66
|
+
public String toString(Object o, String formatName) {
|
67
|
+
String idString = o.toString();
|
68
|
+
|
69
|
+
if (idString.isEmpty()) return idString;
|
70
|
+
|
71
|
+
if (formatName.equals("snakecase")) {
|
72
|
+
return snakecase(idString);
|
73
|
+
} else if (formatName.equals("camelcase")) {
|
74
|
+
return camelcase(idString);
|
75
|
+
} else if (formatName.equals("subcamelcase")) {
|
76
|
+
return subcamelcase(idString);
|
77
|
+
} else if (formatName.equals("constant")) {
|
78
|
+
return constantcase(idString);
|
79
|
+
} else if (formatName.equals("platform")) {
|
80
|
+
return platform(idString);
|
81
|
+
} else if (formatName.equals("lexerRule")) {
|
82
|
+
return lexerRule(idString);
|
83
|
+
} else if (formatName.equals("constantPath")) {
|
84
|
+
return constantPath(idString);
|
85
|
+
} else if (formatName.equals("label")) {
|
86
|
+
return label(idString);
|
87
|
+
} else if (formatName.equals("symbol")) {
|
88
|
+
return symbol(idString);
|
89
|
+
} else {
|
90
|
+
throw new IllegalArgumentException("Unsupported format name");
|
91
|
+
}
|
92
|
+
}
|
93
|
+
/** given an input string, which is presumed
|
94
|
+
* to contain a word, which may potentially be camelcased,
|
95
|
+
* and convert it to snake_case underscore style.
|
96
|
+
*
|
97
|
+
* algorithm --
|
98
|
+
* iterate through the string with a sliding window 3 chars wide
|
99
|
+
*
|
100
|
+
* example -- aGUIWhatNot
|
101
|
+
* c c+1 c+2 action
|
102
|
+
* a G << 'a' << '_' // a lower-upper word edge
|
103
|
+
* G U I << 'g'
|
104
|
+
* U I W << 'w'
|
105
|
+
* I W h << 'i' << '_' // the last character in an acronym run of uppers
|
106
|
+
* W h << 'w'
|
107
|
+
* ... and so on
|
108
|
+
*/
|
109
|
+
private String snakecase(String value) {
|
110
|
+
StringBuilder output_buffer = new StringBuilder();
|
111
|
+
int l = value.length();
|
112
|
+
int cliff = l - 1;
|
113
|
+
char cur;
|
114
|
+
char next;
|
115
|
+
char peek;
|
116
|
+
|
117
|
+
if (value.isEmpty()) return value;
|
118
|
+
if (l == 1) return value.toLowerCase();
|
119
|
+
|
120
|
+
for (int i = 0; i < cliff; i++) {
|
121
|
+
cur = value.charAt(i);
|
122
|
+
next = value.charAt(i + 1);
|
123
|
+
|
124
|
+
if ( Character.isLetter( cur ) ) {
|
125
|
+
output_buffer.append( Character.toLowerCase( cur ) );
|
126
|
+
|
127
|
+
if ( Character.isDigit( next ) || Character.isWhitespace( next ) ) {
|
128
|
+
output_buffer.append( '_' );
|
129
|
+
} else if ( Character.isLowerCase( cur ) && Character.isUpperCase( next ) ) {
|
130
|
+
// at camelcase word edge
|
131
|
+
output_buffer.append( '_' );
|
132
|
+
} else if ( (i < cliff - 1) && Character.isUpperCase( cur ) && Character.isUpperCase( next ) ) {
|
133
|
+
// cur is part of an acronym
|
134
|
+
|
135
|
+
peek = value.charAt(i + 2);
|
136
|
+
if ( Character.isLowerCase( peek ) ) {
|
137
|
+
/* if next is the start of word (indicated when peek is lowercase)
|
138
|
+
then the acronym must be completed by appending an underscore */
|
139
|
+
output_buffer.append('_');
|
140
|
+
}
|
141
|
+
}
|
142
|
+
} else if( Character.isDigit( cur ) ) {
|
143
|
+
output_buffer.append( cur );
|
144
|
+
if ( Character.isLetter( next ) ) {
|
145
|
+
output_buffer.append('_');
|
146
|
+
}
|
147
|
+
} else if (Character.isWhitespace( cur )) {
|
148
|
+
// do nothing
|
149
|
+
} else {
|
150
|
+
output_buffer.append( cur );
|
151
|
+
}
|
152
|
+
|
153
|
+
}
|
154
|
+
|
155
|
+
cur = value.charAt(cliff);
|
156
|
+
if (! Character.isWhitespace(cur) ) {
|
157
|
+
output_buffer.append( Character.toLowerCase( cur ) );
|
158
|
+
}
|
159
|
+
|
160
|
+
return output_buffer.toString();
|
161
|
+
}
|
162
|
+
private String constantcase(String value) {
|
163
|
+
return snakecase(value).toUpperCase();
|
164
|
+
}
|
165
|
+
private String platform(String value) {
|
166
|
+
return ("__" + value + "__");
|
167
|
+
}
|
168
|
+
private String symbol(String value) {
|
169
|
+
if (value.matches("[a-zA-Z_]\\w*[\\?\\!\\=]?")) {
|
170
|
+
return (":" + value);
|
171
|
+
} else {
|
172
|
+
return ("%s(" + value + ")");
|
173
|
+
}
|
174
|
+
}
|
175
|
+
private String lexerRule(String value) {
|
176
|
+
if (value.equals("Tokens")) {
|
177
|
+
return "token!";
|
178
|
+
} else {
|
179
|
+
return (snakecase(value) + "!");
|
180
|
+
}
|
181
|
+
}
|
182
|
+
private String constantPath(String value) {
|
183
|
+
return value.replaceAll("\\.", "::");
|
184
|
+
}
|
185
|
+
private String camelcase(String value) {
|
186
|
+
StringBuilder output_buffer = new StringBuilder();
|
187
|
+
int cliff = value.length();
|
188
|
+
char cur;
|
189
|
+
char next;
|
190
|
+
boolean at_edge = true;
|
191
|
+
|
192
|
+
if (value.isEmpty()) return value;
|
193
|
+
if (cliff == 1) return value.toUpperCase();
|
194
|
+
|
195
|
+
for (int i = 0; i < cliff; i++) {
|
196
|
+
cur = value.charAt(i);
|
197
|
+
|
198
|
+
if ( Character.isWhitespace( cur ) ) {
|
199
|
+
at_edge = true;
|
200
|
+
continue;
|
201
|
+
} else if ( cur == '_' ) {
|
202
|
+
at_edge = true;
|
203
|
+
continue;
|
204
|
+
} else if ( Character.isDigit( cur ) ) {
|
205
|
+
output_buffer.append( cur );
|
206
|
+
at_edge = true;
|
207
|
+
continue;
|
208
|
+
}
|
209
|
+
|
210
|
+
if (at_edge) {
|
211
|
+
output_buffer.append( Character.toUpperCase( cur ) );
|
212
|
+
if ( Character.isLetter( cur ) ) at_edge = false;
|
213
|
+
} else {
|
214
|
+
output_buffer.append( cur );
|
215
|
+
}
|
216
|
+
}
|
217
|
+
|
218
|
+
return output_buffer.toString();
|
219
|
+
}
|
220
|
+
private String label(String value) {
|
221
|
+
if (rubyKeywords.contains(value)) {
|
222
|
+
return platform(value);
|
223
|
+
} else if (Character.isUpperCase(value.charAt(0)) &&
|
224
|
+
(!value.equals("FILE")) &&
|
225
|
+
(!value.equals("LINE"))) {
|
226
|
+
return platform(value);
|
227
|
+
} else if (value.equals("FILE")) {
|
228
|
+
return "_FILE_";
|
229
|
+
} else if (value.equals("LINE")) {
|
230
|
+
return "_LINE_";
|
231
|
+
} else {
|
232
|
+
return value;
|
233
|
+
}
|
234
|
+
}
|
235
|
+
private String subcamelcase(String value) {
|
236
|
+
value = camelcase(value);
|
237
|
+
if (value.isEmpty())
|
238
|
+
return value;
|
239
|
+
Character head = Character.toLowerCase( value.charAt(0) );
|
240
|
+
String tail = value.substring(1);
|
241
|
+
return head.toString().concat(tail);
|
242
|
+
}
|
243
|
+
}
|
244
|
+
|
245
|
+
protected void genRecognizerFile(Tool tool,
|
246
|
+
CodeGenerator generator,
|
247
|
+
Grammar grammar,
|
248
|
+
StringTemplate outputFileST)
|
249
|
+
throws IOException
|
250
|
+
{
|
251
|
+
StringTemplateGroup group = generator.getTemplates();
|
252
|
+
RubyRenderer renderer = new RubyRenderer();
|
253
|
+
try {
|
254
|
+
group.registerRenderer(Class.forName("java.lang.String"), renderer);
|
255
|
+
} catch (ClassNotFoundException e) {
|
256
|
+
// this shouldn't happen
|
257
|
+
System.err.println("ClassNotFoundException: " + e.getMessage());
|
258
|
+
e.printStackTrace(System.err);
|
259
|
+
}
|
260
|
+
String fileName =
|
261
|
+
generator.getRecognizerFileName(grammar.name, grammar.type);
|
262
|
+
generator.write(outputFileST, fileName);
|
263
|
+
}
|
264
|
+
public String getTargetCharLiteralFromANTLRCharLiteral(
|
265
|
+
CodeGenerator generator,
|
266
|
+
String literal)
|
267
|
+
{
|
268
|
+
literal = literal.substring(1, literal.length() - 1);
|
269
|
+
|
270
|
+
String result = "?";
|
271
|
+
|
272
|
+
if (literal.equals("\\")) {
|
273
|
+
result += "\\\\";
|
274
|
+
}
|
275
|
+
else if (literal.equals(" ")) {
|
276
|
+
result += "\\s";
|
277
|
+
}
|
278
|
+
else if (literal.startsWith("\\u")) {
|
279
|
+
result = "0x" + literal.substring(2);
|
280
|
+
}
|
281
|
+
else {
|
282
|
+
result += literal;
|
283
|
+
}
|
284
|
+
|
285
|
+
return result;
|
286
|
+
}
|
287
|
+
public int getMaxCharValue(CodeGenerator generator)
|
288
|
+
{
|
289
|
+
// we don't support unicode, yet.
|
290
|
+
return 0xFF;
|
291
|
+
}
|
292
|
+
public String getTokenTypeAsTargetLabel(CodeGenerator generator, int ttype)
|
293
|
+
{
|
294
|
+
String name = generator.grammar.getTokenDisplayName(ttype);
|
295
|
+
// If name is a literal, return the token type instead
|
296
|
+
if ( name.charAt(0)=='\'' ) {
|
297
|
+
return generator.grammar.computeTokenNameFromLiteral(ttype, name);
|
298
|
+
}
|
299
|
+
return name;
|
300
|
+
}
|
301
|
+
/** Is scope in @scope::name {action} valid for this kind of grammar?
|
302
|
+
* Targets like C++ may want to allow new scopes like headerfile or
|
303
|
+
* some such. The action names themselves are not policed at the
|
304
|
+
* moment so targets can add template actions w/o having to recompile
|
305
|
+
* ANTLR.
|
306
|
+
*/
|
307
|
+
public boolean isValidActionScope(int grammarType, String scope) {
|
308
|
+
switch (grammarType) {
|
309
|
+
case Grammar.LEXER:
|
310
|
+
if (scope.equals("lexer")) {
|
311
|
+
return true;
|
312
|
+
}
|
313
|
+
if (scope.equals("token")) {
|
314
|
+
return true;
|
315
|
+
}
|
316
|
+
if (scope.equals("module")) {
|
317
|
+
return true;
|
318
|
+
}
|
319
|
+
if (scope.equals("overrides")) {
|
320
|
+
return true;
|
321
|
+
}
|
322
|
+
break;
|
323
|
+
case Grammar.PARSER:
|
324
|
+
if (scope.equals("parser")) {
|
325
|
+
return true;
|
326
|
+
}
|
327
|
+
if (scope.equals("token")) {
|
328
|
+
return true;
|
329
|
+
}
|
330
|
+
if (scope.equals("module")) {
|
331
|
+
return true;
|
332
|
+
}
|
333
|
+
if (scope.equals("overrides")) {
|
334
|
+
return true;
|
335
|
+
}
|
336
|
+
break;
|
337
|
+
case Grammar.COMBINED:
|
338
|
+
if (scope.equals("parser")) {
|
339
|
+
return true;
|
340
|
+
}
|
341
|
+
if (scope.equals("lexer")) {
|
342
|
+
return true;
|
343
|
+
}
|
344
|
+
if (scope.equals("token")) {
|
345
|
+
return true;
|
346
|
+
}
|
347
|
+
if (scope.equals("module")) {
|
348
|
+
return true;
|
349
|
+
}
|
350
|
+
if (scope.equals("overrides")) {
|
351
|
+
return true;
|
352
|
+
}
|
353
|
+
break;
|
354
|
+
case Grammar.TREE_PARSER:
|
355
|
+
if (scope.equals("treeparser")) {
|
356
|
+
return true;
|
357
|
+
}
|
358
|
+
if (scope.equals("token")) {
|
359
|
+
return true;
|
360
|
+
}
|
361
|
+
if (scope.equals("module")) {
|
362
|
+
return true;
|
363
|
+
}
|
364
|
+
if (scope.equals("overrides")) {
|
365
|
+
return true;
|
366
|
+
}
|
367
|
+
break;
|
368
|
+
}
|
369
|
+
return false;
|
370
|
+
}
|
371
|
+
/*
|
372
|
+
public String getTargetStringLiteralFromString(String s)
|
373
|
+
{
|
374
|
+
System.out.print(s + "\n");
|
375
|
+
return super.getTargetStringLiteralFromString(s);
|
376
|
+
}
|
377
|
+
|
378
|
+
public String getTargetStringLiteralFromString(String s, boolean quoted)
|
379
|
+
{
|
380
|
+
// System.out.print(s + "\n");
|
381
|
+
String ret_value = super.getTargetStringLiteralFromString(s, quoted);
|
382
|
+
System.out.print(ret_value + "\n");
|
383
|
+
return(ret_value);
|
384
|
+
}
|
385
|
+
|
386
|
+
public String getTarget64BitStringFromValue(long word)
|
387
|
+
{
|
388
|
+
System.out.print(((Long)word).toString() + "\n");
|
389
|
+
String result = super.getTarget64BitStringFromValue(word);
|
390
|
+
System.out.print(result + "\n");
|
391
|
+
return result;
|
392
|
+
}
|
393
|
+
*/
|
394
|
+
public String encodeIntAsCharEscape(final int v) {
|
395
|
+
final int intValue;
|
396
|
+
|
397
|
+
if (v == 65535) {
|
398
|
+
intValue = -1;
|
399
|
+
} else {
|
400
|
+
intValue = v;
|
401
|
+
}
|
402
|
+
|
403
|
+
return String.valueOf(intValue);
|
404
|
+
}
|
405
|
+
// public List postProcessAction(List chunks, antlr.Token actionToken) {
|
406
|
+
// List nChunks = new ArrayList();
|
407
|
+
//
|
408
|
+
// for (int i = 0; i < chunks.size(); i++) {
|
409
|
+
// Object chunk = chunks.get(i);
|
410
|
+
//
|
411
|
+
// if ( chunk instanceof String ) {
|
412
|
+
// String text = (String)chunks.get(i);
|
413
|
+
// if ( nChunks.size() == 0 && actionToken.getColumn() > 0 ) {
|
414
|
+
// // first chunk and some 'virtual' WS at beginning
|
415
|
+
// // prepend to this chunk
|
416
|
+
//
|
417
|
+
// String ws = "";
|
418
|
+
// for ( int j = 0 ; j < actionToken.getColumn() ; j++ ) {
|
419
|
+
// ws += " ";
|
420
|
+
// }
|
421
|
+
// text = ws + text;
|
422
|
+
// }
|
423
|
+
//
|
424
|
+
// String[] parts = text.split("\r?\n");
|
425
|
+
// for ( String line : parts ) {
|
426
|
+
// nChunks.add(line);
|
427
|
+
// }
|
428
|
+
// }
|
429
|
+
// else {
|
430
|
+
// if ( nChunks.size() == 0 && actionToken.getColumn() > 0 ) {
|
431
|
+
// // first chunk and some 'virtual' WS at beginning
|
432
|
+
// // add as a chunk of its own
|
433
|
+
//
|
434
|
+
// String ws = "";
|
435
|
+
// for ( int j = 0 ; j < actionToken.getColumn() ; j++ ) {
|
436
|
+
// ws += " ";
|
437
|
+
// }
|
438
|
+
// nChunks.add(ws);
|
439
|
+
// }
|
440
|
+
//
|
441
|
+
// nChunks.add(chunk);
|
442
|
+
// }
|
443
|
+
// }
|
444
|
+
//
|
445
|
+
// int lineNo = actionToken.getLine();
|
446
|
+
// int col = 0;
|
447
|
+
//
|
448
|
+
// // strip trailing empty lines
|
449
|
+
// int lastChunk = nChunks.size() - 1;
|
450
|
+
// while ( lastChunk > 0
|
451
|
+
// && nChunks.get(lastChunk) instanceof String
|
452
|
+
// && ((String)nChunks.get(lastChunk)).trim().length() == 0 )
|
453
|
+
// lastChunk--;
|
454
|
+
//
|
455
|
+
// // strip leading empty lines
|
456
|
+
// int firstChunk = 0;
|
457
|
+
// while ( firstChunk <= lastChunk
|
458
|
+
// && nChunks.get(firstChunk) instanceof String
|
459
|
+
// && ((String)nChunks.get(firstChunk)).trim().length() == 0
|
460
|
+
// && ((String)nChunks.get(firstChunk)).endsWith("\n") ) {
|
461
|
+
// lineNo++;
|
462
|
+
// firstChunk++;
|
463
|
+
// }
|
464
|
+
//
|
465
|
+
// int indent = -1;
|
466
|
+
// for ( int i = firstChunk ; i <= lastChunk ; i++ ) {
|
467
|
+
// Object chunk = nChunks.get(i);
|
468
|
+
//
|
469
|
+
// //System.out.println(lineNo + ":" + col + " " + quote(chunk.toString()));
|
470
|
+
//
|
471
|
+
// if ( chunk instanceof String ) {
|
472
|
+
// String text = (String)chunk;
|
473
|
+
//
|
474
|
+
// if ( col == 0 ) {
|
475
|
+
// if ( indent == -1 ) {
|
476
|
+
// // first non-blank line
|
477
|
+
// // count number of leading whitespaces
|
478
|
+
//
|
479
|
+
// indent = 0;
|
480
|
+
// for ( int j = 0; j < text.length(); j++ ) {
|
481
|
+
// if ( !Character.isWhitespace(text.charAt(j)) )
|
482
|
+
// break;
|
483
|
+
//
|
484
|
+
// indent++;
|
485
|
+
// }
|
486
|
+
// }
|
487
|
+
//
|
488
|
+
// if ( text.length() >= indent ) {
|
489
|
+
// int j;
|
490
|
+
// for ( j = 0; j < indent ; j++ ) {
|
491
|
+
// if ( !Character.isWhitespace(text.charAt(j)) ) {
|
492
|
+
// // should do real error reporting here...
|
493
|
+
// System.err.println("Warning: badly indented line " + lineNo + " in action:");
|
494
|
+
// System.err.println(text);
|
495
|
+
// break;
|
496
|
+
// }
|
497
|
+
// }
|
498
|
+
//
|
499
|
+
// nChunks.set(i, text.substring(j));
|
500
|
+
// }
|
501
|
+
// else if ( text.trim().length() > 0 ) {
|
502
|
+
// // should do real error reporting here...
|
503
|
+
// System.err.println("Warning: badly indented line " + lineNo + " in action:");
|
504
|
+
// System.err.println(text);
|
505
|
+
// }
|
506
|
+
// }
|
507
|
+
//
|
508
|
+
// if ( text.endsWith("\n") ) {
|
509
|
+
// lineNo++;
|
510
|
+
// col = 0;
|
511
|
+
// }
|
512
|
+
// else {
|
513
|
+
// col += text.length();
|
514
|
+
// }
|
515
|
+
// }
|
516
|
+
// else {
|
517
|
+
// // not really correct, but all I need is col to increment...
|
518
|
+
// col += 1;
|
519
|
+
// }
|
520
|
+
// }
|
521
|
+
//
|
522
|
+
// return nChunks;
|
523
|
+
// }
|
524
|
+
}
|