hipe-gorillagrammar 0.0.1beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +21 -0
- data/History.txt +8 -0
- data/LICENSE.txt +19 -0
- data/README.txt +11 -0
- data/Rakefile +25 -0
- data/Thorfile +135 -0
- data/hipe-gorillagrammar.gemspec +66 -0
- data/lib/hipe-gorillagrammar/extensions/syntax.rb +50 -0
- data/lib/hipe-gorillagrammar.rb +492 -0
- data/spec/argv.rb +3 -0
- data/spec/extensions/syntax_spec.rb +41 -0
- data/spec/grammar_spec.rb +91 -0
- data/spec/helpers.rb +5 -0
- data/spec/parse_tree_spec.rb +64 -0
- data/spec/parsing_spec.rb +304 -0
- data/spec/range_spec.rb +47 -0
- data/spec/regexp_spec.rb +24 -0
- data/spec/runtime_spec.rb +66 -0
- data/spec/sequence_spec.rb +85 -0
- data/spec/shorthand_spec.rb +186 -0
- data/spec/spec.opts +4 -0
- data/spec/symbol_reference_spec.rb +28 -0
- data/spec/symbol_spec.rb +45 -0
- metadata +88 -0
data/.gitignore
ADDED
data/History.txt
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
Copyright (c) 2009 Mark Meves
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
of this software and associated documentation files (the "Software"), to deal
|
5
|
+
in the Software without restriction, including without limitation the rights
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
copies of the Software, and to permit persons to whom the Software is
|
8
|
+
furnished to do so, subject to the following conditions:
|
9
|
+
|
10
|
+
The above copyright notice and this permission notice shall be included in
|
11
|
+
all copies or substantial portions of the Software.
|
12
|
+
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
THE SOFTWARE.
|
data/README.txt
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
Experiments in parsing. Previous version was in the "Githelper" script of mine on github.
|
2
|
+
|
3
|
+
(The original version was a php class i wrote many years ago with the same name, ...@todo)
|
4
|
+
|
5
|
+
ThankYou's
|
6
|
+
Bryan Helmkamp and the webrat team for giving me an excellent Thorfile and teaching examples of rspec
|
7
|
+
argv[0] Sun Nov 29 00:05:46 EST 2009 in #ruby-lang for schooling me on module inheritance chains
|
8
|
+
hagabaka Mon Nov 30 02:15:51 EST 2009
|
9
|
+
raggi for suggesting Marshal.dump/Marshal.load
|
10
|
+
|
11
|
+
Above all, thank you Yoko for putting up w/ me
|
data/Rakefile
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
|
2
|
+
# require 'spec'
|
3
|
+
require 'spec/rake/spectask'
|
4
|
+
require 'spec/rake/verify_rcov'
|
5
|
+
|
6
|
+
desc "Run API and Core specs"
|
7
|
+
Spec::Rake::SpecTask.new do |t|
|
8
|
+
t.spec_opts = ['--options', "\"#{File.dirname(__FILE__)}/spec/spec.opts\""]
|
9
|
+
#t.spec_files = FileList['spec/public/**/*_spec.rb'] + FileList['spec/private/**/*_spec.rb']
|
10
|
+
t.spec_files = FileList['spec/**/*_spec.rb'] + FileList['spec/private/**/*_spec.rb']
|
11
|
+
end
|
12
|
+
|
13
|
+
desc "Run all examples with RCov"
|
14
|
+
Spec::Rake::SpecTask.new('rcov') do |t|
|
15
|
+
t.spec_files = FileList['spec/**/*_spec.rb']
|
16
|
+
t.rcov = true
|
17
|
+
t.rcov_opts = ['--exclude', 'spec,/Library/Ruby/Gems/1.8/gems']
|
18
|
+
end
|
19
|
+
|
20
|
+
RCov::VerifyTask.new(:rcovv => 'rcov') do |t|
|
21
|
+
t.threshold = 95.0
|
22
|
+
t.index_html = 'coverage/index.html'
|
23
|
+
end
|
24
|
+
|
25
|
+
|
data/Thorfile
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
# this file was originally copy-pasted from webrat's Thorfile. Thank you Bryan Helmkamp!
|
2
|
+
require 'ruby-debug'
|
3
|
+
|
4
|
+
module GemHelpers
|
5
|
+
|
6
|
+
# Builds the Gem::Specification for hipe-gorillagrammar from the current
# state of the git working copy.
#
# The library is loaded from ./lib so the version constant can be read, and
# the file lists are derived from what git tracks (minus ignored files).
#
# @return [Gem::Specification] the freshly built specification
def generate_gemspec
  $LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__), "lib")))
  require "hipe-gorillagrammar"

  Gem::Specification.new do |s|
    s.name = 'hipe-gorillagrammar'
    s.version = Hipe::GorillaGrammar::VERSION
    s.required_rubygems_version = Gem::Requirement.new("> 1.3.1") if s.respond_to? :required_rubygems_version=
    s.authors = ["Mark Meves"]
    s.email = "mark.meves@gmail.com"
    s.homepage = "http://github.com/hipe/hipe-gorillagrammar"
    s.date = %q{2009-11-23}
    s.summary = %q{'beta attempt at a simple LR parser generator driven by DSL under 500LOC 100% C1 test coverage'}
    s.description = <<-EOS.strip
      LR Parser Generator (?) under 500 LOC with 100*% C1 test coverage. No useful AST yet. No useful docs yet.
    EOS

    # s.rubyforge_project = "webrat"

    require "git"
    repo = Git.open(".")

    s.files = normalize_files(repo.ls_files.keys - repo.lib.ignored_files)
    # "**/" recurses into subdirectories, so specs nested below spec/ are
    # picked up as well as the ones directly inside it.
    s.test_files = normalize_files(Dir['spec/**/*.rb'] - repo.lib.ignored_files)
    #s.test_files = normalize_files(Dir['spec/*.rb'] - repo.lib.ignored_files)

    s.has_rdoc = 'yard' # trying out arg[0]/lsegal's doc tool
    #s.extra_rdoc_files = %w[README.rdoc MIT-LICENSE.txt History.txt]
    #s.extra_rdoc_files = %w[MIT-LICENSE.txt History.txt]

    #s.add_dependency "nokogiri", ">= 1.2.0"
    #s.add_dependency "rack", ">= 1.0"
  end
end
|
40
|
+
|
41
|
+
# Reduces a list of paths to the ones that are regular files on disk
# (directories and missing paths are dropped) and returns them sorted.
#
# @param array [Array<String>] candidate paths
# @return [Array<String>] sorted paths for which File.file? is true
def normalize_files(array)
  regular_files = array.select { |entry| File.file?(entry) }
  regular_files.sort
end
|
47
|
+
|
48
|
+
# Adds extra space when outputting an array. This helps create better version
|
49
|
+
# control diffs, because otherwise it is all on the same line.
|
50
|
+
# Rewrites a single-line `s.<array_name> = [...]` assignment inside the
# generated gemspec source so that each element sits on its own line.
#
# @param gemspec_ruby [String] gemspec source text
# @param array_name [Symbol, String] attribute name, e.g. :files
# @return [String] the source with the matching assignment re-wrapped;
#   unchanged when there is no match
def prettyify_array(gemspec_ruby, array_name)
  assignment = /s\.#{array_name} = \[.+?\]/
  gemspec_ruby.gsub(assignment) do |found|
    # drop the closing bracket, then separate "s.x = " from the element list
    head, body = found[0..-2].split("[")
    entries = body.split(",").join(",\n ")
    "#{head}[\n #{entries}\n ]"
  end
end
|
56
|
+
|
57
|
+
def read_gemspec
|
58
|
+
@read_gemspec ||= eval(File.read("hipe-gorillagrammar.gemspec"))
|
59
|
+
end
|
60
|
+
|
61
|
+
def sh(command)
|
62
|
+
puts command
|
63
|
+
system command
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
class Default < Thor
|
68
|
+
include GemHelpers
|
69
|
+
|
70
|
+
desc "gemspec", "Regenerate hipe-gorillagrammar.gemspec"
|
71
|
+
def gemspec
|
72
|
+
File.open("hipe-gorillagrammar.gemspec", "w") do |file|
|
73
|
+
gemspec_ruby = generate_gemspec.to_ruby
|
74
|
+
gemspec_ruby = prettyify_array(gemspec_ruby, :files)
|
75
|
+
gemspec_ruby = prettyify_array(gemspec_ruby, :test_files)
|
76
|
+
gemspec_ruby = prettyify_array(gemspec_ruby, :extra_rdoc_files)
|
77
|
+
|
78
|
+
file.write gemspec_ruby
|
79
|
+
end
|
80
|
+
|
81
|
+
puts "Wrote gemspec to hipe-gorillagrammar.gemspec"
|
82
|
+
read_gemspec.validate
|
83
|
+
end
|
84
|
+
|
85
|
+
desc "build", "Build a hipe-gorillagrammar gem"
|
86
|
+
def build
|
87
|
+
sh "gem build hipe-gorillagrammar.gemspec"
|
88
|
+
FileUtils.mkdir_p "pkg"
|
89
|
+
FileUtils.mv read_gemspec.file_name, "pkg"
|
90
|
+
end
|
91
|
+
|
92
|
+
desc "install", "Install the latest built gem"
|
93
|
+
def install
|
94
|
+
sh "gem install --local pkg/#{read_gemspec.file_name}"
|
95
|
+
end
|
96
|
+
|
97
|
+
desc "release", "Release the current branch to GitHub and Gemcutter"
|
98
|
+
def release
|
99
|
+
gemspec
|
100
|
+
build
|
101
|
+
Release.new.tag
|
102
|
+
Release.new.gem
|
103
|
+
end
|
104
|
+
|
105
|
+
end
|
106
|
+
|
107
|
+
|
108
|
+
class Spec < Thor
|
109
|
+
desc "spec", "el speco"
|
110
|
+
def run which='all'
|
111
|
+
t.spec_opts = ['--options', "\"#{File.dirname(__FILE__)}/spec/spec.opts\""]
|
112
|
+
t.spec_files = FileList['spec/**/*_spec.rb'] + FileList['spec/**/*_spec.rb']
|
113
|
+
debugger
|
114
|
+
'x'
|
115
|
+
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
|
120
|
+
|
121
|
+
class Release < Thor
|
122
|
+
include GemHelpers
|
123
|
+
|
124
|
+
desc "tag", "Tag the gem on the origin server"
|
125
|
+
def tag
|
126
|
+
release_tag = "v#{read_gemspec.version}"
|
127
|
+
sh "git tag -a #{release_tag} -m 'Tagging #{release_tag}'"
|
128
|
+
sh "git push origin #{release_tag}"
|
129
|
+
end
|
130
|
+
|
131
|
+
desc "gem", "Push the gem to Gemcutter"
|
132
|
+
def gem
|
133
|
+
sh "gem push pkg/#{read_gemspec.file_name}"
|
134
|
+
end
|
135
|
+
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
Gem::Specification.new do |s|
|
4
|
+
s.name = %q{hipe-gorillagrammar}
|
5
|
+
s.version = "0.0.1beta"
|
6
|
+
|
7
|
+
s.required_rubygems_version = Gem::Requirement.new("> 1.3.1") if s.respond_to? :required_rubygems_version=
|
8
|
+
s.authors = ["Mark Meves"]
|
9
|
+
s.date = %q{2009-11-23}
|
10
|
+
s.description = %q{LR Parser Generator (?) under 500 LOC with 100*% C1 test coverage. No useful AST yet. No useful docs yet.}
|
11
|
+
s.email = %q{mark.meves@gmail.com}
|
12
|
+
s.files = [
|
13
|
+
".gitignore",
|
14
|
+
"History.txt",
|
15
|
+
"LICENSE.txt",
|
16
|
+
"README.txt",
|
17
|
+
"Rakefile",
|
18
|
+
"Thorfile",
|
19
|
+
"hipe-gorillagrammar.gemspec",
|
20
|
+
"lib/hipe-gorillagrammar.rb",
|
21
|
+
"lib/hipe-gorillagrammar/extensions/syntax.rb",
|
22
|
+
"spec/argv.rb",
|
23
|
+
"spec/extensions/syntax_spec.rb",
|
24
|
+
"spec/grammar_spec.rb",
|
25
|
+
"spec/helpers.rb",
|
26
|
+
"spec/parse_tree_spec.rb",
|
27
|
+
"spec/parsing_spec.rb",
|
28
|
+
"spec/range_spec.rb",
|
29
|
+
"spec/regexp_spec.rb",
|
30
|
+
"spec/runtime_spec.rb",
|
31
|
+
"spec/sequence_spec.rb",
|
32
|
+
"spec/shorthand_spec.rb",
|
33
|
+
"spec/spec.opts",
|
34
|
+
"spec/symbol_reference_spec.rb",
|
35
|
+
"spec/symbol_spec.rb"
|
36
|
+
]
|
37
|
+
s.has_rdoc = %q{yard}
|
38
|
+
s.homepage = %q{http://github.com/hipe/hipe-gorillagrammar}
|
39
|
+
s.require_paths = ["lib"]
|
40
|
+
s.rubygems_version = %q{1.3.5}
|
41
|
+
s.summary = %q{'beta attempt at a simple LR parser generator driven by DSL under 500LOC 100% C1 test coverage'}
|
42
|
+
s.test_files = [
|
43
|
+
"spec/argv.rb",
|
44
|
+
"spec/grammar_spec.rb",
|
45
|
+
"spec/helpers.rb",
|
46
|
+
"spec/parse_tree_spec.rb",
|
47
|
+
"spec/parsing_spec.rb",
|
48
|
+
"spec/range_spec.rb",
|
49
|
+
"spec/regexp_spec.rb",
|
50
|
+
"spec/runtime_spec.rb",
|
51
|
+
"spec/sequence_spec.rb",
|
52
|
+
"spec/shorthand_spec.rb",
|
53
|
+
"spec/symbol_reference_spec.rb",
|
54
|
+
"spec/symbol_spec.rb"
|
55
|
+
]
|
56
|
+
|
57
|
+
if s.respond_to? :specification_version then
|
58
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
59
|
+
s.specification_version = 3
|
60
|
+
|
61
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
62
|
+
else
|
63
|
+
end
|
64
|
+
else
|
65
|
+
end
|
66
|
+
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module Hipe::GorillaGrammar
|
2
|
+
module GorillaSymbol
|
3
|
+
def syntax_pretty_name; @name ? @name.to_s.upcase : nil end
|
4
|
+
end
|
5
|
+
|
6
|
+
module RegexpTerminal
|
7
|
+
def syntax_tokens; [syntax] end
|
8
|
+
def syntax; syntax_pretty_name || self end
|
9
|
+
end
|
10
|
+
|
11
|
+
module StringTerminal
|
12
|
+
def syntax_tokens; [syntax] end
|
13
|
+
def syntax; self end
|
14
|
+
end
|
15
|
+
|
16
|
+
class SymbolReference
|
17
|
+
def syntax_tokens; [syntax_pretty_name] end
|
18
|
+
end
|
19
|
+
|
20
|
+
module NonTerminalSymbol
|
21
|
+
def syntax
|
22
|
+
return (syntax_tokens * ' ').gsub(%r{(?:[\[\(] +| +[\]\)]| +\| +)}){|x| x.strip}
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
class Sequence
|
27
|
+
def syntax_tokens
|
28
|
+
@group.map{ |x| x.syntax_tokens }.flatten
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
class RangeOf
|
33
|
+
def syntax_tokens;
|
34
|
+
is_one_or_more = (@range == (1..Infinity) and @group.size == 1 and @group[0].kind_of? SymbolReference)
|
35
|
+
things = case true
|
36
|
+
when (@range == (0..1)) then ['[', nil, ']']
|
37
|
+
when (@range == (1..1)) then ['(', nil, ')']
|
38
|
+
when (@range == (0..Infinity)) then ['[', '[...]', ']']
|
39
|
+
when (is_one_or_more) then ['[', '[...]', ']'] # *special handing below
|
40
|
+
else [%{(#{@range.to_s}) of (}, nil, ')']
|
41
|
+
end
|
42
|
+
thing = @group.map{ |x| x.syntax_tokens }.zip(Array.new(@group.size-1,'|')).flatten.compact
|
43
|
+
thing.unshift things[0]
|
44
|
+
thing.push things[1] if things[1]
|
45
|
+
thing.push things[2]
|
46
|
+
thing.unshift @group[0].syntax_pretty_name if is_one_or_more
|
47
|
+
thing
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
@@ -0,0 +1,492 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
#require 'ruby-debug'
|
3
|
+
require 'singleton'
|
4
|
+
|
5
|
+
class Symbol
|
6
|
+
def satisfied?; @satisfied; end
|
7
|
+
def accepting?; @accepting; end
|
8
|
+
end
|
9
|
+
# these are the different states that a parsing node can have
|
10
|
+
# after it's consumed a token. absurd hack so we can use smiley faces as symbols (note 2)
|
11
|
+
# mouth open means "still accepting". Smiley means "is satisfied". four permutations
|
12
|
+
:D.instance_variable_set :@satisfied, true # open mouth happy
|
13
|
+
:D.instance_variable_set :@accepting, true
|
14
|
+
:>.instance_variable_set :@satisfied, true # closed mouth happy
|
15
|
+
:>.instance_variable_set :@accepting, false
|
16
|
+
:O.instance_variable_set :@satisfied, false # open mouth unhappy
|
17
|
+
:O.instance_variable_set :@accepting, true
|
18
|
+
:C.instance_variable_set :@satisfied, false # closed mouth unhappy
|
19
|
+
:C.instance_variable_set :@accepting, false
|
20
|
+
module Hipe
|
21
|
+
def self.GorillaGrammar opts=nil, &block
|
22
|
+
GorillaGrammar.define(opts, &block)
|
23
|
+
end
|
24
|
+
module GorillaGrammar
|
25
|
+
VERSION = '0.0.1beta'
|
26
|
+
Infinity = 1.0 / 0
|
27
|
+
def self.define(opts=nil, &block)
|
28
|
+
runtime = Runtime.instance
|
29
|
+
g = runtime.create_grammar! opts
|
30
|
+
runtime.push_grammar g
|
31
|
+
begin
|
32
|
+
g.define(&block)
|
33
|
+
ensure
|
34
|
+
runtime.pop_grammar
|
35
|
+
end
|
36
|
+
end
|
37
|
+
# Process-wide registry for grammars (for global-like stuff).
#
# Keeps every grammar created so far, keyed by name, plus a stack holding
# the grammar currently being defined. Instance methods can also be called
# on the class itself; they are forwarded to the singleton instance.
class Runtime
  include Singleton

  def initialize
    @grammar_stack = []
    @grammars = {}
  end

  # Marks +grammar+ as the one currently being defined.
  # Raises UsageFailure when another grammar is already being defined.
  def push_grammar(grammar)
    raise UsageFailure.new('sorry, no making grammars inside of grammars') if @grammar_stack.size > 0
    @grammar_stack << grammar
  end

  # Removes and returns the grammar currently being defined (nil if none).
  def pop_grammar; @grammar_stack.pop; end

  # Like #current_grammar but raises UsageFailure when there is none.
  def current_grammar!
    raise UsageFailure.new("no current grammar") unless current_grammar
    current_grammar
  end

  # The grammar currently being defined, or nil.
  def current_grammar; @grammar_stack.last; end

  # Forwards unknown class-level calls (and any block) to the singleton.
  def self.method_missing(a, *b, &blk)
    return instance.send(a, *b, &blk) if instance.respond_to? a
    raise NoMethodError.new %{undefined method `#{a}' for #{inspect}}
  end

  # Keeps respond_to? truthful about the calls method_missing forwards.
  def self.respond_to_missing?(name, include_private = false)
    instance.respond_to?(name) || super
  end

  # Creates and registers a grammar. Unnamed grammars get the name
  # "grammarN", N being the number of registered grammars plus one.
  # Raises GorillaException when the name is already registered.
  def create_grammar! opts
    opts ||= {}
    g = Grammar.new opts
    g.name = %{grammar#{@grammars.size+1}} unless g.name
    raise GorillaException.new("for now we can't reopen grammars") if @grammars[g.name]
    @grammars[g.name] = g
  end

  # Looks up a registered grammar by name (nil when unknown).
  def get_grammar name; @grammars[name]; end

  # enables the use of =~, |, (0..1).of .., .., .., and :some_name[/regexp/] in grammars
  # Only the first call has an effect.
  def enable_operator_shorthands
    return if @shorthands_enabled
    Symbol.instance_eval { include SymbolHack, PipeHack }
    String.instance_eval { include PipeHack }
    # Integer instead of Fixnum: Fixnum is gone from current Ruby, and on
    # old interpreters Fixnum inherits from Integer, so both are covered.
    Integer.instance_eval { include FixnumOfHack }
    Range.instance_eval { include RangeOfHack }
    @shorthands_enabled = true
  end
end
|
75
|
+
class Grammar < Hash
|
76
|
+
attr_accessor :name, :root
|
77
|
+
alias_method :names, :keys
|
78
|
+
def initialize opts
|
79
|
+
opts ||= {}
|
80
|
+
@name = opts[:name] if opts[:name]
|
81
|
+
@with_operator_shorthands = opts.has_key?(:enable_operator_shorthands) ?
|
82
|
+
opts[:enable_operator_shorthands] : true
|
83
|
+
end
|
84
|
+
def == other; self.inspect == other.inspect end #hack
|
85
|
+
def define(&block) # should it return grammar or last symbol? grammar. (note 4:)
|
86
|
+
Runtime.enable_operator_shorthands if @with_operator_shorthands
|
87
|
+
@root = GorillaSymbol.factory instance_eval(&block) # allows anonymous ranges & sequences as grammr
|
88
|
+
@root = @root.dereference if @root.instance_of? SymbolReference
|
89
|
+
self[@root.name = '__main__'] = @root if (@root.name.nil?)
|
90
|
+
self
|
91
|
+
end
|
92
|
+
def parse tox; @root.parse tox; end
|
93
|
+
def self.register_shorthand name, klass
|
94
|
+
instance_eval {
|
95
|
+
define_method(name) { |*args|
|
96
|
+
klass.construct_from_shorthand(name, *args)
|
97
|
+
}
|
98
|
+
}
|
99
|
+
end
|
100
|
+
def []= name, symbol
|
101
|
+
raise GrammarGrammarException.new(%{Can't redefine symbols. Multiple definitions for :#{name}}) if self[name]
|
102
|
+
unless symbol.kind_of? GorillaSymbol
|
103
|
+
raise GorillaException.new(%{Expecting GorillaSymbol had #{symbol.inspect}})
|
104
|
+
end
|
105
|
+
symbol.name = name
|
106
|
+
super name, symbol
|
107
|
+
end
|
108
|
+
end
|
109
|
+
# Base class for every error this library raises or returns.
#
# Derives from StandardError so that a plain `rescue` catches it.
#
# Constructor arguments, all optional:
# * a leading String becomes the exception message
# * a trailing Hash is kept as extra info (see #tree)
# * anything left over is kept in @extra
class GorillaException < StandardError
  def initialize(*args)
    super args.shift if args[0].instance_of? String
    @info = args.last.instance_of?(Hash) ? args.pop : {}
    @extra = args
  end

  # The value passed under :tree in the info hash, or nil.
  def tree; @info[:tree]; end
end
|
117
|
+
class UsageFailure < GorillaException; end
|
118
|
+
class GrammarGrammarException < UsageFailure; end
|
119
|
+
class AmbiguousGrammar < GrammarGrammarException; end
|
120
|
+
class ParseFailure < GorillaException
|
121
|
+
def is_error?; true; end
|
122
|
+
def inspect; message; end # just for irb debugo
|
123
|
+
end
|
124
|
+
class UnexpectedEndOfInput < ParseFailure;
|
125
|
+
def message; <<-EOS.gsub(/^ /,'').gsub("\n",' ')
|
126
|
+
sorry, unexpected end of input. I was
|
127
|
+
expecting you to say #{RangeOf.join(tree.expecting.uniq,', ',' or ')}.
|
128
|
+
EOS
|
129
|
+
end
|
130
|
+
end
|
131
|
+
class UnexpectedInput < ParseFailure;
|
132
|
+
def message
|
133
|
+
%{sorry, i don't know what you mean by "#{@info[:token]}". } +
|
134
|
+
((ex = tree.expecting.uniq).size == 0 ? %{i wasn't expecting any more input.} :
|
135
|
+
%{i was expecting you to say #{RangeOf.join(ex,', ',' or ')}.})
|
136
|
+
end
|
137
|
+
end
|
138
|
+
# Adds `a | b | c` as a shorthand for "exactly one of a, b or c".
module PipeHack
  # Combines the receiver with +other+ into a (1..1) RangeOf.
  #
  # A receiver that is not itself a RangeOf starts a new RangeOf flagged
  # with is_pipe_hack. A RangeOf that carries that flag gets +other+
  # appended and is returned, so chained pipes build one flat list.
  #
  # Raises UsageFailure when the receiver is a RangeOf that was not
  # created by this shorthand.
  def |(other)
    unless instance_of?(RangeOf)
      alternation = RangeOf.new((1..1), [self, other])
      alternation.is_pipe_hack = true
      return alternation
    end
    raise UsageFailure.new("no") unless is_pipe_hack
    self << other
    self
  end
end
|
151
|
+
# Adds `n.of(a, b, ...)`: exactly n of the given symbols.
# note that IRL, this will rarely be used for other than 1
module FixnumOfHack
  # @param args the alternatives handed to RangeOf
  # @return [RangeOf] a RangeOf whose range is (self..self)
  def of(*args)
    RangeOf.new((self..self), args)
  end
end
|
156
|
+
module RangeOfHack
|
157
|
+
def of *args
|
158
|
+
return RangeOf.new(self, args)
|
159
|
+
end
|
160
|
+
end
|
161
|
+
module SymbolHack
|
162
|
+
def =~ (symbol_data)
|
163
|
+
return super unless ( grammar = Runtime.instance.current_grammar )
|
164
|
+
grammar[self] = GorillaSymbol.factory symbol_data
|
165
|
+
end
|
166
|
+
def [] (*symbol_data)
|
167
|
+
return super unless Runtime.instance.current_grammar
|
168
|
+
symbol_data = symbol_data[0] if symbol_data.size == 1
|
169
|
+
new_symbol = self.=~(symbol_data)
|
170
|
+
return SymbolReference.new self
|
171
|
+
end
|
172
|
+
end
|
173
|
+
module ParseTree; def is_error?; false end end
|
174
|
+
module GorillaSymbol # @abstract base
|
175
|
+
def self.factory obj # maps data structures to symbols. returns same object or new
|
176
|
+
case obj
|
177
|
+
when GorillaSymbol then obj # this one must stay on top!
|
178
|
+
when Array then Sequence.new(*obj)
|
179
|
+
# note RangeOf is never constructed directly with factory()
|
180
|
+
when Symbol then SymbolReference.new obj
|
181
|
+
when String then obj.extend StringTerminal
|
182
|
+
when Regexp then obj.extend RegexpTerminal
|
183
|
+
else
|
184
|
+
raise UsageFailure.new %{Can't determine symbol type for "#{obj.inspect}"},:obj=>obj
|
185
|
+
end
|
186
|
+
end
|
187
|
+
def finalize; self; end
|
188
|
+
attr_accessor :name
|
189
|
+
attr_reader :kleene
|
190
|
+
def natural_name; name ? name.to_s.gsub('_',' ') : nil; end
|
191
|
+
def fork_for_parse; Marshal.load Marshal.dump(self); end # note 1
|
192
|
+
def reinit_for_parse; extend ParseTree end
|
193
|
+
# Removes the named instance variables from this object.
#
# Names are matched by their string form, so it works whether
# instance_variables yields Strings or Symbols and whether the caller
# passes "@foo" or :@foo. Names that are not set are ignored.
#
# @param names [Array<String, Symbol>] instance variable names incl. "@"
# @return [Integer] how many instance variables were removed
def prune! names=[]
  wanted = names.map { |n| n.to_s }
  doomed = instance_variables.select { |ivar| wanted.include?(ivar.to_s) }
  doomed.each { |ivar| remove_instance_variable ivar }
  doomed.size
end
|
194
|
+
def dereference; self; end
|
195
|
+
end
|
196
|
+
module TerminalSymbol;
|
197
|
+
include GorillaSymbol;
|
198
|
+
attr_reader :status
|
199
|
+
def inspect; [@name ? @name.inspect : nil, super, %{(#{@token ? '.' : '_' })}].compact.join; end
|
200
|
+
def pretty_print q
|
201
|
+
q.text inspect
|
202
|
+
q.group 1,'{','}' do
|
203
|
+
instance_variables.sort.each do |n|
|
204
|
+
next if instance_variable_get(n).nil?
|
205
|
+
q.text %{#{n}=}; q.pp instance_variable_get(n); q.text(';'); q.breakable
|
206
|
+
end
|
207
|
+
end
|
208
|
+
end
|
209
|
+
# A terminal has no children: yields itself once and returns the block's value.
def recurse &block; yield self end
|
210
|
+
def tokens; [@token]; end
|
211
|
+
end
|
212
|
+
module NonTerminalSymbol;
|
213
|
+
include GorillaSymbol;
|
214
|
+
def [] (i); super(i) ? super(i) : @group[i]; end
|
215
|
+
def size; kind_of?(ParseTree) ? super : @group.size; end
|
216
|
+
# Equality for non-terminals.
#
# Once the object has been extended with ParseTree it compares by the
# class's builtin_equality. Before that (a grammar node) two nodes are
# equal when they have the same class, the same name and equal groups.
def == other
  return builtin_equality(other) if kind_of?(ParseTree)
  other.class == self.class && @name == other.name && @group == other.group
end
|
220
|
+
attr_reader :group
|
221
|
+
def _inspect left, right, name = nil
|
222
|
+
name = %{:#{@name}} if name.nil? and @name
|
223
|
+
[name, left,
|
224
|
+
(0..(@group ? @group.size : self.size)-1).map{|i| (self[i] ? self[i] : @group[i]).inspect}.join(', '),
|
225
|
+
right ].compact.join
|
226
|
+
end
|
227
|
+
def pretty_print q
|
228
|
+
names = instance_variables.sort{|a,b| (a=='@group') ? -1 : (b=='@group' ? 1 : a<=>b)}
|
229
|
+
names.delete_if{|x| instance_variable_get(x).nil?} # it may have been pruned
|
230
|
+
q.group 1,'{','}' do
|
231
|
+
q.breakable
|
232
|
+
# show any non-nil properties of this object
|
233
|
+
names.each do |name|
|
234
|
+
q.text %{#{name}:}
|
235
|
+
q.pp instance_variable_get(name)
|
236
|
+
q.breakable
|
237
|
+
end
|
238
|
+
# show any captured children of this object (if it's parsed something)
|
239
|
+
each_with_index do |child, i|
|
240
|
+
q.text %{#{i}=>}
|
241
|
+
q.pp child
|
242
|
+
q.breakable
|
243
|
+
end
|
244
|
+
end
|
245
|
+
end
|
246
|
+
def status; raise 'no' unless @status; @status end
|
247
|
+
def _status=(smiley); raise 'no' unless smiley.kind_of? Symbol; @status = smiley end
|
248
|
+
def recurse &block
|
249
|
+
sum_like = yield self
|
250
|
+
self.each{ |x| if x.respond_to?(:recurse) then sum_like += x.recurse(&block) end }
|
251
|
+
sum_like
|
252
|
+
end
|
253
|
+
def tokens; [] end # for use in a call to recurse e.g. non_terminal.recurse{|x| x.tokens}
|
254
|
+
end
|
255
|
+
module StringTerminal # this could also be considered a special case of regexp terminal
|
256
|
+
include TerminalSymbol
|
257
|
+
def match token
|
258
|
+
status = if (self==token)
|
259
|
+
@token = token
|
260
|
+
(:>)
|
261
|
+
else
|
262
|
+
(:C)
|
263
|
+
end
|
264
|
+
@status = status
|
265
|
+
end
|
266
|
+
def expecting; [%{"#{self}"}]; end
|
267
|
+
end
|
268
|
+
module RegexpTerminal
|
269
|
+
include TerminalSymbol
|
270
|
+
Grammar.register_shorthand :regexp, self
|
271
|
+
def self.construct_from_shorthand name, *args
|
272
|
+
args[0].extend self
|
273
|
+
end
|
274
|
+
def [](capture_offset); @captures[capture_offset]; end
|
275
|
+
def expecting; @name ? [natural_name] : [self.inspect]; end
|
276
|
+
def match token # cleaned up for note 5 @ 11/27 11:39 am
|
277
|
+
@status = if (md = super(token))
|
278
|
+
@captures = md.captures if (md.captures.size>0)
|
279
|
+
@token = token
|
280
|
+
(:>)
|
281
|
+
else
|
282
|
+
(:C)
|
283
|
+
end
|
284
|
+
end
|
285
|
+
end
|
286
|
+
class SymbolReference
|
287
|
+
include GorillaSymbol
|
288
|
+
def inspect; %{::#{@name}}; end
|
289
|
+
def pp q; q.text(inspect); end
|
290
|
+
def initialize symbol
|
291
|
+
@name = symbol
|
292
|
+
@grammar_name = Runtime.current_grammar!.name
|
293
|
+
end
|
294
|
+
def dereference;
|
295
|
+
@actual ||= Runtime.instance.get_grammar(@grammar_name)[@name].fork_for_parse.reinit_for_parse
|
296
|
+
end
|
297
|
+
def dereference_light
|
298
|
+
Runtime.instance.get_grammar(@grammar_name)[@name]
|
299
|
+
end
|
300
|
+
[:kleene, :expecting, :reinit_for_parse].each do |name|
|
301
|
+
define_method(name){ dereference_light.send(name) }
|
302
|
+
end
|
303
|
+
end
|
304
|
+
module CanParse
|
305
|
+
def parse tokens
|
306
|
+
tree = self.fork_for_parse.reinit_for_parse
|
307
|
+
while token = tokens.shift and tree.match(token).accepting?; end
|
308
|
+
if (:C) == tree.status or (tokens.size > 0) # xtra
|
309
|
+
UnexpectedInput.new :token=>(:C==tree.status ? token : tokens.first), :tree=>tree
|
310
|
+
elsif ! tree.status.satisfied?
|
311
|
+
UnexpectedEndOfInput.new :tree=>tree
|
312
|
+
else
|
313
|
+
tree.finalize; #tree.prune!
|
314
|
+
end
|
315
|
+
end
|
316
|
+
end
|
317
|
+
class Sequence < Array
|
318
|
+
alias_method :builtin_equality, :==
|
319
|
+
include CanParse, NonTerminalSymbol, PipeHack
|
320
|
+
Grammar.register_shorthand :sequence, self
|
321
|
+
def self.construct_from_shorthand(name, *args); self.new(*args); end
|
322
|
+
def initialize *args
|
323
|
+
@index = 0 # we use this to report expecting whether or not we are a parse
|
324
|
+
raise GrammarGrammarException.new "Arguments must be non-zero length" unless args.size > 0
|
325
|
+
@group = args.map{|x| GorillaSymbol.factory x }
|
326
|
+
end
|
327
|
+
def reinit_for_parse;
|
328
|
+
super
|
329
|
+
@stop_here = @group.size;
|
330
|
+
num = @group.reverse.map{|x|
|
331
|
+
x.reinit_for_parse
|
332
|
+
x.kleene
|
333
|
+
}.find_index{|x| x==false || x.nil? } || @group.size
|
334
|
+
@satisfied_at = @group.size - num # trailing children that can be zero length affect when we are satisfied.
|
335
|
+
@status = (@satisfied_at==@index) ? :D : :O
|
336
|
+
self
|
337
|
+
end
|
338
|
+
def inspect; _inspect '[',']'; end
|
339
|
+
def finalize; _advance if @child; self; end
|
340
|
+
def prune!; super %w(@group @index @satisfied_at @status @stop_here @kleene) end
|
341
|
+
def expecting # cleanup @fixme @todo. this was some genetic programming
|
342
|
+
return [] if self.status == :>
|
343
|
+
child = @child || self.last # might be nil if nothing was grabbed
|
344
|
+
expecting = []
|
345
|
+
if (child) # if we were able to parse at least one token
|
346
|
+
if ((index=@index-1) >= 0) # go backwards, reporting expected from any kleene closures
|
347
|
+
(index..0).each do
|
348
|
+
if (self[index].kleene) # kleeneup to use find_index
|
349
|
+
expecting |= self[index].expecting
|
350
|
+
end
|
351
|
+
end
|
352
|
+
end
|
353
|
+
expecting += child.expecting unless child.status == :> # report the expecting tokens from the current symbol ()
|
354
|
+
end
|
355
|
+
index = size
|
356
|
+
# index = child ? ( @index + 1 ) : @index # whether or not we got to a token,
|
357
|
+
#index = @index
|
358
|
+
if ((!child or child.kleene or child.status.satisfied?) and @group and index < @group.size)
|
359
|
+
begin # go forward reporting the expecting from any kleene closures
|
360
|
+
expecting |= @group[index].expecting
|
361
|
+
break unless @group[index].kleene
|
362
|
+
end while((index+=1)<@group.size)
|
363
|
+
end
|
364
|
+
expecting << "an end to the phrase" if (index == @group.size)
|
365
|
+
expecting
|
366
|
+
end
|
367
|
+
def _advance
|
368
|
+
# @child.prune! unless @child.kleene #@todo
|
369
|
+
self << remove_instance_variable('@child') # child must be :> (if the child was :D we should keep it)
|
370
|
+
case (@index += 1)
|
371
|
+
when @stop_here then (:>) # iff we just finished the last child (there is no lookahead note 5)
|
372
|
+
when @satisfied_at then (:D)
|
373
|
+
else (:O)
|
374
|
+
end
|
375
|
+
end
|
376
|
+
def match token
|
377
|
+
while true
|
378
|
+
@child ||= @group[@index].dereference; # @group[@index] = nil; save it for prune
|
379
|
+
child_prev_status = @child.status
|
380
|
+
child_status = @child.match token
|
381
|
+
self._status = case child_status
|
382
|
+
when :> then _advance
|
383
|
+
when :O then :O
|
384
|
+
when :D then ((@index+1)>=@satisfied_at) ? :D : :O
|
385
|
+
when :C
|
386
|
+
if (child_prev_status == :D)
|
387
|
+
self._status = _advance
|
388
|
+
next if @status.accepting?
|
389
|
+
:C
|
390
|
+
else
|
391
|
+
:C
|
392
|
+
end
|
393
|
+
else; raise GorillaException.new('symbol returned bad status')
|
394
|
+
end # case
|
395
|
+
break; # break out of infinite loop
|
396
|
+
end # infinite loop
|
397
|
+
@status
|
398
|
+
end # def match
|
399
|
+
end # Sequence
|
400
|
+
class MoreOneOff
|
401
|
+
Grammar.register_shorthand :more, self
|
402
|
+
def self.construct_from_shorthand(a,*b); Infinity; end
|
403
|
+
end
|
404
|
+
class RangeOf < Array
|
405
|
+
alias_method :builtin_equality, :==
|
406
|
+
include CanParse, NonTerminalSymbol, PipeHack
|
407
|
+
[:zero_or_more,:one_or_more,:zero_or_one,:one,:range_of].each do |name|
|
408
|
+
Grammar.register_shorthand name, self
|
409
|
+
end
|
410
|
+
def self.construct_from_shorthand name, *args; self.new name, args; end
|
411
|
+
def initialize name, args
|
412
|
+
unless args.size > 0
|
413
|
+
raise GrammarGrammarException.new "Arguments must be non-zero length"
|
414
|
+
end
|
415
|
+
@range = name.instance_of?(Range) ? name : case name
|
416
|
+
when :zero_or_more then (0..Infinity)
|
417
|
+
when :one_or_more then (1..Infinity)
|
418
|
+
when :zero_or_one then (0..1)
|
419
|
+
when :one then (1..1)
|
420
|
+
when :range_of then args.shift
|
421
|
+
else raise UsageFailure.new(%{invalid name string "#{name}"})
|
422
|
+
end
|
423
|
+
raise UsageFailure.new("must be range") unless @range.instance_of? Range
|
424
|
+
@group = args.map{|x| GorillaSymbol.factory x }
|
425
|
+
end
|
426
|
+
attr_reader :range
|
427
|
+
attr_accessor :is_pipe_hack
|
428
|
+
def reinit_for_parse
|
429
|
+
super
|
430
|
+
@group.each{ |x| x.reinit_for_parse; @unkleene = true unless x.kleene }
|
431
|
+
@kleene = @range.begin == 0 || ! @unkleene
|
432
|
+
@status = @kleene ? :D : :O
|
433
|
+
@frame_prototype = Marshal.dump @group
|
434
|
+
_reframe
|
435
|
+
self
|
436
|
+
end
|
437
|
+
def prune!; super %w(@frame @frame_prototype @range @group @kleene @unkleene) end
|
438
|
+
def << jobber # for PipeHack. code smell (:note 1)
|
439
|
+
if kind_of?(ParseTree) then super jobber
|
440
|
+
else; @group << GorillaSymbol.factory(jobber); end
|
441
|
+
end
|
442
|
+
def expecting; (@frame || @group || []).map{ |x| x.expecting }.flatten end
|
443
|
+
def inspect;
|
444
|
+
_inspect '(',')',[@name ? %{:#{@name}} : nil , '(', @range.to_s.gsub('..Infinity',' or more'),'):'].compact.join
|
445
|
+
end
|
446
|
+
def match token
|
447
|
+
@status = nil
|
448
|
+
statii = Hash.new(){ |h,k| h[k] = [] }
|
449
|
+
@frame.each { |symbol| status = symbol.match(token); statii[status] << symbol }
|
450
|
+
if statii[:C].size == @frame.size then @status = :C
|
451
|
+
else case statii[:>].size
|
452
|
+
when 2..Infinity then raise AmbiguousGrammar.new(:parse => self, :children => statii[:>] )
|
453
|
+
when 1 then @status = _advance(statii[:>][0])
|
454
|
+
when 0 #fallthru
|
455
|
+
end end
|
456
|
+
# past this point we know that zero are :> and not all are :C, so some must be :O or :D
|
457
|
+
if @status.nil?
|
458
|
+
@frame.delete_if{ |x| ! x.status.accepting? }
|
459
|
+
@status = @frame.select{|x| x.status == :D }.count == @frame.size ? :D : :O
|
460
|
+
end
|
461
|
+
@status
|
462
|
+
end
|
463
|
+
def _advance object
|
464
|
+
self << object # (object.kleene ? object : object.prune!)
|
465
|
+
case size
|
466
|
+
when @range.end then :>
|
467
|
+
when @range then _reframe; :D
|
468
|
+
else; _reframe; :O
|
469
|
+
end
|
470
|
+
end
|
471
|
+
def _reframe; @frame = (Marshal.load @frame_prototype).map{|x| x.kind_of?(SymbolReference) ? x.dereference : x}; end
|
472
|
+
## @fixme this is waiting for unparse()
|
473
|
+
# Joins a list into an English-style enumeration, e.g.
#   join(%w(a b c), ', ', ' or ')  #=> "a, b or c"
#
# When a block is given each element is mapped through it first; the
# caller's list is left untouched.
#
# @param list [Array] the items to join
# @param conj1 [String] separator between all but the last two items
# @param conj2 [String] separator between the last two items
# @return '' for an empty list, the sole element for a one-element list,
#   otherwise the joined String
def self.join list, conj1, conj2, &block
  items = block ? list.map(&block) : list
  case items.size
  when 0 then ''
  when 1 then items[0]
  else
    # separators are built back to front: '' after the last item,
    # conj2 before it, conj1 everywhere else
    joiners = ['', conj2]
    joiners += Array.new(items.size - 2, conj1) if items.size >= 3
    items.zip(joiners.reverse).flatten.join
  end
end
|
484
|
+
end # RangeOf
|
485
|
+
end
|
486
|
+
end
|
487
|
+
# note 1 having grammar nodes as parse tree nodes. is it code smell?
|
488
|
+
# note 3 (resolved - we use them now) consider getting rid of unused base classes
|
489
|
+
# note 5 peeking isn't even used at this point
|
490
|
+
# note 6 you might use to_s for unparse
|
491
|
+
# note 7 todo: descention from regexp to string or vice versa,
|
492
|
+
# note 8 one day we might have set-like RangeOfs that .., note 9 rangeof forks, sequence just inits group
|