hipe-gorillagrammar 0.0.1beta
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +21 -0
- data/History.txt +8 -0
- data/LICENSE.txt +19 -0
- data/README.txt +11 -0
- data/Rakefile +25 -0
- data/Thorfile +135 -0
- data/hipe-gorillagrammar.gemspec +66 -0
- data/lib/hipe-gorillagrammar/extensions/syntax.rb +50 -0
- data/lib/hipe-gorillagrammar.rb +492 -0
- data/spec/argv.rb +3 -0
- data/spec/extensions/syntax_spec.rb +41 -0
- data/spec/grammar_spec.rb +91 -0
- data/spec/helpers.rb +5 -0
- data/spec/parse_tree_spec.rb +64 -0
- data/spec/parsing_spec.rb +304 -0
- data/spec/range_spec.rb +47 -0
- data/spec/regexp_spec.rb +24 -0
- data/spec/runtime_spec.rb +66 -0
- data/spec/sequence_spec.rb +85 -0
- data/spec/shorthand_spec.rb +186 -0
- data/spec/spec.opts +4 -0
- data/spec/symbol_reference_spec.rb +28 -0
- data/spec/symbol_spec.rb +45 -0
- metadata +88 -0
data/.gitignore
ADDED
data/History.txt
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
Copyright (c) 2009 Mark Meves
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
of this software and associated documentation files (the "Software"), to deal
|
5
|
+
in the Software without restriction, including without limitation the rights
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
copies of the Software, and to permit persons to whom the Software is
|
8
|
+
furnished to do so, subject to the following conditions:
|
9
|
+
|
10
|
+
The above copyright notice and this permission notice shall be included in
|
11
|
+
all copies or substantial portions of the Software.
|
12
|
+
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
THE SOFTWARE.
|
data/README.txt
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
Experiments in parsing. The previous version was in my "Githelper" script on GitHub.
|
2
|
+
|
3
|
+
(The original version was a PHP class I wrote many years ago with the same name, ...@todo)
|
4
|
+
|
5
|
+
ThankYou's
|
6
|
+
Bryan Helmkamp and the webrat team for giving me an excellent Thorfile and teaching examples of rspec
|
7
|
+
argv[0] Sun Nov 29 00:05:46 EST 2009 in #ruby-lang for schooling me on module inheritance chains
|
8
|
+
hagabaka Mon Nov 30 02:15:51 EST 2009
|
9
|
+
raggi for suggesting Marshal.dump/Marshal.load
|
10
|
+
|
11
|
+
Above all, thank you Yoko for putting up w/ me
|
data/Rakefile
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
|
2
|
+
# require 'spec'
|
3
|
+
require 'spec/rake/spectask'
|
4
|
+
require 'spec/rake/verify_rcov'
|
5
|
+
|
6
|
+
desc "Run API and Core specs"
|
7
|
+
Spec::Rake::SpecTask.new do |t|
|
8
|
+
t.spec_opts = ['--options', "\"#{File.dirname(__FILE__)}/spec/spec.opts\""]
|
9
|
+
#t.spec_files = FileList['spec/public/**/*_spec.rb'] + FileList['spec/private/**/*_spec.rb']
|
10
|
+
t.spec_files = FileList['spec/**/*_spec.rb'] + FileList['spec/private/**/*_spec.rb']
|
11
|
+
end
|
12
|
+
|
13
|
+
desc "Run all examples with RCov"
|
14
|
+
Spec::Rake::SpecTask.new('rcov') do |t|
|
15
|
+
t.spec_files = FileList['spec/**/*_spec.rb']
|
16
|
+
t.rcov = true
|
17
|
+
t.rcov_opts = ['--exclude', 'spec,/Library/Ruby/Gems/1.8/gems']
|
18
|
+
end
|
19
|
+
|
20
|
+
RCov::VerifyTask.new(:rcovv => 'rcov') do |t|
|
21
|
+
t.threshold = 95.0
|
22
|
+
t.index_html = 'coverage/index.html'
|
23
|
+
end
|
24
|
+
|
25
|
+
|
data/Thorfile
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
# this file was originally copy-pasted from webrat's Thorfile. Thank you Bryan Helmkamp!
|
2
|
+
require 'ruby-debug'
|
3
|
+
|
4
|
+
module GemHelpers
|
5
|
+
|
6
|
+
# Builds the Gem::Specification for hipe-gorillagrammar.
#
# Puts ./lib on the load path so the version constant can be read from the
# library itself, and asks the git repository in the current directory which
# files to package (tracked files minus ignored ones).
#
# @return [Gem::Specification]
def generate_gemspec
  $LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__), "lib")))
  require "hipe-gorillagrammar"

  Gem::Specification.new do |s|
    s.name = 'hipe-gorillagrammar'
    s.version = Hipe::GorillaGrammar::VERSION
    s.required_rubygems_version = Gem::Requirement.new("> 1.3.1") if s.respond_to? :required_rubygems_version=
    s.authors = ["Mark Meves"]
    s.email = "mark.meves@gmail.com"
    s.homepage = "http://github.com/hipe/hipe-gorillagrammar"
    s.date = %q{2009-11-23}
    s.summary = %q{'beta attempt at a simple LR parser generator driven by DSL under 500LOC 100% C1 test coverage'}
    s.description = <<-EOS.strip
    LR Parser Generator (?) under 500 LOC with 100*% C1 test coverage. No useful AST yet. No useful docs yet.
    EOS

    # s.rubyforge_project = "webrat"

    # loaded lazily: only gemspec generation needs the git gem
    require "git"
    repo = Git.open(".")

    s.files = normalize_files(repo.ls_files.keys - repo.lib.ignored_files)
    # recursive glob so specs in subdirectories (e.g. spec/extensions) are
    # listed too; the previous 'spec/***.rb' only matched top-level files
    s.test_files = normalize_files(Dir['spec/**/*.rb'] - repo.lib.ignored_files)
    #s.test_files = normalize_files(Dir['spec/*.rb'] - repo.lib.ignored_files)

    s.has_rdoc = 'yard' # trying out arg[0]/lsegal's doc tool
    #s.extra_rdoc_files = %w[README.rdoc MIT-LICENSE.txt History.txt]
    #s.extra_rdoc_files = %w[MIT-LICENSE.txt History.txt]

    #s.add_dependency "nokogiri", ">= 1.2.0"
    #s.add_dependency "rack", ">= 1.0"
  end
end
|
40
|
+
|
41
|
+
def normalize_files(array)
|
42
|
+
# only keep files, no directories, and sort
|
43
|
+
array.select do |path|
|
44
|
+
File.file?(path)
|
45
|
+
end.sort
|
46
|
+
end
|
47
|
+
|
48
|
+
# Adds extra space when outputting an array. This helps create better version
|
49
|
+
# control diffs, because otherwise it is all on the same line.
|
50
|
+
def prettyify_array(gemspec_ruby, array_name)
|
51
|
+
gemspec_ruby.gsub(/s\.#{array_name.to_s} = \[.+?\]/) do |match|
|
52
|
+
leadin, files = match[0..-2].split("[")
|
53
|
+
leadin + "[\n #{files.split(",").join(",\n ")}\n ]"
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
def read_gemspec
|
58
|
+
@read_gemspec ||= eval(File.read("hipe-gorillagrammar.gemspec"))
|
59
|
+
end
|
60
|
+
|
61
|
+
def sh(command)
|
62
|
+
puts command
|
63
|
+
system command
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
class Default < Thor
|
68
|
+
include GemHelpers
|
69
|
+
|
70
|
+
desc "gemspec", "Regenerate hipe-gorillagrammar.gemspec"
|
71
|
+
def gemspec
|
72
|
+
File.open("hipe-gorillagrammar.gemspec", "w") do |file|
|
73
|
+
gemspec_ruby = generate_gemspec.to_ruby
|
74
|
+
gemspec_ruby = prettyify_array(gemspec_ruby, :files)
|
75
|
+
gemspec_ruby = prettyify_array(gemspec_ruby, :test_files)
|
76
|
+
gemspec_ruby = prettyify_array(gemspec_ruby, :extra_rdoc_files)
|
77
|
+
|
78
|
+
file.write gemspec_ruby
|
79
|
+
end
|
80
|
+
|
81
|
+
puts "Wrote gemspec to hipe-gorillagrammar.gemspec"
|
82
|
+
read_gemspec.validate
|
83
|
+
end
|
84
|
+
|
85
|
+
desc "build", "Build a hipe-gorillagrammar gem"
|
86
|
+
def build
|
87
|
+
sh "gem build hipe-gorillagrammar.gemspec"
|
88
|
+
FileUtils.mkdir_p "pkg"
|
89
|
+
FileUtils.mv read_gemspec.file_name, "pkg"
|
90
|
+
end
|
91
|
+
|
92
|
+
desc "install", "Install the latest built gem"
|
93
|
+
def install
|
94
|
+
sh "gem install --local pkg/#{read_gemspec.file_name}"
|
95
|
+
end
|
96
|
+
|
97
|
+
desc "release", "Release the current branch to GitHub and Gemcutter"
|
98
|
+
def release
|
99
|
+
gemspec
|
100
|
+
build
|
101
|
+
Release.new.tag
|
102
|
+
Release.new.gem
|
103
|
+
end
|
104
|
+
|
105
|
+
end
|
106
|
+
|
107
|
+
|
108
|
+
class Spec < Thor
|
109
|
+
desc "spec", "el speco"
|
110
|
+
def run which='all'
|
111
|
+
t.spec_opts = ['--options', "\"#{File.dirname(__FILE__)}/spec/spec.opts\""]
|
112
|
+
t.spec_files = FileList['spec/**/*_spec.rb'] + FileList['spec/**/*_spec.rb']
|
113
|
+
debugger
|
114
|
+
'x'
|
115
|
+
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
|
120
|
+
|
121
|
+
class Release < Thor
|
122
|
+
include GemHelpers
|
123
|
+
|
124
|
+
desc "tag", "Tag the gem on the origin server"
|
125
|
+
def tag
|
126
|
+
release_tag = "v#{read_gemspec.version}"
|
127
|
+
sh "git tag -a #{release_tag} -m 'Tagging #{release_tag}'"
|
128
|
+
sh "git push origin #{release_tag}"
|
129
|
+
end
|
130
|
+
|
131
|
+
desc "gem", "Push the gem to Gemcutter"
|
132
|
+
def gem
|
133
|
+
sh "gem push pkg/#{read_gemspec.file_name}"
|
134
|
+
end
|
135
|
+
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
Gem::Specification.new do |s|
|
4
|
+
s.name = %q{hipe-gorillagrammar}
|
5
|
+
s.version = "0.0.1beta"
|
6
|
+
|
7
|
+
s.required_rubygems_version = Gem::Requirement.new("> 1.3.1") if s.respond_to? :required_rubygems_version=
|
8
|
+
s.authors = ["Mark Meves"]
|
9
|
+
s.date = %q{2009-11-23}
|
10
|
+
s.description = %q{LR Parser Generator (?) under 500 LOC with 100*% C1 test coverage. No useful AST yet. No useful docs yet.}
|
11
|
+
s.email = %q{mark.meves@gmail.com}
|
12
|
+
s.files = [
|
13
|
+
".gitignore",
|
14
|
+
"History.txt",
|
15
|
+
"LICENSE.txt",
|
16
|
+
"README.txt",
|
17
|
+
"Rakefile",
|
18
|
+
"Thorfile",
|
19
|
+
"hipe-gorillagrammar.gemspec",
|
20
|
+
"lib/hipe-gorillagrammar.rb",
|
21
|
+
"lib/hipe-gorillagrammar/extensions/syntax.rb",
|
22
|
+
"spec/argv.rb",
|
23
|
+
"spec/extensions/syntax_spec.rb",
|
24
|
+
"spec/grammar_spec.rb",
|
25
|
+
"spec/helpers.rb",
|
26
|
+
"spec/parse_tree_spec.rb",
|
27
|
+
"spec/parsing_spec.rb",
|
28
|
+
"spec/range_spec.rb",
|
29
|
+
"spec/regexp_spec.rb",
|
30
|
+
"spec/runtime_spec.rb",
|
31
|
+
"spec/sequence_spec.rb",
|
32
|
+
"spec/shorthand_spec.rb",
|
33
|
+
"spec/spec.opts",
|
34
|
+
"spec/symbol_reference_spec.rb",
|
35
|
+
"spec/symbol_spec.rb"
|
36
|
+
]
|
37
|
+
s.has_rdoc = %q{yard}
|
38
|
+
s.homepage = %q{http://github.com/hipe/hipe-gorillagrammar}
|
39
|
+
s.require_paths = ["lib"]
|
40
|
+
s.rubygems_version = %q{1.3.5}
|
41
|
+
s.summary = %q{'beta attempt at a simple LR parser generator driven by DSL under 500LOC 100% C1 test coverage'}
|
42
|
+
s.test_files = [
|
43
|
+
"spec/argv.rb",
|
44
|
+
"spec/grammar_spec.rb",
|
45
|
+
"spec/helpers.rb",
|
46
|
+
"spec/parse_tree_spec.rb",
|
47
|
+
"spec/parsing_spec.rb",
|
48
|
+
"spec/range_spec.rb",
|
49
|
+
"spec/regexp_spec.rb",
|
50
|
+
"spec/runtime_spec.rb",
|
51
|
+
"spec/sequence_spec.rb",
|
52
|
+
"spec/shorthand_spec.rb",
|
53
|
+
"spec/symbol_reference_spec.rb",
|
54
|
+
"spec/symbol_spec.rb"
|
55
|
+
]
|
56
|
+
|
57
|
+
if s.respond_to? :specification_version then
|
58
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
59
|
+
s.specification_version = 3
|
60
|
+
|
61
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
62
|
+
else
|
63
|
+
end
|
64
|
+
else
|
65
|
+
end
|
66
|
+
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module Hipe::GorillaGrammar
|
2
|
+
module GorillaSymbol
|
3
|
+
# Upper-cased form of this symbol's name for use in syntax strings.
# @return [String, nil] nil when the symbol has no name
def syntax_pretty_name
  return nil unless @name
  @name.to_s.upcase
end
|
4
|
+
end
|
5
|
+
|
6
|
+
module RegexpTerminal
|
7
|
+
def syntax_tokens; [syntax] end
|
8
|
+
def syntax; syntax_pretty_name || self end
|
9
|
+
end
|
10
|
+
|
11
|
+
module StringTerminal
|
12
|
+
def syntax_tokens; [syntax] end
|
13
|
+
def syntax; self end
|
14
|
+
end
|
15
|
+
|
16
|
+
class SymbolReference
|
17
|
+
def syntax_tokens; [syntax_pretty_name] end
|
18
|
+
end
|
19
|
+
|
20
|
+
module NonTerminalSymbol
|
21
|
+
def syntax
|
22
|
+
return (syntax_tokens * ' ').gsub(%r{(?:[\[\(] +| +[\]\)]| +\| +)}){|x| x.strip}
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
class Sequence
|
27
|
+
def syntax_tokens
|
28
|
+
@group.map{ |x| x.syntax_tokens }.flatten
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
class RangeOf
|
33
|
+
def syntax_tokens;
|
34
|
+
is_one_or_more = (@range == (1..Infinity) and @group.size == 1 and @group[0].kind_of? SymbolReference)
|
35
|
+
things = case true
|
36
|
+
when (@range == (0..1)) then ['[', nil, ']']
|
37
|
+
when (@range == (1..1)) then ['(', nil, ')']
|
38
|
+
when (@range == (0..Infinity)) then ['[', '[...]', ']']
|
39
|
+
when (is_one_or_more) then ['[', '[...]', ']'] # *special handing below
|
40
|
+
else [%{(#{@range.to_s}) of (}, nil, ')']
|
41
|
+
end
|
42
|
+
thing = @group.map{ |x| x.syntax_tokens }.zip(Array.new(@group.size-1,'|')).flatten.compact
|
43
|
+
thing.unshift things[0]
|
44
|
+
thing.push things[1] if things[1]
|
45
|
+
thing.push things[2]
|
46
|
+
thing.unshift @group[0].syntax_pretty_name if is_one_or_more
|
47
|
+
thing
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
@@ -0,0 +1,492 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
#require 'ruby-debug'
|
3
|
+
require 'singleton'
|
4
|
+
|
5
|
+
# these are the different states that a parsing node can have
# after it's consumed a token. absurd hack so we can use smiley faces as symbols (note 2)
# mouth open means "still accepting". Smiley means "is satisfied". four permutations:
#
#   :D  open mouth happy      satisfied,     accepting
#   :>  closed mouth happy    satisfied,     not accepting
#   :O  open mouth unhappy    not satisfied, accepting
#   :C  closed mouth unhappy  not satisfied, not accepting
#
# The flags are kept in a frozen lookup table instead of instance variables
# set on the symbols themselves, because Symbols are frozen on current Rubies
# and instance_variable_set on them raises.
class Symbol
  GORILLA_STATUS_FLAGS = {
    :D   => [true,  true],
    (:>) => [true,  false],
    :O   => [false, true],
    :C   => [false, false]
  }.freeze

  # @return [true, false, nil] whether this status means "is satisfied";
  #   nil for any symbol that is not one of the four statuses
  def satisfied?; (flags = GORILLA_STATUS_FLAGS[self]) && flags[0]; end

  # @return [true, false, nil] whether this status means "still accepting";
  #   nil for any symbol that is not one of the four statuses
  def accepting?; (flags = GORILLA_STATUS_FLAGS[self]) && flags[1]; end
end
|
20
|
+
module Hipe
|
21
|
+
def self.GorillaGrammar opts=nil, &block
|
22
|
+
GorillaGrammar.define(opts, &block)
|
23
|
+
end
|
24
|
+
module GorillaGrammar
|
25
|
+
VERSION = '0.0.1beta'
|
26
|
+
Infinity = 1.0 / 0
|
27
|
+
def self.define(opts=nil, &block)
|
28
|
+
runtime = Runtime.instance
|
29
|
+
g = runtime.create_grammar! opts
|
30
|
+
runtime.push_grammar g
|
31
|
+
begin
|
32
|
+
g.define(&block)
|
33
|
+
ensure
|
34
|
+
runtime.pop_grammar
|
35
|
+
end
|
36
|
+
end
|
37
|
+
class Runtime # for global-like stuff
|
38
|
+
include Singleton
|
39
|
+
def initialize
|
40
|
+
@grammar_stack = []
|
41
|
+
@grammars = {}
|
42
|
+
end
|
43
|
+
def push_grammar(grammar)
|
44
|
+
raise UsageFailure.new('sorry, no making grammars inside of grammars') if @grammar_stack.size > 0
|
45
|
+
@grammar_stack << grammar
|
46
|
+
end
|
47
|
+
def pop_grammar; @grammar_stack.pop; end
|
48
|
+
def current_grammar!
|
49
|
+
raise UsageFailure.new("no current grammar") unless current_grammar
|
50
|
+
current_grammar
|
51
|
+
end
|
52
|
+
def current_grammar; @grammar_stack.last; end
|
53
|
+
# Class-level convenience: any message the singleton instance publicly
# responds to (e.g. Runtime.current_grammar!) is forwarded to that instance,
# along with its arguments and block.
#
# @param a [Symbol] the missing method's name
# @param b [Array] arguments to forward
# @raise [NoMethodError] when the instance does not respond to +a+
def self.method_missing(a, *b, &blk)
  return instance.send(a, *b, &blk) if instance.respond_to? a
  raise NoMethodError.new %{undefined method `#{a}' for #{inspect}}
end

# Keeps respond_to? truthful about what method_missing forwards.
def self.respond_to_missing?(a, include_private = false)
  instance.respond_to?(a) || super
end
|
57
|
+
def create_grammar! opts
|
58
|
+
opts ||= {}
|
59
|
+
g = Grammar.new opts
|
60
|
+
g.name = %{grammar#{@grammars.size+1}} unless g.name
|
61
|
+
raise GorillaException.new("for now we can't reopen grammars") if @grammars[g.name]
|
62
|
+
@grammars[g.name] = g
|
63
|
+
end
|
64
|
+
def get_grammar name; @grammars[name]; end
|
65
|
+
# enables the use of =~, |, (0..1).of .., .., .., and :some_name[/regexp/] in grammars
#
# Mixes the shorthand modules into the core classes, once; later calls
# return early with nil.
def enable_operator_shorthands
  return if @shorthands_enabled
  Symbol.instance_eval { include SymbolHack, PipeHack }
  String.instance_eval { include PipeHack }
  # Integer rather than Fixnum: Fixnum was folded into Integer and the
  # constant no longer exists on current Rubies. On old Rubies Fixnum is a
  # subclass of Integer, so it still picks up the mixin.
  Integer.instance_eval { include FixnumOfHack }
  Range.instance_eval { include RangeOfHack }
  @shorthands_enabled = true
end
|
74
|
+
end
|
75
|
+
class Grammar < Hash
|
76
|
+
attr_accessor :name, :root
|
77
|
+
alias_method :names, :keys
|
78
|
+
def initialize opts
|
79
|
+
opts ||= {}
|
80
|
+
@name = opts[:name] if opts[:name]
|
81
|
+
@with_operator_shorthands = opts.has_key?(:enable_operator_shorthands) ?
|
82
|
+
opts[:enable_operator_shorthands] : true
|
83
|
+
end
|
84
|
+
def == other; self.inspect == other.inspect end #hack
|
85
|
+
def define(&block) # should it return grammar or last symbol? grammar. (note 4:)
|
86
|
+
Runtime.enable_operator_shorthands if @with_operator_shorthands
|
87
|
+
@root = GorillaSymbol.factory instance_eval(&block) # allows anonymous ranges & sequences as grammr
|
88
|
+
@root = @root.dereference if @root.instance_of? SymbolReference
|
89
|
+
self[@root.name = '__main__'] = @root if (@root.name.nil?)
|
90
|
+
self
|
91
|
+
end
|
92
|
+
def parse tox; @root.parse tox; end
|
93
|
+
def self.register_shorthand name, klass
|
94
|
+
instance_eval {
|
95
|
+
define_method(name) { |*args|
|
96
|
+
klass.construct_from_shorthand(name, *args)
|
97
|
+
}
|
98
|
+
}
|
99
|
+
end
|
100
|
+
def []= name, symbol
|
101
|
+
raise GrammarGrammarException.new(%{Can't redefine symbols. Multiple definitions for :#{name}}) if self[name]
|
102
|
+
unless symbol.kind_of? GorillaSymbol
|
103
|
+
raise GorillaException.new(%{Expecting GorillaSymbol had #{symbol.inspect}})
|
104
|
+
end
|
105
|
+
symbol.name = name
|
106
|
+
super name, symbol
|
107
|
+
end
|
108
|
+
end
|
109
|
+
class GorillaException < Exception
|
110
|
+
def initialize(*args)
|
111
|
+
super args.shift if args[0].instance_of? String
|
112
|
+
@info = args.last.instance_of?(Hash) ? args.pop : {}
|
113
|
+
@extra = args.count > 0 ? args : []
|
114
|
+
end
|
115
|
+
def tree; @info[:tree]; end
|
116
|
+
end
|
117
|
+
class UsageFailure < GorillaException; end
|
118
|
+
class GrammarGrammarException < UsageFailure; end
|
119
|
+
class AmbiguousGrammar < GrammarGrammarException; end
|
120
|
+
class ParseFailure < GorillaException
|
121
|
+
def is_error?; true; end
|
122
|
+
def inspect; message; end # just for irb debugo
|
123
|
+
end
|
124
|
+
class UnexpectedEndOfInput < ParseFailure;
|
125
|
+
def message; <<-EOS.gsub(/^ /,'').gsub("\n",' ')
|
126
|
+
sorry, unexpected end of input. I was
|
127
|
+
expecting you to say #{RangeOf.join(tree.expecting.uniq,', ',' or ')}.
|
128
|
+
EOS
|
129
|
+
end
|
130
|
+
end
|
131
|
+
class UnexpectedInput < ParseFailure;
|
132
|
+
def message
|
133
|
+
%{sorry, i don't know what you mean by "#{@info[:token]}". } +
|
134
|
+
((ex = tree.expecting.uniq).size == 0 ? %{i wasn't expecting any more input.} :
|
135
|
+
%{i was expecting you to say #{RangeOf.join(ex,', ',' or ')}.})
|
136
|
+
end
|
137
|
+
end
|
138
|
+
# Mixin providing the `|` alternation shorthand.
#
# `a | b` builds a RangeOf over (1..1) holding both operands and marks it as
# created by this hack; piping onto such a RangeOf again (`a | b | c`)
# appends the new alternative to it instead of nesting.
module PipeHack
  # @param other the alternative to add
  # @return [RangeOf] the new alternation, or self when self already is one
  # @raise [UsageFailure] when self is a RangeOf that was not built by `|`
  def |(other)
    unless instance_of? RangeOf
      alternation = RangeOf.new((1..1), [self, other])
      alternation.is_pipe_hack = true
      return alternation
    end
    # was `raise UsageFailure("no")`, which is a call to an undefined method
    # and surfaced as NoMethodError instead of the intended UsageFailure
    raise UsageFailure.new("no") unless is_pipe_hack
    self << other
    self
  end
end
|
151
|
+
# Mixin providing `n.of(...)` on integers: exactly n of the given symbols.
# note that IRL, this will rarely be used for other than 1
module FixnumOfHack
  # @param args the symbols (or symbol data) to choose from
  # @return [RangeOf] a RangeOf over (self..self); previously the receiver
  #   was ignored and the range was always (1..1), so e.g. 2.of(...) silently
  #   meant "exactly one"
  def of *args
    RangeOf.new((self..self), args)
  end
end
|
156
|
+
module RangeOfHack
|
157
|
+
def of *args
|
158
|
+
return RangeOf.new(self, args)
|
159
|
+
end
|
160
|
+
end
|
161
|
+
# Mixin that lets a Symbol define grammar rules while a grammar is being
# defined. When the Runtime has no current grammar, both operators fall
# through to the built-in behaviour via super.
module SymbolHack
  # `:name =~ data` registers +data+ (converted by GorillaSymbol.factory)
  # under this symbol in the current grammar.
  # @return the newly registered symbol object
  def =~(symbol_data)
    return super unless (grammar = Runtime.instance.current_grammar)
    grammar[self] = GorillaSymbol.factory symbol_data
  end

  # `:name[data, ...]` registers the rule exactly like =~ does and then
  # returns a SymbolReference to it, so the definition can be used inline.
  # A single argument is passed as-is; several are passed as an Array.
  # @return [SymbolReference]
  def [](*symbol_data)
    return super unless Runtime.instance.current_grammar
    symbol_data = symbol_data[0] if symbol_data.size == 1
    self.=~(symbol_data)
    SymbolReference.new self
  end
end
|
173
|
+
# Marker mixin: symbols are extended with it by reinit_for_parse.
# Answers false to is_error?, where ParseFailure answers true.
module ParseTree
  def is_error?
    false
  end
end
|
174
|
+
module GorillaSymbol # @abstract base
|
175
|
+
def self.factory obj # maps data structures to symbols. returns same object or new
|
176
|
+
case obj
|
177
|
+
when GorillaSymbol then obj # this one must stay on top!
|
178
|
+
when Array then Sequence.new(*obj)
|
179
|
+
# note RangeOf is never constructed directly with factory()
|
180
|
+
when Symbol then SymbolReference.new obj
|
181
|
+
when String then obj.extend StringTerminal
|
182
|
+
when Regexp then obj.extend RegexpTerminal
|
183
|
+
else
|
184
|
+
raise UsageFailure.new %{Can't determine symbol type for "#{obj.inspect}"},:obj=>obj
|
185
|
+
end
|
186
|
+
end
|
187
|
+
def finalize; self; end
|
188
|
+
attr_accessor :name
|
189
|
+
attr_reader :kleene
|
190
|
+
# The symbol's name with underscores replaced by spaces.
# @return [String, nil] nil when the symbol has no name
def natural_name
  return nil unless name
  name.to_s.tr('_', ' ')
end
|
191
|
+
def fork_for_parse; Marshal.load Marshal.dump(self); end # note 1
|
192
|
+
def reinit_for_parse; extend ParseTree end
|
193
|
+
# Removes the named instance variables from this object, skipping any that
# are not set.
#
# Names are compared as strings, so callers may pass "@foo" or :@foo. The
# bare `names & instance_variables` it replaces matched nothing when the
# names were strings and instance_variables returned symbols.
#
# @param names [Array<String, Symbol>] instance variable names, with the "@"
# @return [Integer] how many instance variables were removed
def prune!(names = [])
  present = instance_variables.map { |ivar| ivar.to_s }
  doomed = names.map { |n| n.to_s } & present
  doomed.each { |ivar| remove_instance_variable ivar }
  doomed.size
end
|
194
|
+
def dereference; self; end
|
195
|
+
end
|
196
|
+
module TerminalSymbol;
|
197
|
+
include GorillaSymbol;
|
198
|
+
attr_reader :status
|
199
|
+
def inspect; [@name ? @name.inspect : nil, super, %{(#{@token ? '.' : '_' })}].compact.join; end
|
200
|
+
def pretty_print q
|
201
|
+
q.text inspect
|
202
|
+
q.group 1,'{','}' do
|
203
|
+
instance_variables.sort.each do |n|
|
204
|
+
next if instance_variable_get(n).nil?
|
205
|
+
q.text %{#{n}=}; q.pp instance_variable_get(n); q.text(';'); q.breakable
|
206
|
+
end
|
207
|
+
end
|
208
|
+
end
|
209
|
+
# A terminal has no children, so recursing over it just yields the terminal
# itself and returns the block's result.
# (the parameter had been mis-decoded from "&block;" into a block glyph)
def recurse(&block); yield self end
|
210
|
+
def tokens; [@token]; end
|
211
|
+
end
|
212
|
+
module NonTerminalSymbol;
|
213
|
+
include GorillaSymbol;
|
214
|
+
def [] (i); super(i) ? super(i) : @group[i]; end
|
215
|
+
def size; kind_of?(ParseTree) ? super : @group.size; end
|
216
|
+
# Equality for non-terminals.
#
# Once the node is a parse tree it compares with the underlying container's
# own equality (builtin_equality). As a grammar node it is equal to another
# node of the same class with the same name and an equal group.
#
# NOTE(review): the name check used to read `@name == @name`, which is always
# true, so names were effectively ignored. Confirm no caller relied on that.
def == other
  return builtin_equality(other) if kind_of?(ParseTree)
  other.class == self.class && @name == other.name && @group == other.group
end
|
220
|
+
attr_reader :group
|
221
|
+
def _inspect left, right, name = nil
|
222
|
+
name = %{:#{@name}} if name.nil? and @name
|
223
|
+
[name, left,
|
224
|
+
(0..(@group ? @group.size : self.size)-1).map{|i| (self[i] ? self[i] : @group[i]).inspect}.join(', '),
|
225
|
+
right ].compact.join
|
226
|
+
end
|
227
|
+
def pretty_print q
|
228
|
+
names = instance_variables.sort{|a,b| (a=='@group') ? -1 : (b=='@group' ? 1 : a<=>b)}
|
229
|
+
names.delete_if{|x| instance_variable_get(x).nil?} # it may have been pruned
|
230
|
+
q.group 1,'{','}' do
|
231
|
+
q.breakable
|
232
|
+
# show any non-nil properties of this object
|
233
|
+
names.each do |name|
|
234
|
+
q.text %{#{name}:}
|
235
|
+
q.pp instance_variable_get(name)
|
236
|
+
q.breakable
|
237
|
+
end
|
238
|
+
# show any captured children of this object (if it's parsed something)
|
239
|
+
each_with_index do |child, i|
|
240
|
+
q.text %{#{i}=>}
|
241
|
+
q.pp child
|
242
|
+
q.breakable
|
243
|
+
end
|
244
|
+
end
|
245
|
+
end
|
246
|
+
def status; raise 'no' unless @status; @status end
|
247
|
+
def _status=(smiley); raise 'no' unless smiley.kind_of? Symbol; @status = smiley end
|
248
|
+
def recurse &block
|
249
|
+
sum_like = yield self
|
250
|
+
self.each{ |x| if x.respond_to?(:recurse) then sum_like += x.recurse(&block) end }
|
251
|
+
sum_like
|
252
|
+
end
|
253
|
+
def tokens; [] end # for use in a call to recurse e.g. non_terminal.recurse{|x| x.tokens}
|
254
|
+
end
|
255
|
+
module StringTerminal # this could also be considered a special case of regexp terminal
|
256
|
+
include TerminalSymbol
|
257
|
+
def match token
|
258
|
+
status = if (self==token)
|
259
|
+
@token = token
|
260
|
+
(:>)
|
261
|
+
else
|
262
|
+
(:C)
|
263
|
+
end
|
264
|
+
@status = status
|
265
|
+
end
|
266
|
+
def expecting; [%{"#{self}"}]; end
|
267
|
+
end
|
268
|
+
module RegexpTerminal
|
269
|
+
include TerminalSymbol
|
270
|
+
Grammar.register_shorthand :regexp, self
|
271
|
+
def self.construct_from_shorthand name, *args
|
272
|
+
args[0].extend self
|
273
|
+
end
|
274
|
+
def [](capture_offset); @captures[capture_offset]; end
|
275
|
+
def expecting; @name ? [natural_name] : [self.inspect]; end
|
276
|
+
def match token # cleaned up for note 5 @ 11/27 11:39 am
|
277
|
+
@status = if (md = super(token))
|
278
|
+
@captures = md.captures if (md.captures.size>0)
|
279
|
+
@token = token
|
280
|
+
(:>)
|
281
|
+
else
|
282
|
+
(:C)
|
283
|
+
end
|
284
|
+
end
|
285
|
+
end
|
286
|
+
class SymbolReference
|
287
|
+
include GorillaSymbol
|
288
|
+
def inspect; %{::#{@name}}; end
|
289
|
+
def pp q; q.text(inspect); end
|
290
|
+
def initialize symbol
|
291
|
+
@name = symbol
|
292
|
+
@grammar_name = Runtime.current_grammar!.name
|
293
|
+
end
|
294
|
+
def dereference;
|
295
|
+
@actual ||= Runtime.instance.get_grammar(@grammar_name)[@name].fork_for_parse.reinit_for_parse
|
296
|
+
end
|
297
|
+
def dereference_light
|
298
|
+
Runtime.instance.get_grammar(@grammar_name)[@name]
|
299
|
+
end
|
300
|
+
[:kleene, :expecting, :reinit_for_parse].each do |name|
|
301
|
+
define_method(name){ dereference_light.send(name) }
|
302
|
+
end
|
303
|
+
end
|
304
|
+
# Mixin giving a grammar symbol the ability to parse a list of tokens.
module CanParse
  # Feeds tokens, in order, to a fresh parse-ready copy of this symbol until
  # the copy stops accepting or the tokens run out.
  #
  # Failures are returned, not raised; use is_error? on the result to tell
  # them from a parse tree.
  #
  # @param tokens [Array] the input tokens; a copy is consumed, so the
  #   caller's array is left untouched
  # @return [UnexpectedInput] when a token was rejected or tokens were left over
  # @return [UnexpectedEndOfInput] when the input ended before the tree was satisfied
  # @return the finalized parse tree otherwise
  def parse tokens
    tree = fork_for_parse.reinit_for_parse
    remaining = tokens.dup
    token = nil
    while (token = remaining.shift) && tree.match(token).accepting?; end
    if (:C) == tree.status || !remaining.empty? # xtra
      # a rejected token is the one just shifted; otherwise blame the first leftover
      UnexpectedInput.new :token=>(:C==tree.status ? token : remaining.first), :tree=>tree
    elsif ! tree.status.satisfied?
      UnexpectedEndOfInput.new :tree=>tree
    else
      tree.finalize #tree.prune!
    end
  end
end
|
317
|
+
class Sequence < Array
|
318
|
+
alias_method :builtin_equality, :==
|
319
|
+
include CanParse, NonTerminalSymbol, PipeHack
|
320
|
+
Grammar.register_shorthand :sequence, self
|
321
|
+
def self.construct_from_shorthand(name, *args); self.new(*args); end
|
322
|
+
def initialize *args
|
323
|
+
@index = 0 # we use this to report expecting whether or not we are a parse
|
324
|
+
raise GrammarGrammarException.new "Arguments must be non-zero length" unless args.size > 0
|
325
|
+
@group = args.map{|x| GorillaSymbol.factory x }
|
326
|
+
end
|
327
|
+
def reinit_for_parse;
|
328
|
+
super
|
329
|
+
@stop_here = @group.size;
|
330
|
+
num = @group.reverse.map{|x|
|
331
|
+
x.reinit_for_parse
|
332
|
+
x.kleene
|
333
|
+
}.find_index{|x| x==false || x.nil? } || @group.size
|
334
|
+
@satisfied_at = @group.size - num # trailing children that can be zero length affect when we are satisfied.
|
335
|
+
@status = (@satisfied_at==@index) ? :D : :O
|
336
|
+
self
|
337
|
+
end
|
338
|
+
def inspect; _inspect '[',']'; end
|
339
|
+
def finalize; _advance if @child; self; end
|
340
|
+
def prune!; super %w(@group @index @satisfied_at @status @stop_here @kleene) end
|
341
|
+
def expecting # cleanup @fixme @todo. this was some genetic programming
|
342
|
+
return [] if self.status == :>
|
343
|
+
child = @child || self.last # might be nil if nothing was grabbed
|
344
|
+
expecting = []
|
345
|
+
if (child) # if we were able to parse at least one token
|
346
|
+
if ((index=@index-1) >= 0) # go backwards, reporting expected from any kleene closures
|
347
|
+
(index..0).each do
|
348
|
+
if (self[index].kleene) # kleeneup to use find_index
|
349
|
+
expecting |= self[index].expecting
|
350
|
+
end
|
351
|
+
end
|
352
|
+
end
|
353
|
+
expecting += child.expecting unless child.status == :> # report the expecting tokens from the current symbol ()
|
354
|
+
end
|
355
|
+
index = size
|
356
|
+
# index = child ? ( @index + 1 ) : @index # whether or not we got to a token,
|
357
|
+
#index = @index
|
358
|
+
if ((!child or child.kleene or child.status.satisfied?) and @group and index < @group.size)
|
359
|
+
begin # go forward reporting the expecting from any kleene closures
|
360
|
+
expecting |= @group[index].expecting
|
361
|
+
break unless @group[index].kleene
|
362
|
+
end while((index+=1)<@group.size)
|
363
|
+
end
|
364
|
+
expecting << "an end to the phrase" if (index == @group.size)
|
365
|
+
expecting
|
366
|
+
end
|
367
|
+
def _advance
|
368
|
+
# @child.prune! unless @child.kleene #@todo
|
369
|
+
self << remove_instance_variable('@child') # child must be :> (if the child was :D we should keep it)
|
370
|
+
case (@index += 1)
|
371
|
+
when @stop_here then (:>) # iff we just finished the last child (there is no lookahead note 5)
|
372
|
+
when @satisfied_at then (:D)
|
373
|
+
else (:O)
|
374
|
+
end
|
375
|
+
end
|
376
|
+
def match token
|
377
|
+
while true
|
378
|
+
@child ||= @group[@index].dereference; # @group[@index] = nil; save it for prune
|
379
|
+
child_prev_status = @child.status
|
380
|
+
child_status = @child.match token
|
381
|
+
self._status = case child_status
|
382
|
+
when :> then _advance
|
383
|
+
when :O then :O
|
384
|
+
when :D then ((@index+1)>=@satisfied_at) ? :D : :O
|
385
|
+
when :C
|
386
|
+
if (child_prev_status == :D)
|
387
|
+
self._status = _advance
|
388
|
+
next if @status.accepting?
|
389
|
+
:C
|
390
|
+
else
|
391
|
+
:C
|
392
|
+
end
|
393
|
+
else; raise GorillaException.new('symbol returned bad status')
|
394
|
+
end # case
|
395
|
+
break; # break out of infinite loop
|
396
|
+
end # infinite loop
|
397
|
+
@status
|
398
|
+
end # def match
|
399
|
+
end # Sequence
|
400
|
+
class MoreOneOff
|
401
|
+
Grammar.register_shorthand :more, self
|
402
|
+
def self.construct_from_shorthand(a,*b); Infinity; end
|
403
|
+
end
|
404
|
+
class RangeOf < Array
|
405
|
+
alias_method :builtin_equality, :==
|
406
|
+
include CanParse, NonTerminalSymbol, PipeHack
|
407
|
+
[:zero_or_more,:one_or_more,:zero_or_one,:one,:range_of].each do |name|
|
408
|
+
Grammar.register_shorthand name, self
|
409
|
+
end
|
410
|
+
def self.construct_from_shorthand name, *args; self.new name, args; end
|
411
|
+
def initialize name, args
|
412
|
+
unless args.size > 0
|
413
|
+
raise GrammarGrammarException.new "Arguments must be non-zero length"
|
414
|
+
end
|
415
|
+
@range = name.instance_of?(Range) ? name : case name
|
416
|
+
when :zero_or_more then (0..Infinity)
|
417
|
+
when :one_or_more then (1..Infinity)
|
418
|
+
when :zero_or_one then (0..1)
|
419
|
+
when :one then (1..1)
|
420
|
+
when :range_of then args.shift
|
421
|
+
else raise UsageFailure.new(%{invalid name string "#{name}"})
|
422
|
+
end
|
423
|
+
raise UsageFailure.new("must be range") unless @range.instance_of? Range
|
424
|
+
@group = args.map{|x| GorillaSymbol.factory x }
|
425
|
+
end
|
426
|
+
attr_reader :range
|
427
|
+
attr_accessor :is_pipe_hack
|
428
|
+
def reinit_for_parse
|
429
|
+
super
|
430
|
+
@group.each{ |x| x.reinit_for_parse; @unkleene = true unless x.kleene }
|
431
|
+
@kleene = @range.begin == 0 || ! @unkleene
|
432
|
+
@status = @kleene ? :D : :O
|
433
|
+
@frame_prototype = Marshal.dump @group
|
434
|
+
_reframe
|
435
|
+
self
|
436
|
+
end
|
437
|
+
def prune!; super %w(@frame @frame_prototype @range @group @kleene @unkleene) end
|
438
|
+
def << jobber # for PipeHack. code smell (:note 1)
|
439
|
+
if kind_of?(ParseTree) then super jobber
|
440
|
+
else; @group << GorillaSymbol.factory(jobber); end
|
441
|
+
end
|
442
|
+
def expecting; (@frame || @group || []).map{ |x| x.expecting }.flatten end
|
443
|
+
def inspect;
|
444
|
+
_inspect '(',')',[@name ? %{:#{@name}} : nil , '(', @range.to_s.gsub('..Infinity',' or more'),'):'].compact.join
|
445
|
+
end
|
446
|
+
def match token
|
447
|
+
@status = nil
|
448
|
+
statii = Hash.new(){ |h,k| h[k] = [] }
|
449
|
+
@frame.each { |symbol| status = symbol.match(token); statii[status] << symbol }
|
450
|
+
if statii[:C].size == @frame.size then @status = :C
|
451
|
+
else case statii[:>].size
|
452
|
+
when 2..Infinity then raise AmbiguousGrammar.new(:parse => self, :children => statii[:>] )
|
453
|
+
when 1 then @status = _advance(statii[:>][0])
|
454
|
+
when 0 #fallthru
|
455
|
+
end end
|
456
|
+
# past this point we know that zero are :> and not all are :C, so some must be :O or :D
|
457
|
+
if @status.nil?
|
458
|
+
@frame.delete_if{ |x| ! x.status.accepting? }
|
459
|
+
@status = @frame.select{|x| x.status == :D }.count == @frame.size ? :D : :O
|
460
|
+
end
|
461
|
+
@status
|
462
|
+
end
|
463
|
+
def _advance object
|
464
|
+
self << object # (object.kleene ? object : object.prune!)
|
465
|
+
case size
|
466
|
+
when @range.end then :>
|
467
|
+
when @range then _reframe; :D
|
468
|
+
else; _reframe; :O
|
469
|
+
end
|
470
|
+
end
|
471
|
+
def _reframe; @frame = (Marshal.load @frame_prototype).map{|x| x.kind_of?(SymbolReference) ? x.dereference : x}; end
|
472
|
+
## @fixme this is waiting for unparse()
|
473
|
+
# Joins a list into an English-style enumeration, e.g.
#   join(%w[a b c], ', ', ' or ')  # => "a, b or c"
#
# @param list [Array] the items; left unmodified (the block, when given, is
#   applied to a copy rather than in place)
# @param conj1 [String] separator between all but the last two items
# @param conj2 [String] separator between the last two items
# @yield [item] optional transformation applied to every item first
# @return [String] '' for an empty list; for a single item, that item itself
def self.join list, conj1, conj2, &block
  items = block ? list.map(&block) : list
  case items.size
  when 0 then ''
  when 1 then items[0]
  else
    # separators are built back-to-front: nothing after the last item,
    # conj2 before it, conj1 everywhere else
    joiners = ['', conj2]
    joiners += Array.new(items.size - 2, conj1) if items.size >= 3
    items.zip(joiners.reverse).flatten.join
  end
end
|
484
|
+
end # RangeOf
|
485
|
+
end
|
486
|
+
end
|
487
|
+
# note 1 having grammar nodes as parse tree nodes. is it code smell?
|
488
|
+
# note 3 (resolved - we use them now) consider getting rid of unused base classes
|
489
|
+
# note 5 peeking isn't even used at this point
|
490
|
+
# note 6 you might use to_s for unparse
|
491
|
+
# note 7 todo: descention from regexp to string or vice versa,
|
492
|
+
# note 8 one day we might have set-like RangeOfs that .., note 9 rangeof forks, sequence just inits group
|