mkbison 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +22 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +7 -0
- data/TODO.txt +5 -0
- data/bin/mkbison +71 -0
- data/bison_parser.rby +206 -0
- data/ext/bison_parser/bison_parser.c +1862 -0
- data/ext/bison_parser/bison_parser.y +295 -0
- data/ext/bison_parser/extconf.rb +9 -0
- data/lib/bison.rb +14 -0
- data/lib/bison/action.rb +63 -0
- data/lib/bison/grammar_file.rb +59 -0
- data/lib/bison/nonterminal.rb +17 -0
- data/lib/bison/rule.rb +11 -0
- data/lib/bison/sequence.rb +19 -0
- data/lib/bison/token.rb +20 -0
- data/lib/bison/version.rb +3 -0
- data/lib/bison_parser.rb +123 -0
- data/lib/bison_parser/actions.rb +81 -0
- data/lib/bison_parser/base.rb +49 -0
- data/mkbison.gemspec +24 -0
- data/templates/actions.rb.erb +10 -0
- data/templates/base.rb.erb +49 -0
- data/templates/class.rb.erb +10 -0
- data/templates/extconf.rb.erb +9 -0
- data/templates/parser.y.erb +123 -0
- metadata +116 -0
data/lib/bison/sequence.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
module Bison
  # One alternative of a grammar rule: an ordered list of elements.
  #
  # `rule` and `index` are plain accessors; nothing in this class assigns them.
  class Sequence
    attr_accessor :rule, :index
    attr_reader :elements

    def initialize
      @elements = []
    end

    # Appends +element+ to this sequence and returns the sequence itself so
    # that calls can be chained.
    #
    # An element that is a Bison::Action is first handed a snapshot of the
    # elements that precede it; every element is told which sequence owns it.
    def <<(element)
      case element
      when Bison::Action
        # Snapshot, not the live array: later appends must not leak in.
        element.predecessors = elements.clone
      end

      element.sequence = self
      elements.push(element)
      self
    end
  end
end
|
data/lib/bison/token.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
module Bison
  # A named token with an optional number and optional associativity.
  class Token
    attr_accessor :name, :number, :associativity

    # name  - the token's name.
    # assoc - associativity; the predicates below recognise :left and :right.
    #         Defaults to nil (neither).
    def initialize(name, assoc=nil)
      @name = name
      @associativity = assoc
    end

    # True when the token's associativity is :left.
    def left?
      associativity == :left
    end

    # True when the token's associativity is :right.
    def right?
      associativity == :right
    end
  end
end
|
data/lib/bison_parser.rb
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
|
2
|
+
# Hand-written lexer for grammar files.
#
# Relies on #read, #peak, #io and #lex_value= being provided on this class by
# another file (see the requires that follow this class), and on a
# BisonParser::Tokens module holding the token constants.
class BisonParser
  # Counter for the current part of the input: starts at 0 and is bumped
  # each time a `%%` separator is lexed.
  attr_accessor :section

  # Returns the next token from #real_lex. When ENV['DEBUG_GRAMMAR'] is set,
  # also writes a trace line to stderr naming the token.
  def lex
    x = real_lex
    Tokens.constants.each do |const|
      if Tokens.const_get(const) == x
        warn "Lex'd #{const}\t: #{lex_value.inspect}" if ENV['DEBUG_GRAMMAR']
        return x
      end
    end

    warn "Lex'd #{x.inspect}" if ENV['DEBUG_GRAMMAR']

    x
  end

  # Reads one token from the input.
  #
  # Returns a Tokens constant, the character code of an unrecognised
  # character, or nil at end of input. The token's semantic value, if any,
  # is left in #lex_value.
  def real_lex
    self.section ||= 0
    self.lex_value = nil

    # Once two `%%` separators have been seen, everything that remains is
    # returned verbatim as a single ACTIONS token.
    if section == 2
      self.lex_value = io.read
      self.section += 2
      return Tokens::ACTIONS
    end

    # Skip whitespace and `#` comments (which run to end of line).
    while true
      while (c = self.read) && c =~ /\s/
      end

      if c == '#'
        while (char = self.read) && char != "\n"
        end
      else
        break
      end
    end

    return nil unless c

    case c
    when ':'
      return Tokens::COLON
    when ';'
      return Tokens::SEMICOLON
    when '|'
      return Tokens::PIPE
    when '%'
      if self.peak == '%'
        self.read
        self.section += 1
        return Tokens::DOUBLE_HASH
      end
      return Tokens::HASH
    when '['
      return Tokens::LBRACK
    when ']'
      return Tokens::RBRACK
    when '{'
      # Collect the text of a brace-delimited action, honouring nested
      # braces. Braces are counted blindly, including any inside string
      # literals in the action text.
      nesting = 1
      action = ''
      # Test the nesting level BEFORE reading: checking it after the read
      # would consume (and drop) the character following the closing brace.
      while nesting > 0 && (c = self.read)
        nesting += 1 if c == '{'
        nesting -= 1 if c == '}'
        action << c unless nesting.zero?
      end
      self.lex_value = action
      return Tokens::ACTIONS
    when '0'..'9'
      number = c
      while (c = self.peak) && ('0'..'9').include?(c)
        number << self.read
      end
      self.lex_value = number.to_i
      return Tokens::NUMBER
    when '"'
      # No escape handling: the string ends at the next double quote.
      string = ''
      while (c = self.read) && c != '"'
        string << c
      end
      self.lex_value = string
      return Tokens::STRING
    when "'"
      # No escape handling: the string ends at the next single quote.
      string = ''
      while (c = self.read) && c != "'"
        string << c
      end
      self.lex_value = string
      return Tokens::STRING
    end

    if c =~ /\w/
      string = c
      while (c = self.peak) && c =~ /\w/
        self.read
        string << c
      end

      # `token`, `left` and `right` are keywords only while section is 0;
      # anywhere else they are ordinary identifiers.
      if section.zero? && string == 'token'
        return Tokens::KW_TOKEN
      elsif section.zero? && string == 'left'
        return Tokens::KW_LEFT
      elsif section.zero? && string == 'right'
        return Tokens::KW_RIGHT
      else
        self.lex_value = string
        return Tokens::IDENTIFIER
      end
    end

    # Anything else is passed through as its character code.
    warn "Yielding literal #{c.inspect}"

    return c.ord
  end
end
|
119
|
+
|
120
|
+
|
121
|
+
require 'bison_parser/base'
|
122
|
+
require 'bison_parser/actions'
|
123
|
+
require 'bison_parser/bison_parser'
|
@@ -0,0 +1,81 @@
|
|
1
|
+
class BisonParser
  # Semantic actions for the grammar-file grammar. Each method is named
  # `_<n>_<rule>` and builds the Bison::* object for that alternative.
  #
  # The @name / @code / @follower instance variables read below are not
  # assigned anywhere in this class; presumably they are set from outside
  # before each action runs -- TODO confirm.
  class Actions
    attr_accessor :parser, :result

    # Stores the finished grammar file in #result.
    def _0_grammar_file(tokens, rules, code)
      self.result = Bison::GrammarFile.new(tokens, rules, code)
    end

    def _0_optional_code()
      nil
    end

    def _1_optional_code(actions)
      actions
    end

    def _0_token_list()
      []
    end

    def _1_token_list(list, token)
      list.push(token)
    end

    def _0_token(name)
      Bison::Token.new(name)
    end

    def _1_token(name)
      Bison::Token.new(name, :left)
    end

    def _2_token(name)
      Bison::Token.new(name, :right)
    end

    # Attaches a number to an already-built token and returns the token.
    def _3_token(token, num)
      token.number = num
      token
    end

    def _0_grammar_rules()
      []
    end

    def _1_grammar_rules(list, rule)
      list.push(rule)
    end

    def _0_grammar_rule(name, components)
      rule = Bison::Rule.new(name, components)
      rule.location = @name
      rule
    end

    def _0_components(sequence)
      [sequence]
    end

    def _1_components(sequences, sequence)
      sequences.push(sequence)
    end

    def _0_sequence()
      Bison::Sequence.new
    end

    def _1_sequence(sequence, code)
      action = Bison::Action.new(code)
      action.location = @code
      sequence << action
    end

    def _2_sequence(sequence, follower)
      nonterminal = Bison::Nonterminal.new(follower)
      nonterminal.location = @follower
      sequence << nonterminal
    end

    def _3_sequence(sequence, follower, tag)
      nonterminal = Bison::Nonterminal.new(follower, tag)
      nonterminal.location = @follower
      sequence << nonterminal
    end

    def _4_sequence(sequence, follower)
      literal = Bison::String.new(follower)
      literal.location = @follower
      sequence << literal
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# Input handling, position tracking and error reporting for the parser.
class BisonParser
  attr_reader :io
  attr_accessor :lex_value, :token_row, :token_col, :row, :col
  attr_accessor :source, :result

  # Kept in a module (and included below) rather than defined directly on
  # the class.
  module Base
    # io - an IO-like object, or a String path which is opened for reading.
    #      A file opened here is not closed by this class.
    def initialize(io)
      if String === io
        io = ::File.open(io, 'r')
      end
      # Used in error messages; nil when the input has no path.
      @source = io.respond_to?(:path) ? io.path : nil
      @io, @row, @col = io, 1, 0
    end

    # Consumes and returns one character (nil at end of input), keeping
    # #row and #col in step with it.
    def read
      io.read(1).tap do |c|
        if c == "\n"
          self.row += 1
          self.col = 0
        elsif c
          self.col += 1
        end
      end
    end

    # Returns the next character without consuming it (nil at end of input).
    def peak
      io.read(1).tap{ |c| io.ungetc(c) if c }
    end

    # Records the current position as the start of the token being lexed.
    def begin_token
      self.token_row = row
      self.token_col = col
    end

    # Raises BisonParser::Error for the given position.
    def error(msg, row, col)
      raise Error.new(msg, source, row, col)
    end
  end

  include Base

  # Raised for errors at a known position in the input.
  #
  # Derives from StandardError so that a plain `rescue` catches it, and
  # passes the formatted text to super so that #message and #to_s agree.
  class Error < StandardError
    # msg    - description of the problem.
    # source - path of the input, or nil (rendered as '-').
    # row, col - position of the error.
    def initialize(msg, source, row, col)
      source ||= '-'
      super("#{source}:#{row}.#{col} #{msg}")
    end
  end
end
|
data/mkbison.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# Gem specification for mkbison.

# Put this checkout's lib/ on the load path so the version file can be loaded.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'bison/version'

Gem::Specification.new do |s|
  s.name     = "mkbison"
  s.version  = Bison::VERSION
  s.authors  = ["Peter Woo"]
  s.email    = ["peter@wioux.net"]
  s.summary  = %q{Tool to generate bison parser C extensions}
  s.homepage = ""
  s.license  = "MIT"

  # Package exactly the files tracked by git.
  s.files         = `git ls-files -z`.split("\x0")
  s.executables   = s.files.grep(%r{^bin/}) { |path| File.basename(path) }
  s.test_files    = s.files.grep(%r{^(test|spec|features)/})
  s.require_paths = ["lib"]

  # Native extension, built at install time.
  s.extensions = %w[ext/bison_parser/extconf.rb]

  s.add_development_dependency "bundler", "~> 1.6"
  s.add_development_dependency "rake"
  s.add_development_dependency "rake-compiler"
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
class <%= name %>
  class Actions
    attr_accessor :parser, :result
<% rules.flat_map(&:components).flat_map(&:elements).grep(Bison::Action).each do |action| %>
    def <%= action.name %>(<%= action.predecessor_tags.values.join(', ') %>)
      <%= action.code.strip %>
    end
<% end -%>
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# Input handling, position tracking and error reporting for the parser.
class <%= name %>
  attr_reader :io
  attr_accessor :lex_value, :token_row, :token_col, :row, :col
  attr_accessor :source, :result

  # Kept in a module (and included below) rather than defined directly on
  # the class.
  module Base
    # io - an IO-like object, or a String path which is opened for reading.
    #      A file opened here is not closed by this class.
    def initialize(io)
      if String === io
        io = ::File.open(io, 'r')
      end
      # Used in error messages; nil when the input has no path.
      @source = io.respond_to?(:path) ? io.path : nil
      @io, @row, @col = io, 1, 0
    end

    # Consumes and returns one character (nil at end of input), keeping
    # #row and #col in step with it.
    def read
      io.read(1).tap do |c|
        if c == "\n"
          self.row += 1
          self.col = 0
        elsif c
          self.col += 1
        end
      end
    end

    # Returns the next character without consuming it (nil at end of input).
    def peak
      io.read(1).tap{ |c| io.ungetc(c) if c }
    end

    # Records the current position as the start of the token being lexed.
    def begin_token
      self.token_row = row
      self.token_col = col
    end

    # Raises Error for the given position.
    def error(msg, row, col)
      raise Error.new(msg, source, row, col)
    end
  end

  include Base

  # Raised for errors at a known position in the input.
  #
  # Derives from StandardError so that a plain `rescue` catches it, and
  # passes the formatted text to super so that #message and #to_s agree.
  class Error < StandardError
    # msg    - description of the problem.
    # source - path of the input, or nil (rendered as '-').
    # row, col - position of the error.
    def initialize(msg, source, row, col)
      source ||= '-'
      super("#{source}:#{row}.#{col} #{msg}")
    end
  end
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
|
2
|
+
<%# Token declarations. At least one tab always separates a token's name -%>
<%# from its number, however long the name is. -%>
<% tokens.each do |token| -%>
<% if token.number -%>
%token <%= token.name %><%= "\t" * [4 - token.name.length/8, 1].max %><%= token.number %>
<% else -%>
%token <%= token.name %>
<% end -%>
<% end -%>

<%# Associativity declarations for tokens that have one. -%>
<% tokens.select{|t| t.left? || t.right?}.each do |token| -%>
<%= token.left? ? '%left' : '%right' %> <%= token.name %>
<% end -%>

%define api.pure true
%define parse.error verbose
%parse-param { VALUE __actions }
%lex-param { VALUE __actions }
%locations

%{
#include <ruby.h>
#define YYSTYPE VALUE
%}

%code provides {
static int yylex(YYSTYPE *, YYLTYPE *, VALUE);
static void yyerror(YYLTYPE *, VALUE, const char *);
}

%%

<%# One bison rule per grammar rule. An alternative that does not end in -%>
<%# an action gets a default one: the value of its first nonterminal, or -%>
<%# Qnil when it has none. -%>
<% rules.each do |rule| -%>
<%= rule.name %>:
<% rule.components.each_with_index do |seq, i| -%>
<%= "|\n" unless i.zero? -%>
<%= ' '+seq.elements.map(&:to_bison).join(' ') unless seq.elements.empty? %>
<% if !(Bison::Action === seq.elements[-1]) -%>
<% if seq.elements.grep(Bison::Nonterminal).empty? -%>
    { $$ = Qnil; }
<% else -%>
    { $$ = $<%= seq.elements.find_index{ |x| Bison::Nonterminal === x } + 1%>; }
<% end %>
<% end -%>
<% end -%>
    ;

<% end -%>

%%
|
50
|
+
|
51
|
+
/* Handles for the parser class, its Tokens module and its Actions class;
 * all three are assigned in the Init function below. */
static VALUE c<%= name %>;
static VALUE c<%= name %>Tokens;
static VALUE c<%= name %>Actions;

static VALUE <%= uname %>_parse(VALUE);

/* Extension entry point: defines the class, Tokens and Actions, publishes
 * every token number as a constant on Tokens, and binds #parse. */
void Init_<%= uname %>(void) {
  VALUE parser_class = rb_define_class("<%= name %>", rb_cObject);

  c<%= name %> = parser_class;
  c<%= name %>Tokens = rb_define_module_under(parser_class, "Tokens");
  c<%= name %>Actions = rb_define_class_under(parser_class, "Actions", rb_cObject);

<% tokens.each do |token| -%>
  rb_define_const(c<%= name %>Tokens, "<%= token.name %>", INT2FIX(<%= token.name %>));
<% end -%>

  rb_define_method(parser_class, "parse", <%= uname %>_parse, 0);
}
|
68
|
+
|
69
|
+
/* Implements #parse: creates a fresh Actions object, points it back at the
 * parser, and runs yyparse. Returns the Actions object's result, or nil
 * when yyparse reports failure (non-zero). */
static VALUE <%= uname %>_parse(VALUE self) {
  VALUE actions = rb_funcall(c<%= name %>Actions, rb_intern("new"), 0);

  rb_funcall(actions, rb_intern("parser="), 1, self);

  return yyparse(actions) ? Qnil : rb_funcall(actions, rb_intern("result"), 0);
}
|
76
|
+
|
77
|
+
/* Error callback: forwards the message and the start position of the
 * offending location to the parser object's #error method. */
static void yyerror(YYLTYPE *loc, VALUE actions, const char *msg) {
  VALUE parser = rb_funcall(actions, rb_intern("parser"), 0);
  VALUE message = rb_str_new_cstr(msg);
  VALUE line = INT2FIX(loc->first_line);
  VALUE column = INT2FIX(loc->first_column);

  rb_funcall(parser, rb_intern("error"), 3, message, line, column);
}
|
84
|
+
|
85
|
+
/* Lexer callback: asks the parser object's #lex for the next token.
 *
 * lval    - receives the token's semantic value.
 * lloc    - in: location of the previous token; out: location of this one.
 * actions - the Actions object, used to reach the parser.
 *
 * Returns the token number, or 0 (end of input) when #lex returns nil or a
 * value that is neither a Fixnum nor a String. */
static int yylex(YYSTYPE *lval, YYLTYPE *lloc, VALUE actions) {
  int c;
  VALUE parser, value, vtok;

  parser = rb_funcall(actions, rb_intern("parser"), 0);

  /* Reset per-token state; the token start defaults to where the previous
   * token ended. */
  rb_funcall(parser, rb_intern("lex_value="), 1, Qnil);
  rb_funcall(parser, rb_intern("token_row="), 1, INT2FIX(lloc->last_line));
  rb_funcall(parser, rb_intern("token_col="), 1, INT2FIX(lloc->last_column));

  vtok = rb_funcall(parser, rb_intern("lex"), 0);
  value = rb_funcall(parser, rb_intern("lex_value"), 0);

  lloc->first_line = FIX2INT(rb_funcall(parser, rb_intern("token_row"), 0));
  lloc->first_column = FIX2INT(rb_funcall(parser, rb_intern("token_col"), 0));
  lloc->last_line = FIX2INT(rb_funcall(parser, rb_intern("row"), 0));
  lloc->last_column = FIX2INT(rb_funcall(parser, rb_intern("col"), 0));

  if (NIL_P(vtok)) {
    *lval = Qnil;
    return 0;
  }

  if (FIXNUM_P(vtok)) {
    *lval = value;
    return FIX2INT(vtok);
  }

  /* RB_TYPE_P is safe on immediate values (true, false, Symbols, ...);
   * reading RBASIC(vtok)->klass on those would dereference a non-pointer.
   * It also accepts String subclasses. */
  if (RB_TYPE_P(vtok, T_STRING)) {
    /* A String token stands for its first character. */
    c = StringValueCStr(vtok)[0];
    *lval = rb_sprintf("%c", c);
    return c;
  }

  return 0;
}
|