mkbison 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +22 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +7 -0
- data/TODO.txt +5 -0
- data/bin/mkbison +71 -0
- data/bison_parser.rby +206 -0
- data/ext/bison_parser/bison_parser.c +1862 -0
- data/ext/bison_parser/bison_parser.y +295 -0
- data/ext/bison_parser/extconf.rb +9 -0
- data/lib/bison.rb +14 -0
- data/lib/bison/action.rb +63 -0
- data/lib/bison/grammar_file.rb +59 -0
- data/lib/bison/nonterminal.rb +17 -0
- data/lib/bison/rule.rb +11 -0
- data/lib/bison/sequence.rb +19 -0
- data/lib/bison/token.rb +20 -0
- data/lib/bison/version.rb +3 -0
- data/lib/bison_parser.rb +123 -0
- data/lib/bison_parser/actions.rb +81 -0
- data/lib/bison_parser/base.rb +49 -0
- data/mkbison.gemspec +24 -0
- data/templates/actions.rb.erb +10 -0
- data/templates/base.rb.erb +49 -0
- data/templates/class.rb.erb +10 -0
- data/templates/extconf.rb.erb +9 -0
- data/templates/parser.y.erb +123 -0
- metadata +116 -0
data/lib/bison/rule.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
module Bison
  # An ordered list of elements that records itself on every element
  # appended to it.
  class Sequence
    attr_reader :elements
    attr_accessor :rule, :index

    # Starts out with no elements.
    def initialize
      @elements = []
    end

    # Appends +element+ and returns the sequence so calls can be chained.
    #
    # A Bison::Action is additionally handed a snapshot of the elements that
    # were already present before it was appended.
    def <<(element)
      element.predecessors = elements.clone if Bison::Action === element
      element.sequence = self
      elements.push(element)
      self
    end
  end
end
|
data/lib/bison/token.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
module Bison
  # A token with a name, an optional number and an optional associativity.
  class Token
    attr_accessor :name, :number, :associativity

    # name  - the token's name
    # assoc - associativity (:left, :right) or nil when none was given
    def initialize(name, assoc = nil)
      self.name = name
      self.associativity = assoc
    end

    # True when the associativity is :left.
    def left?
      associativity == :left
    end

    # True when the associativity is :right.
    def right?
      associativity == :right
    end
  end
end
|
data/lib/bison_parser.rb
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
|
2
|
+
class BisonParser
  # Counter of the grammar-file section being lexed. It starts at 0 and is
  # bumped each time a "%%" separator is read; keywords are only recognised
  # while it is 0, and once it reaches 2 the rest of the input is returned
  # verbatim as a single ACTIONS token.
  attr_accessor :section

  # Returns the next token from #real_lex. When the DEBUG_GRAMMAR environment
  # variable is set, a description of the token is also written via +warn+.
  def lex
    token = real_lex
    # The constant lookup below is only needed for the debug message.
    return token unless ENV['DEBUG_GRAMMAR']

    const = Tokens.constants.find { |candidate| Tokens.const_get(candidate) == token }
    if const
      warn "Lex'd #{const}\t: #{lex_value.inspect}"
    else
      warn "Lex'd #{token.inspect}"
    end

    token
  end

  # Reads one token from the input.
  #
  # Returns a Tokens constant, the character code of an unrecognised
  # character, or nil at end of input. The semantic value of the token (if
  # any) is stored in +lex_value+.
  def real_lex
    self.section ||= 0
    self.lex_value = nil

    if section == 2
      # Everything after the second "%%" is handed over as one chunk.
      self.lex_value = io.read
      self.section += 2
      return Tokens::ACTIONS
    end

    # skip whitespace and '#' comments (a comment runs to the end of its line)
    while true
      while (c = self.read) && c =~ /\s/
      end

      if c == '#'
        while (char = self.read) && char != "\n"
        end
      else
        break
      end
    end

    return nil unless c

    case c
    when ':'
      return Tokens::COLON
    when ';'
      return Tokens::SEMICOLON
    when '|'
      return Tokens::PIPE
    when '%'
      if self.peak == '%'
        self.read
        self.section += 1
        return Tokens::DOUBLE_HASH
      end
      return Tokens::HASH
    when '['
      return Tokens::LBRACK
    when ']'
      return Tokens::RBRACK
    when '{'
      nesting = 1
      action = ''
      # Check the nesting level BEFORE reading: once the matching '}' has
      # been seen, no further character may be consumed, otherwise the
      # character following the action would be lost.
      while nesting > 0 && (c = self.read)
        nesting += 1 if c == '{'
        nesting -= 1 if c == '}'
        action << c unless nesting.zero?
      end
      self.lex_value = action
      return Tokens::ACTIONS
    when '0'..'9'
      number = c
      while (c = self.peak) && ('0'..'9').include?(c)
        number << self.read
      end
      self.lex_value = number.to_i
      return Tokens::NUMBER
    when '"', "'"
      # A quoted string runs up to the next occurrence of the same quote
      # character; there is no escape handling.
      quote = c
      string = ''
      while (c = self.read) && c != quote
        string << c
      end
      self.lex_value = string
      return Tokens::STRING
    end

    if c =~ /\w/
      string = c
      while (c = self.peak) && c =~ /\w/
        self.read
        string << c
      end

      # Keywords are only special in the first section.
      if section.zero?
        case string
        when 'token' then return Tokens::KW_TOKEN
        when 'left'  then return Tokens::KW_LEFT
        when 'right' then return Tokens::KW_RIGHT
        end
      end

      self.lex_value = string
      return Tokens::IDENTIFIER
    end

    warn "Yielding literal #{c.inspect}"

    return c.ord
  end
end
|
119
|
+
|
120
|
+
|
121
|
+
require 'bison_parser/base'
|
122
|
+
require 'bison_parser/actions'
|
123
|
+
require 'bison_parser/bison_parser'
|
@@ -0,0 +1,81 @@
|
|
1
|
+
class BisonParser
  # Semantic actions for the grammar-file parser. Each method is named after
  # a grammar rule, prefixed with a number, and returns the value built for
  # that rule.
  class Actions
    attr_accessor :parser, :result

    # Stores the finished grammar file as the overall parse result.
    def _0_grammar_file(tokens, rules, code)
      self.result = Bison::GrammarFile.new(tokens, rules, code)
    end

    # No trailing code section.
    def _0_optional_code
      nil
    end

    # Trailing code section present: pass it through unchanged.
    def _1_optional_code(actions)
      actions
    end

    # Empty token list.
    def _0_token_list
      []
    end

    # Appends +token+ to +list+ and returns the list.
    def _1_token_list(list, token)
      list.push(token)
    end

    # Token without associativity.
    def _0_token(name)
      Bison::Token.new(name)
    end

    # Left-associative token.
    def _1_token(name)
      Bison::Token.new(name, :left)
    end

    # Right-associative token.
    def _2_token(name)
      Bison::Token.new(name, :right)
    end

    # Assigns an explicit number to an already-built token and returns it.
    def _3_token(token, num)
      token.number = num
      token
    end

    # Empty rule list.
    def _0_grammar_rules
      []
    end

    # Appends +rule+ to +list+ and returns the list.
    def _1_grammar_rules(list, rule)
      list.push(rule)
    end

    # Builds a rule from its name and alternatives.
    # NOTE(review): @name is never assigned in this class, so the location
    # stored here is nil -- confirm whether a real location was intended.
    def _0_grammar_rule(name, components)
      rule = Bison::Rule.new(name, components)
      rule.location = @name
      rule
    end

    # First alternative of a rule.
    def _0_components(sequence)
      [sequence]
    end

    # Further alternative: append it and return the list.
    def _1_components(sequences, sequence)
      sequences.push(sequence)
    end

    # Empty sequence.
    def _0_sequence
      Bison::Sequence.new
    end

    # Appends an action holding +code+.
    # NOTE(review): @code is never assigned in this class (location is nil).
    def _1_sequence(sequence, code)
      action = Bison::Action.new(code)
      action.location = @code
      sequence << action
    end

    # Appends an untagged nonterminal.
    # NOTE(review): @follower is never assigned in this class (location is nil).
    def _2_sequence(sequence, follower)
      nonterminal = Bison::Nonterminal.new(follower)
      nonterminal.location = @follower
      sequence << nonterminal
    end

    # Appends a nonterminal carrying +tag+.
    def _3_sequence(sequence, follower, tag)
      nonterminal = Bison::Nonterminal.new(follower, tag)
      nonterminal.location = @follower
      sequence << nonterminal
    end

    # Appends a string literal element.
    def _4_sequence(sequence, follower)
      literal = Bison::String.new(follower)
      literal.location = @follower
      sequence << literal
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
class BisonParser
  attr_reader :io
  attr_accessor :lex_value, :token_row, :token_col, :row, :col
  attr_accessor :source, :result

  # Input handling shared by the parser: character reading with row/column
  # tracking, one-character lookahead and error reporting.
  module Base
    # io - an IO-like object to read from, or a String naming a file to open.
    #
    # +source+ is the io's path when it has one, otherwise nil. Positions
    # start at row 1, column 0.
    def initialize(io)
      io = ::File.open(io, 'r') if String === io
      @source = io.respond_to?(:path) ? io.path : nil
      @io, @row, @col = io, 1, 0
    end

    # Reads and returns one character (nil at end of input), advancing the
    # row/column counters: a newline starts a new row at column 0, any other
    # character moves one column to the right.
    def read
      io.read(1).tap do |c|
        if c == "\n"
          self.row += 1
          self.col = 0
        elsif c
          self.col += 1
        end
      end
    end

    # Returns the next character without consuming it (nil at end of input).
    # Row and column are left untouched.
    def peak
      io.read(1).tap{ |c| io.ungetc(c) if c }
    end

    # Records the current position as the start of the token being lexed.
    def begin_token
      self.token_row = row
      self.token_col = col
    end

    # Raises a BisonParser::Error for the given message and position.
    def error(msg, row, col)
      raise Error.new(msg, source, row, col)
    end
  end

  include Base

  # Raised on parse errors. The message has the form
  # "<source>:<row>.<col> <msg>", with "-" standing in for an unknown source.
  #
  # Derives from StandardError so that a plain +rescue+ catches it, and hands
  # the formatted text to the superclass so #message, #to_s and #inspect
  # all report it.
  class Error < ::StandardError
    def initialize(msg, source, row, col)
      source ||= '-'
      super("#{source}:#{row}.#{col} #{msg}")
    end
  end
end
|
data/mkbison.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# Put the gem's lib/ directory on the load path so the version constant
# can be required below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'bison/version'

Gem::Specification.new do |spec|
  # Identity
  spec.name     = "mkbison"
  spec.version  = Bison::VERSION
  spec.authors  = ["Peter Woo"]
  spec.email    = ["peter@wioux.net"]
  spec.summary  = %q{Tool to generate bison parser C extensions}
  spec.homepage = ""
  spec.license  = "MIT"

  # Contents: everything tracked by git; executables and test files are
  # picked out of that list by path prefix.
  tracked = `git ls-files -z`.split("\x0")
  spec.files         = tracked
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Native extension built at install time.
  spec.extensions = %w[ext/bison_parser/extconf.rb]

  spec.add_development_dependency "bundler", "~> 1.6"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rake-compiler"
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
<%# Emits one method per Bison::Action found in the grammar rules; the method's parameters are the action's predecessor tags. -%>
class <%= name %>
  class Actions
    attr_accessor :parser, :result
<% rules.map(&:components).flatten.map(&:elements).flatten.grep(Bison::Action).each do |action| %>
    def <%= action.name %>(<%= action.predecessor_tags.values.join(', ') %>)
      <%= action.code.strip %>
    end
<% end -%>
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
class <%= name %>
  attr_reader :io
  attr_accessor :lex_value, :token_row, :token_col, :row, :col
  attr_accessor :source, :result

  # Input handling shared by the parser: character reading with row/column
  # tracking, one-character lookahead and error reporting.
  module Base
    # io - an IO-like object to read from, or a String naming a file to open.
    #
    # +source+ is the io's path when it has one, otherwise nil. Positions
    # start at row 1, column 0.
    def initialize(io)
      io = ::File.open(io, 'r') if String === io
      @source = io.respond_to?(:path) ? io.path : nil
      @io, @row, @col = io, 1, 0
    end

    # Reads and returns one character (nil at end of input), advancing the
    # row/column counters: a newline starts a new row at column 0, any other
    # character moves one column to the right.
    def read
      io.read(1).tap do |c|
        if c == "\n"
          self.row += 1
          self.col = 0
        elsif c
          self.col += 1
        end
      end
    end

    # Returns the next character without consuming it (nil at end of input).
    # Row and column are left untouched.
    def peak
      io.read(1).tap{ |c| io.ungetc(c) if c }
    end

    # Records the current position as the start of the token being lexed.
    def begin_token
      self.token_row = row
      self.token_col = col
    end

    # Raises an Error for the given message and position.
    def error(msg, row, col)
      raise Error.new(msg, source, row, col)
    end
  end

  include Base

  # Raised on parse errors. The message has the form
  # "<source>:<row>.<col> <msg>", with "-" standing in for an unknown source.
  #
  # Derives from StandardError so that a plain +rescue+ catches it, and hands
  # the formatted text to the superclass so #message, #to_s and #inspect
  # all report it.
  class Error < ::StandardError
    def initialize(msg, source, row, col)
      source ||= '-'
      super("#{source}:#{row}.#{col} #{msg}")
    end
  end
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
|
2
|
+
<%# Token declarations. A numbered token gets tab padding between name and number; at least one tab is always emitted so that long names (32+ characters) are never fused with their number and very long names (40+) no longer raise on a negative repeat count. -%>
<% tokens.each do |token| -%>
<%   if token.number -%>
%token <%= token.name %><%= "\t" * [4 - token.name.length / 8, 1].max %><%= token.number %>
<%   else -%>
%token <%= token.name %>
<%   end -%>
<% end -%>

<% tokens.select{|t| t.left? || t.right?}.each do |token| -%>
<%= token.left? ? '%left' : '%right' %> <%= token.name %>
<% end -%>

/* The actions object is passed to both yyparse and yylex. */
%define api.pure true
%define parse.error verbose
%parse-param { VALUE __actions }
%lex-param { VALUE __actions }
%locations

%{
#include <ruby.h>
/* Every semantic value is a Ruby object. */
#define YYSTYPE VALUE
%}

%code provides {
static int yylex(YYSTYPE *, YYLTYPE *, VALUE);
static void yyerror(YYLTYPE *, VALUE, const char *);
}

%%
|
31
|
+
|
32
|
+
<%# One bison rule per grammar rule. An alternative that does not end in an action gets a default action: the value of its first nonterminal, or Qnil when it has none. -%>
<% rules.each do |rule| -%>
<%= rule.name %>:
<% rule.components.each_with_index do |seq, position| -%>
<%= "|\n" unless position.zero? -%>
<%= ' '+seq.elements.map(&:to_bison).join(' ') unless seq.elements.empty? %>
<% unless Bison::Action === seq.elements.last -%>
<% first_nonterminal = seq.elements.index { |element| Bison::Nonterminal === element } -%>
<% if first_nonterminal.nil? -%>
{ $$ = Qnil; }
<% else -%>
{ $$ = $<%= first_nonterminal + 1 %>; }
<% end %>
<% end -%>
<% end -%>
;

<% end -%>
|
48
|
+
|
49
|
+
%%
|
50
|
+
|
51
|
+
/* Ruby-side handles: the parser class, its Tokens module and its Actions class. */
static VALUE c<%= name %>;
static VALUE c<%= name %>Tokens;
static VALUE c<%= name %>Actions;

static VALUE <%= uname %>_parse(VALUE);

/*
 * Extension entry point: defines the parser class with its nested Tokens
 * module and Actions class, publishes every token number as a constant
 * under Tokens, and registers the "parse" instance method.
 */
void Init_<%= uname %>(void) {
  c<%= name %> = rb_define_class("<%= name %>", rb_cObject);
  c<%= name %>Tokens = rb_define_module_under(c<%= name %>, "Tokens");
  c<%= name %>Actions = rb_define_class_under(c<%= name %>, "Actions", rb_cObject);

<% tokens.each do |token| -%>
  rb_define_const(c<%= name %>Tokens, "<%= token.name %>", INT2FIX(<%= token.name %>));
<% end -%>

  rb_define_method(c<%= name %>, "parse", <%= uname %>_parse, 0);
}
|
68
|
+
|
69
|
+
/*
 * Implements the "parse" method: creates a fresh Actions object, points its
 * "parser" attribute at the receiver and runs yyparse with it.
 *
 * Returns the actions object's "result", or nil when yyparse reports failure.
 */
VALUE <%= uname %>_parse(VALUE self) {
  VALUE handler = rb_funcall(c<%= name %>Actions, rb_intern("new"), 0);
  int status;

  rb_funcall(handler, rb_intern("parser="), 1, self);

  status = yyparse(handler);
  if (status != 0) {
    return Qnil;
  }

  return rb_funcall(handler, rb_intern("result"), 0);
}
|
76
|
+
|
77
|
+
/*
 * Bison error callback: forwards the message together with the line and
 * column at which the offending token starts to the parser object's
 * "error" method.
 */
static void yyerror(YYLTYPE *loc, VALUE actions, const char *msg) {
  VALUE parser = rb_funcall(actions, rb_intern("parser"), 0);
  VALUE text = rb_str_new_cstr(msg);
  VALUE line = INT2FIX(loc->first_line);
  VALUE column = INT2FIX(loc->first_column);

  rb_funcall(parser, rb_intern("error"), 3, text, line, column);
}
|
84
|
+
|
85
|
+
/*
 * Bison lexer callback: asks the Ruby parser object for the next token.
 *
 * Before calling the parser's "lex" method, its lex_value is reset to nil
 * and its token position is seeded with the end of the previous token.
 * Afterwards the token's location is copied back from the parser's
 * token_row/token_col (start) and row/col (end).
 *
 * The value returned by "lex" selects the token:
 *   - nil            -> end of input (0)
 *   - Integer        -> that token number, semantic value = lex_value
 *   - String         -> the first byte as a literal character token,
 *                       semantic value = a one-byte string of that byte;
 *                       an empty string is treated as end of input
 *   - anything else  -> end of input (0)
 */
static int yylex(YYSTYPE *lval, YYLTYPE *lloc, VALUE actions) {
  VALUE parser, value, vtok;

  parser = rb_funcall(actions, rb_intern("parser"), 0);

  rb_funcall(parser, rb_intern("lex_value="), 1, Qnil);
  rb_funcall(parser, rb_intern("token_row="), 1, INT2FIX(lloc->last_line));
  rb_funcall(parser, rb_intern("token_col="), 1, INT2FIX(lloc->last_column));

  vtok = rb_funcall(parser, rb_intern("lex"), 0);
  value = rb_funcall(parser, rb_intern("lex_value"), 0);

  lloc->first_line = FIX2INT(rb_funcall(parser, rb_intern("token_row"), 0));
  lloc->first_column = FIX2INT(rb_funcall(parser, rb_intern("token_col"), 0));
  lloc->last_line = FIX2INT(rb_funcall(parser, rb_intern("row"), 0));
  lloc->last_column = FIX2INT(rb_funcall(parser, rb_intern("col"), 0));

  if (NIL_P(vtok)) {
    *lval = Qnil;
    return 0;
  }

  if (FIXNUM_P(vtok)) {
    *lval = value;
    return FIX2INT(vtok);
  }

  /* RB_TYPE_P is safe for immediates (symbols, true, false) and also
   * accepts String subclasses, unlike peeking at RBASIC(vtok)->klass. */
  if (RB_TYPE_P(vtok, T_STRING)) {
    unsigned char ch;

    if (RSTRING_LEN(vtok) == 0) {
      *lval = Qnil;
      return 0;
    }

    /* Read the byte as unsigned so values >= 0x80 do not turn into
     * negative token numbers. */
    ch = (unsigned char)RSTRING_PTR(vtok)[0];
    *lval = rb_str_new((const char *)&ch, 1);
    return ch;
  }

  *lval = Qnil;
  return 0;
}
|