mkbison 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,295 @@
1
+
2
/* Terminal symbols produced by the Ruby-side lexer (see yylex below). */
%token IDENTIFIER NUMBER STRING
%token COLON SEMICOLON
%token LBRACK RBRACK
%token PIPE
%token HASH DOUBLE_HASH
%token KW_TOKEN KW_LEFT KW_RIGHT
%token ACTIONS

/*
 * Reentrant parser with verbose error messages and location tracking.
 * The Ruby "actions" object is threaded through both yyparse and yylex.
 */
%define api.pure true
%define parse.error verbose
%parse-param { VALUE __actions }
%lex-param { VALUE __actions }
%locations

%{
#include <ruby.h>
/* Every semantic value on the parser stack is a Ruby object. */
#define YYSTYPE VALUE
%}

%code provides {
  /* Defined in the epilogue; both delegate to Ruby objects. */
  static int yylex(YYSTYPE *, YYLTYPE *, VALUE);
  static void yyerror(YYLTYPE *, VALUE, const char *);
}
33
+
34
+ %%
35
+
36
/*
 * Start symbol: token declarations, a DOUBLE_HASH separator, the grammar
 * rules, then an optional code section.  Before dispatching to the Ruby
 * action method, the [line, column] where the whole match and each named
 * component start is stored in instance variables on the actions object.
 */
grammar_file:
    token_list DOUBLE_HASH grammar_rules optional_code
    {
      rb_ivar_set(__actions, rb_intern("@_"), rb_ary_new3(2, INT2FIX(@$.first_line), INT2FIX(@$.first_column)));
      rb_ivar_set(__actions, rb_intern("@tokens"), rb_ary_new3(2, INT2FIX(@1.first_line), INT2FIX(@1.first_column)));
      rb_ivar_set(__actions, rb_intern("@rules"), rb_ary_new3(2, INT2FIX(@3.first_line), INT2FIX(@3.first_column)));
      rb_ivar_set(__actions, rb_intern("@code"), rb_ary_new3(2, INT2FIX(@4.first_line), INT2FIX(@4.first_column)));
      $$ = rb_funcall(__actions, rb_intern("_0_grammar_file"), 3, $1, $3, $4);
    }
  ;
47
+
48
/*
 * Optional trailing section: either nothing at all, or a DOUBLE_HASH
 * followed by an ACTIONS token whose value is handed to the Ruby action.
 */
optional_code:
    /* empty */
    {
      rb_ivar_set(__actions, rb_intern("@_"), rb_ary_new3(2, INT2FIX(@$.first_line), INT2FIX(@$.first_column)));
      $$ = rb_funcall(__actions, rb_intern("_0_optional_code"), 0);
    }
  |
    DOUBLE_HASH ACTIONS
    {
      rb_ivar_set(__actions, rb_intern("@_"), rb_ary_new3(2, INT2FIX(@$.first_line), INT2FIX(@$.first_column)));
      rb_ivar_set(__actions, rb_intern("@actions"), rb_ary_new3(2, INT2FIX(@2.first_line), INT2FIX(@2.first_column)));
      $$ = rb_funcall(__actions, rb_intern("_1_optional_code"), 1, $2);
    }
  ;
64
+
65
/*
 * Zero or more token declarations, built left-recursively: the Ruby action
 * for the second alternative receives the list so far and the new token.
 */
token_list:
    /* empty */
    {
      rb_ivar_set(__actions, rb_intern("@_"), rb_ary_new3(2, INT2FIX(@$.first_line), INT2FIX(@$.first_column)));
      $$ = rb_funcall(__actions, rb_intern("_0_token_list"), 0);
    }
  |
    token_list token
    {
      rb_ivar_set(__actions, rb_intern("@_"), rb_ary_new3(2, INT2FIX(@$.first_line), INT2FIX(@$.first_column)));
      rb_ivar_set(__actions, rb_intern("@list"), rb_ary_new3(2, INT2FIX(@1.first_line), INT2FIX(@1.first_column)));
      rb_ivar_set(__actions, rb_intern("@token"), rb_ary_new3(2, INT2FIX(@2.first_line), INT2FIX(@2.first_column)));
      $$ = rb_funcall(__actions, rb_intern("_1_token_list"), 2, $1, $2);
    }
  ;
82
+
83
/*
 * A single declaration: HASH, one of the KW_TOKEN / KW_LEFT / KW_RIGHT
 * keywords, and an IDENTIFIER.  The last alternative attaches a NUMBER to
 * an already-parsed token.  Each alternative calls its own numbered Ruby
 * action method.
 */
token:
    HASH KW_TOKEN IDENTIFIER
    {
      rb_ivar_set(__actions, rb_intern("@_"), rb_ary_new3(2, INT2FIX(@$.first_line), INT2FIX(@$.first_column)));
      rb_ivar_set(__actions, rb_intern("@name"), rb_ary_new3(2, INT2FIX(@3.first_line), INT2FIX(@3.first_column)));
      $$ = rb_funcall(__actions, rb_intern("_0_token"), 1, $3);
    }
  |
    HASH KW_LEFT IDENTIFIER
    {
      rb_ivar_set(__actions, rb_intern("@_"), rb_ary_new3(2, INT2FIX(@$.first_line), INT2FIX(@$.first_column)));
      rb_ivar_set(__actions, rb_intern("@name"), rb_ary_new3(2, INT2FIX(@3.first_line), INT2FIX(@3.first_column)));
      $$ = rb_funcall(__actions, rb_intern("_1_token"), 1, $3);
    }
  |
    HASH KW_RIGHT IDENTIFIER
    {
      rb_ivar_set(__actions, rb_intern("@_"), rb_ary_new3(2, INT2FIX(@$.first_line), INT2FIX(@$.first_column)));
      rb_ivar_set(__actions, rb_intern("@name"), rb_ary_new3(2, INT2FIX(@3.first_line), INT2FIX(@3.first_column)));
      $$ = rb_funcall(__actions, rb_intern("_2_token"), 1, $3);
    }
  |
    token NUMBER
    {
      rb_ivar_set(__actions, rb_intern("@_"), rb_ary_new3(2, INT2FIX(@$.first_line), INT2FIX(@$.first_column)));
      rb_ivar_set(__actions, rb_intern("@token"), rb_ary_new3(2, INT2FIX(@1.first_line), INT2FIX(@1.first_column)));
      rb_ivar_set(__actions, rb_intern("@num"), rb_ary_new3(2, INT2FIX(@2.first_line), INT2FIX(@2.first_column)));
      $$ = rb_funcall(__actions, rb_intern("_3_token"), 2, $1, $2);
    }
  ;
117
+
118
/*
 * Zero or more grammar rules, accumulated left-recursively: the Ruby
 * action for the second alternative receives the list so far and the
 * newly parsed rule.
 */
grammar_rules:
    /* empty */
    {
      rb_ivar_set(__actions, rb_intern("@_"), rb_ary_new3(2, INT2FIX(@$.first_line), INT2FIX(@$.first_column)));
      $$ = rb_funcall(__actions, rb_intern("_0_grammar_rules"), 0);
    }
  |
    grammar_rules grammar_rule
    {
      rb_ivar_set(__actions, rb_intern("@_"), rb_ary_new3(2, INT2FIX(@$.first_line), INT2FIX(@$.first_column)));
      rb_ivar_set(__actions, rb_intern("@list"), rb_ary_new3(2, INT2FIX(@1.first_line), INT2FIX(@1.first_column)));
      rb_ivar_set(__actions, rb_intern("@rule"), rb_ary_new3(2, INT2FIX(@2.first_line), INT2FIX(@2.first_column)));
      $$ = rb_funcall(__actions, rb_intern("_1_grammar_rules"), 2, $1, $2);
    }
  ;
135
+
136
/*
 * One rule: IDENTIFIER COLON components SEMICOLON.  The Ruby action gets
 * the identifier and the components; the punctuation values are dropped.
 */
grammar_rule:
    IDENTIFIER COLON components SEMICOLON
    {
      rb_ivar_set(__actions, rb_intern("@_"), rb_ary_new3(2, INT2FIX(@$.first_line), INT2FIX(@$.first_column)));
      rb_ivar_set(__actions, rb_intern("@name"), rb_ary_new3(2, INT2FIX(@1.first_line), INT2FIX(@1.first_column)));
      rb_ivar_set(__actions, rb_intern("@components"), rb_ary_new3(2, INT2FIX(@3.first_line), INT2FIX(@3.first_column)));
      $$ = rb_funcall(__actions, rb_intern("_0_grammar_rule"), 2, $1, $3);
    }
  ;
146
+
147
/*
 * One or more sequences separated by PIPE.  The first alternative handles
 * a lone sequence; the second passes the components parsed so far plus the
 * next sequence to the Ruby action.
 */
components:
    sequence
    {
      rb_ivar_set(__actions, rb_intern("@_"), rb_ary_new3(2, INT2FIX(@$.first_line), INT2FIX(@$.first_column)));
      rb_ivar_set(__actions, rb_intern("@sequence"), rb_ary_new3(2, INT2FIX(@1.first_line), INT2FIX(@1.first_column)));
      $$ = rb_funcall(__actions, rb_intern("_0_components"), 1, $1);
    }
  |
    components PIPE sequence
    {
      rb_ivar_set(__actions, rb_intern("@_"), rb_ary_new3(2, INT2FIX(@$.first_line), INT2FIX(@$.first_column)));
      rb_ivar_set(__actions, rb_intern("@sequences"), rb_ary_new3(2, INT2FIX(@1.first_line), INT2FIX(@1.first_column)));
      rb_ivar_set(__actions, rb_intern("@sequence"), rb_ary_new3(2, INT2FIX(@3.first_line), INT2FIX(@3.first_column)));
      $$ = rb_funcall(__actions, rb_intern("_1_components"), 2, $1, $3);
    }
  ;
165
+
166
/*
 * A possibly empty run of elements, extended one element at a time.  An
 * element is an ACTIONS block, a bare IDENTIFIER, an IDENTIFIER followed
 * by a bracketed IDENTIFIER (passed to Ruby as the "tag"), or a STRING.
 */
sequence:
    /* empty */
    {
      rb_ivar_set(__actions, rb_intern("@_"), rb_ary_new3(2, INT2FIX(@$.first_line), INT2FIX(@$.first_column)));
      $$ = rb_funcall(__actions, rb_intern("_0_sequence"), 0);
    }
  |
    sequence ACTIONS
    {
      rb_ivar_set(__actions, rb_intern("@_"), rb_ary_new3(2, INT2FIX(@$.first_line), INT2FIX(@$.first_column)));
      rb_ivar_set(__actions, rb_intern("@sequence"), rb_ary_new3(2, INT2FIX(@1.first_line), INT2FIX(@1.first_column)));
      rb_ivar_set(__actions, rb_intern("@code"), rb_ary_new3(2, INT2FIX(@2.first_line), INT2FIX(@2.first_column)));
      $$ = rb_funcall(__actions, rb_intern("_1_sequence"), 2, $1, $2);
    }
  |
    sequence IDENTIFIER
    {
      rb_ivar_set(__actions, rb_intern("@_"), rb_ary_new3(2, INT2FIX(@$.first_line), INT2FIX(@$.first_column)));
      rb_ivar_set(__actions, rb_intern("@sequence"), rb_ary_new3(2, INT2FIX(@1.first_line), INT2FIX(@1.first_column)));
      rb_ivar_set(__actions, rb_intern("@follower"), rb_ary_new3(2, INT2FIX(@2.first_line), INT2FIX(@2.first_column)));
      $$ = rb_funcall(__actions, rb_intern("_2_sequence"), 2, $1, $2);
    }
  |
    sequence IDENTIFIER LBRACK IDENTIFIER RBRACK
    {
      rb_ivar_set(__actions, rb_intern("@_"), rb_ary_new3(2, INT2FIX(@$.first_line), INT2FIX(@$.first_column)));
      rb_ivar_set(__actions, rb_intern("@sequence"), rb_ary_new3(2, INT2FIX(@1.first_line), INT2FIX(@1.first_column)));
      rb_ivar_set(__actions, rb_intern("@follower"), rb_ary_new3(2, INT2FIX(@2.first_line), INT2FIX(@2.first_column)));
      rb_ivar_set(__actions, rb_intern("@tag"), rb_ary_new3(2, INT2FIX(@4.first_line), INT2FIX(@4.first_column)));
      $$ = rb_funcall(__actions, rb_intern("_3_sequence"), 3, $1, $2, $4);
    }
  |
    sequence STRING
    {
      rb_ivar_set(__actions, rb_intern("@_"), rb_ary_new3(2, INT2FIX(@$.first_line), INT2FIX(@$.first_column)));
      rb_ivar_set(__actions, rb_intern("@sequence"), rb_ary_new3(2, INT2FIX(@1.first_line), INT2FIX(@1.first_column)));
      rb_ivar_set(__actions, rb_intern("@follower"), rb_ary_new3(2, INT2FIX(@2.first_line), INT2FIX(@2.first_column)));
      $$ = rb_funcall(__actions, rb_intern("_4_sequence"), 2, $1, $2);
    }
  ;
211
+
212
+
213
+ %%
214
+
215
+ static VALUE cBisonParser;
216
+ static VALUE cBisonParserTokens;
217
+ static VALUE cBisonParserActions;
218
+
219
+ static VALUE bison_parser_parse(VALUE);
220
+
221
+ void Init_bison_parser(void) {
222
+ cBisonParser = rb_define_class("BisonParser", rb_cObject);
223
+ cBisonParserTokens = rb_define_module_under(cBisonParser, "Tokens");
224
+ cBisonParserActions = rb_define_class_under(cBisonParser, "Actions", rb_cObject);
225
+
226
+ rb_define_const(cBisonParserTokens, "IDENTIFIER", INT2FIX(IDENTIFIER));
227
+ rb_define_const(cBisonParserTokens, "NUMBER", INT2FIX(NUMBER));
228
+ rb_define_const(cBisonParserTokens, "STRING", INT2FIX(STRING));
229
+ rb_define_const(cBisonParserTokens, "COLON", INT2FIX(COLON));
230
+ rb_define_const(cBisonParserTokens, "SEMICOLON", INT2FIX(SEMICOLON));
231
+ rb_define_const(cBisonParserTokens, "LBRACK", INT2FIX(LBRACK));
232
+ rb_define_const(cBisonParserTokens, "RBRACK", INT2FIX(RBRACK));
233
+ rb_define_const(cBisonParserTokens, "PIPE", INT2FIX(PIPE));
234
+ rb_define_const(cBisonParserTokens, "HASH", INT2FIX(HASH));
235
+ rb_define_const(cBisonParserTokens, "DOUBLE_HASH", INT2FIX(DOUBLE_HASH));
236
+ rb_define_const(cBisonParserTokens, "KW_TOKEN", INT2FIX(KW_TOKEN));
237
+ rb_define_const(cBisonParserTokens, "KW_LEFT", INT2FIX(KW_LEFT));
238
+ rb_define_const(cBisonParserTokens, "KW_RIGHT", INT2FIX(KW_RIGHT));
239
+ rb_define_const(cBisonParserTokens, "ACTIONS", INT2FIX(ACTIONS));
240
+
241
+ rb_define_method(cBisonParser, "parse", bison_parser_parse, 0);
242
+ }
243
+
244
+ VALUE bison_parser_parse(VALUE self) {
245
+ VALUE actions = rb_funcall(cBisonParserActions, rb_intern("new"), 0);
246
+ rb_funcall(actions, rb_intern("parser="), 1, self);
247
+ if (yyparse(actions))
248
+ return Qnil;
249
+ return rb_funcall(actions, rb_intern("result"), 0);
250
+ }
251
+
252
+ static void yyerror(YYLTYPE *loc, VALUE actions, const char *msg) {
253
+ VALUE parser = rb_funcall(actions, rb_intern("parser"), 0);
254
+ rb_funcall(parser, rb_intern("error"), 3,
255
+ rb_str_new_cstr(msg),
256
+ INT2FIX(loc->first_line),
257
+ INT2FIX(loc->first_column));
258
+ }
259
+
260
+ static int yylex(YYSTYPE *lval, YYLTYPE *lloc, VALUE actions) {
261
+ int c;
262
+ VALUE parser, value, vtok;
263
+
264
+ parser = rb_funcall(actions, rb_intern("parser"), 0);
265
+
266
+ rb_funcall(parser, rb_intern("lex_value="), 1, Qnil);
267
+ rb_funcall(parser, rb_intern("token_row="), 1, INT2FIX(lloc->last_line));
268
+ rb_funcall(parser, rb_intern("token_col="), 1, INT2FIX(lloc->last_column));
269
+
270
+ vtok = rb_funcall(parser, rb_intern("lex"), 0);
271
+ value = rb_funcall(parser, rb_intern("lex_value"), 0);
272
+
273
+ lloc->first_line = FIX2INT(rb_funcall(parser, rb_intern("token_row"), 0));
274
+ lloc->first_column = FIX2INT(rb_funcall(parser, rb_intern("token_col"), 0));
275
+ lloc->last_line = FIX2INT(rb_funcall(parser, rb_intern("row"), 0));
276
+ lloc->last_column = FIX2INT(rb_funcall(parser, rb_intern("col"), 0));
277
+
278
+ if (vtok == Qnil) {
279
+ *lval = Qnil;
280
+ return 0;
281
+ }
282
+
283
+ if (vtok & 1) {
284
+ *lval = value;
285
+ return FIX2INT(vtok);
286
+ }
287
+
288
+ if (RBASIC(vtok)->klass == rb_cString) {
289
+ c = StringValueCStr(vtok)[0];
290
+ *lval = rb_sprintf("%c", c);
291
+ return c;
292
+ }
293
+
294
+ return 0;
295
+ }
@@ -0,0 +1,9 @@
1
require 'mkmf'

# Generate the C parser from the grammar with bison, then write the Makefile.
# The bison executable can be overridden through the BISON_PATH environment
# variable.
ext_dir = File.dirname(__FILE__)
output = "#{ext_dir}/bison_parser.c"
bison_file = "#{ext_dir}/bison_parser.y"

bison = ENV['BISON_PATH'] || 'bison'

# system returns false on a non-zero exit and nil when the command could not
# be run; stop here instead of writing a Makefile for a C file that was never
# generated.
unless system(bison, '-o', output, bison_file)
  abort "#{bison} failed to generate #{output} from #{bison_file}"
end

create_makefile 'bison_parser/bison_parser'
@@ -0,0 +1,14 @@
1
+
2
+ module Bison
3
+ end
4
+
5
+ require 'bison/version'
6
+ require 'bison/grammar_file'
7
+ require 'bison/token'
8
+ require 'bison/rule'
9
+ require 'bison/sequence'
10
+ require 'bison/action'
11
+ require 'bison/nonterminal'
12
+ require 'bison/string'
13
+
14
+ require 'bison_parser'
@@ -0,0 +1,63 @@
1
+
2
+ require 'digest'
3
+ require 'tempfile'
4
+
5
module Bison
  # A block of Ruby code attached to a grammar sequence. It knows how to emit
  # the C action that forwards to a Ruby method, and how to syntax-check its
  # own code.
  class Action
    attr_accessor :code
    attr_accessor :location
    attr_accessor :predecessors
    attr_accessor :sequence

    # @param code [String] the Ruby source of the action
    def initialize(code)
      self.code = code
    end

    # Builds the bison action body: it records the location of the whole match
    # (as "@_") and of every tagged predecessor in instance variables on
    # __actions, then assigns the result of the Ruby call to $$.
    #
    # @return [String]
    def to_bison
      out = "\n {\n".dup
      out << location_assignment('_', '$')
      predecessor_tags.each { |i, tag| out << location_assignment(tag, i) }
      out << %( $$ = #{funcall('__actions')};\n)
      out << " }\n"
    end

    # Maps the 1-based position of each tagged Bison::Nonterminal among the
    # predecessors to its tag; untagged and non-Nonterminal elements are
    # skipped.
    #
    # @return [Hash{Integer => Object}]
    def predecessor_tags
      tags = {}
      predecessors.each_with_index do |element, i|
        tags[i + 1] = element.tag if Bison::Nonterminal === element && element.tag
      end
      tags
    end

    # Syntax-checks the action code with `ruby -c`.
    #
    # The code is written to a temporary file preceded by location[0] blank
    # lines, which offsets the line numbers in any reported error by that
    # amount. The temporary file is always removed afterwards.
    #
    # @return [String, nil] nil when the check succeeds, otherwise the checker
    #   output with the temporary file path replaced by '-'
    def errors
      src = Tempfile.new('action-src.rb')
      begin
        Array(location).first.to_i.times { src.puts }
        src.puts(code)
        src.close

        # Array form: no shell is involved, so the path needs no quoting.
        output = IO.popen(['ruby', '-c', src.path], err: [:child, :out], &:read)
        $?.success? ? nil : output.gsub(src.path, '-')
      ensure
        src.close! # close and unlink, even if the check raised
      end
    end

    # Name of the Ruby method implementing this action, derived from the
    # sequence index and the rule name.
    #
    # @return [String]
    def name
      "_#{sequence.index}_#{sequence.rule.name}"
      # base << "_#{Digest::MD5.hexdigest(predecessor_tags.inspect)}"
    end

    # What to do about default $1/Qnil?
    #
    # Builds the C `rb_funcall` expression invoking this action's method on
    # +receiver+, passing the semantic value of every tagged predecessor.
    #
    # @param receiver [String] C expression for the receiving object
    # @return [String]
    def funcall(receiver)
      positions = predecessor_tags.keys
      argv = [positions.size, *positions.map { |i| "$#{i}" }].join(', ')
      "rb_funcall(#{receiver}, rb_intern(#{name.inspect}), #{argv})"
    end

    private

    # One C statement storing [first_line, first_column] of bison location
    # @<ref> in the instance variable @<ivar> of __actions.
    def location_assignment(ivar, ref)
      %( rb_ivar_set(__actions, rb_intern("@#{ivar}"), rb_ary_new3(2, INT2FIX(@#{ref}.first_line), INT2FIX(@#{ref}.first_column)));\n)
    end
  end
end
@@ -0,0 +1,59 @@
1
+
2
+ require 'erb'
3
+
4
module Bison
  # In-memory representation of a parsed grammar file: its tokens, rules and
  # trailing code, plus helpers that render the generated artefacts from the
  # ERB templates shipped in the gem's templates directory.
  class GrammarFile
    attr_accessor :name
    attr_reader :tokens, :rules, :code

    # @param tokens [Array] objects responding to #name
    # @param rules [Array] objects responding to #name and #components
    # @param code [Object] trailing code section
    def initialize(tokens, rules, code)
      @tokens, @rules, @code = tokens, rules, code
    end

    # Checks that every Bison::Nonterminal used in a rule names a declared
    # token or rule, and that every Bison::Action reports no errors.
    # Aborts the process with all collected messages when anything is wrong.
    #
    # @return [nil] when the grammar is valid
    def validate
      errors = []
      symbols = tokens.map(&:name) + rules.map(&:name)
      sequences = rules.map(&:components).flatten

      sequences.map(&:elements).flatten.each do |el|
        next unless el.is_a?(Bison::Nonterminal) && !symbols.include?(el.name)

        errors << "#{el.location.join('.')}: #{el.name} is not defined"
      end

      sequences.each do |seq|
        seq.elements.grep(Bison::Action).each do |action|
          err = action.errors
          errors << err unless err.nil?
        end
      end

      abort(errors.join("\n")) unless errors.empty?
    end

    # Renders templates/class.rb.erb to +out+.
    def print_class(out=$stdout)
      render_template('class.rb.erb', out)
    end

    # Renders templates/base.rb.erb to +out+.
    def print_base_module(out=$stdout)
      render_template('base.rb.erb', out)
    end

    # Renders templates/actions.rb.erb to +out+.
    def print_actions_module(out=$stdout)
      render_template('actions.rb.erb', out)
    end

    # Renders templates/parser.y.erb to +out+.
    def print_bison(out=$stdout)
      render_template('parser.y.erb', out)
    end

    # Renders templates/extconf.rb.erb to +out+.
    def print_extconf(out=$stdout)
      render_template('extconf.rb.erb', out)
    end

    # Snake-cased form of #name: an underscore is inserted between a
    # lowercase letter and the uppercase letter following it, then the whole
    # string is downcased.
    #
    # @return [String]
    def uname
      name.gsub(/([a-z])([A-Z])/, '\1_\2').downcase
    end

    private

    # Evaluates the named template against this object's binding and writes
    # the result to +out+.
    def render_template(file, out)
      template = File.expand_path("../../../templates/#{file}", __FILE__)
      out.puts(build_erb(File.read(template)).result(binding))
    end

    # Builds an ERB with '-' trim mode. The positional trim_mode argument is
    # deprecated since Ruby 2.6 (and dropped by later erb versions), so the
    # keyword form is used whenever ERB#initialize accepts keywords.
    def build_erb(source)
      if ERB.instance_method(:initialize).parameters.assoc(:key)
        ERB.new(source, trim_mode: '-')
      else
        ERB.new(source, nil, '-')
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Bison
  # A symbol referenced inside a grammar sequence, optionally carrying a tag.
  class Nonterminal
    attr_reader :name, :tag
    attr_accessor :location, :sequence

    # @param name [Object] the symbol's name as written in the grammar
    # @param tag [Object, nil] optional tag attached to the symbol
    def initialize(name, tag = nil)
      @name, @tag = name, tag
    end

    # The text emitted for this symbol in the generated bison file: its name.
    def to_bison
      name
    end
  end
end