smtlaissezfaire-gazelle 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile ADDED
@@ -0,0 +1,4 @@
1
+
2
# Load every entry found beneath the tasks/ directory next to this Rakefile.
task_pattern = File.dirname(__FILE__) + "/tasks/**/**"
Dir.glob(task_pattern).each { |task_file| load task_file }
@@ -0,0 +1,6 @@
1
# Generates the Makefile used to build the gazelle_ruby_bindings extension.
require 'mkmf'

extension_name = "gazelle_ruby_bindings"

# Ask the compiler for extra warnings while building the extension.
$CFLAGS += " -W -Wall"

dir_config(extension_name)
create_makefile(extension_name)
@@ -0,0 +1,119 @@
1
+ #ifndef GAZELLE_RUBY_BINDINGS_C
2
+ #define GAZELLE_RUBY_BINDINGS_C
3
+
4
+ #include <stdbool.h>
5
+ #include <ruby.h>
6
+ #include <gazelle/dynarray.h>
7
+ #include "includes/bc_read_stream.c"
8
+ #include "includes/load_grammar.c"
9
+ #include "includes/parse.c"
10
+ #include "gazelle_ruby_bindings.h"
11
+
12
+ /* ERROR FUNCTIONS */
13
/* Set to 1 by error_terminal_callback() and cleared by reset_terminal_error();
 * run_gazelle_parse() reads it to decide between Qtrue and Qfalse. */
static int terminal_error = 0;

/* Character-level error hook installed in the bound grammar. Does nothing yet. */
static void error_char_callback() {
  /* TODO: do something intelligent here */
}

/* Terminal-level error hook: records that the current parse hit an error. */
static void error_terminal_callback() { terminal_error = 1; }

/* Clears the error flag; called at the start of every run_grammar(). */
static void reset_terminal_error() { terminal_error = 0; }
26
+
27
+ /* General Gazelle integration */
28
+ static void rb_gzl_parse(char *input, ParseState *state, BoundGrammar *bg) {
29
+ gzl_init_parse_state(state, bg);
30
+ gzl_parse(state, input, strlen(input) + 1);
31
+ }
32
+
33
+ static VALUE user_data_obj(RbUserData *user_data) {
34
+ return(user_data->self);
35
+ }
36
+
37
+ static char *user_data_input(RbUserData *user_data) {
38
+ return(user_data->input);
39
+ }
40
+
41
+ static void end_rule_callback(ParseState *parse_state)
42
+ {
43
+ struct gzl_parse_stack_frame *frame = DYNARRAY_GET_TOP(parse_state->parse_stack);
44
+ struct gzl_rtn_frame *rtn_frame = &frame->f.rtn_frame;
45
+
46
+ VALUE self = user_data_obj(parse_state->user_data);
47
+ char *rule_name = rtn_frame->rtn->name;
48
+ VALUE ruby_rule_name = rb_str_new2(rule_name);
49
+ char *input = user_data_input(parse_state->user_data);
50
+ VALUE ruby_input = rb_str_new2(input);
51
+
52
+ rb_funcall(self, rb_intern("run_rule"), 2, ruby_rule_name, ruby_input);
53
+ }
54
+
55
+ static void mk_user_data(ParseState *state, VALUE self, char *input) {
56
+ RbUserData *data = malloc(sizeof(RbUserData *));
57
+ data->self = self;
58
+ data->input = input;
59
+ state->user_data = data;
60
+ }
61
+
62
+ static int run_grammar(VALUE self, char *filename, char *input, bool run_callbacks) {
63
+ reset_terminal_error();
64
+
65
+ struct bc_read_stream *s = bc_rs_open_file(filename);
66
+ if (!s)
67
+ return 1; // should raise an invalid file format error in ruby instead
68
+
69
+ struct gzl_grammar *g = gzl_load_grammar(s);
70
+ bc_rs_close_stream(s);
71
+
72
+ ParseState *state = gzl_alloc_parse_state();
73
+ mk_user_data(state, self, input);
74
+
75
+ BoundGrammar bg = {
76
+ .grammar = g,
77
+ .error_char_cb = error_char_callback,
78
+ .error_terminal_cb = error_terminal_callback
79
+ };
80
+
81
+ if (run_callbacks) {
82
+ bg.end_rule_cb = end_rule_callback;
83
+ }
84
+
85
+ rb_gzl_parse(input, state, &bg);
86
+
87
+ return 0;
88
+ }
89
+
90
+ static VALUE run_gazelle_parse(VALUE self, VALUE input, bool run_callbacks) {
91
+ VALUE compiled_file_stream = rb_iv_get(self, "@filename");
92
+ char *filename = RSTRING_TO_PTR(compiled_file_stream);
93
+ char *input_string = RSTRING_TO_PTR(input);
94
+
95
+ if (run_grammar(self, filename, input_string, run_callbacks))
96
+ return Qfalse;
97
+
98
+ return(terminal_error ? Qfalse : Qtrue);
99
+ }
100
+
101
+ /* Public Ruby methods */
102
+ static VALUE rb_gazelle_parse_p(VALUE self, VALUE input) {
103
+ return run_gazelle_parse(self, input, false);
104
+ }
105
+
106
+ static VALUE rb_gazelle_parse(VALUE self, VALUE input) {
107
+ return run_gazelle_parse(self, input, true);
108
+ }
109
+
110
+ /* Hook up the ruby methods. Similar to lua's luaopen_(mod) functions */
111
+ void Init_gazelle_ruby_bindings() {
112
+ VALUE Gazelle = rb_const_get(rb_cObject, rb_intern("Gazelle"));
113
+ VALUE Gazelle_Parser = rb_const_get_at(Gazelle, rb_intern("Parser"));
114
+
115
+ rb_define_method(Gazelle_Parser, "parse?", rb_gazelle_parse_p, 1);
116
+ rb_define_method(Gazelle_Parser, "parse", rb_gazelle_parse, 1);
117
+ }
118
+
119
+ #endif /* GAZELLE_RUBY_BINDINGS_C */
@@ -0,0 +1,20 @@
1
+ #ifndef GAZELLE_RUBY_BINDINGS_H
2
+ #define GAZELLE_RUBY_BINDINGS_H
3
+
4
+ #define RSTRING_TO_PTR(x) RSTRING(x)->ptr
5
+
6
+ typedef struct gzl_parse_state ParseState;
7
+ typedef struct gzl_bound_grammar BoundGrammar;
8
+ typedef struct rb_gzl_user_data RbUserData;
9
+
10
+ struct rb_gzl_user_data {
11
+ /* The pointer to the current ruby parser object. */
12
+ VALUE self;
13
+
14
+ /* The input given to the parse function */
15
+ char *input;
16
+ };
17
+
18
+ void Init_gazelle_ruby_bindings();
19
+
20
+ #endif /* GAZELLE_RUBY_BINDINGS_H */
@@ -0,0 +1,872 @@
1
+ /*********************************************************************
2
+
3
+ Gazelle: a system for building fast, reusable parsers
4
+
5
+ bc_read_stream.c
6
+
7
+ This file contains routines for reading files in Bitcode format.
8
+ It is a stream interface -- the stream keeps only one record in
9
+ memory at a time, and is designed to have a very small memory
10
+ footprint.
11
+
12
+ Copyright (c) 2007 Joshua Haberman. See LICENSE for details.
13
+
14
+ *********************************************************************/
15
+
16
+ #include "gazelle/bc_read_stream.h"
17
+
18
+ #define OP_ENCODING_FIXED 1
19
+ #define OP_ENCODING_VBR 2
20
+ #define OP_ENCODING_ARRAY 3
21
+ #define OP_ENCODING_CHAR6 4
22
+
23
+ #define ABBREV_ID_END_BLOCK 0
24
+ #define ABBREV_ID_ENTER_SUBBLOCK 1
25
+ #define ABBREV_ID_DEFINE_ABBREV 2
26
+ #define ABBREV_ID_UNABBREV_RECORD 3
27
+
28
+ #define STDBLOCK_BLOCKINFO 0
29
+
30
+ #define BLOCKINFO_BLOCK_SETBID 1
31
+
32
/*
 * Grows the heap array `ptr` (capacity `size`, in elements) so that it can
 * hold at least `desired_size` elements. The capacity is doubled; if that is
 * still too small it is raised straight to `desired_size`, so the array is
 * never left smaller than requested. `size` is updated in place.
 *
 * Arguments are evaluated more than once, so pass side-effect-free
 * expressions. The realloc() result is not checked for NULL.
 * Wrapped in do/while(0) so it behaves as a single statement.
 */
#define RESIZE_ARRAY_IF_NECESSARY(ptr, size, desired_size) \
  do \
  { \
    if((size) < (desired_size)) \
    { \
      (size) *= 2; \
      if((size) < (desired_size)) \
        (size) = (desired_size); \
      (ptr) = realloc((ptr), (size)*sizeof(*(ptr))); \
    } \
  } while(0)
38
+
39
+ #include <stdio.h>
40
+ #include <stdlib.h>
41
+ #include <string.h>
42
+
43
/* One abbreviation recorded for a block id: an owned array of operands. */
struct blockinfo_abbrev {
    int num_operands;
    struct abbrev_operand *operands;
};

/* Abbreviations registered for one block id via a BLOCKINFO block. */
struct blockinfo {
    uint32_t block_id;
    int num_abbreviations;                   /* entries in use */
    int size_abbreviations;                  /* entries allocated */
    struct blockinfo_abbrev *abbreviations;  /* growable array */
};
52
+
53
/* Which member of stream_stack_entry.e is valid. */
enum EntryType {
    BlockMetadata,
    Abbreviation
};

/* Data remembered for a block that has been entered. */
struct block_metadata {
    int abbrev_len;
    int block_id;
    int block_offset;
    int block_len;
};

/*
 * One slot of the stream stack: either the metadata of an entered block or a
 * reference to an abbreviation defined inside the current block.
 */
struct stream_stack_entry
{
    union {
        struct block_metadata block_metadata;

        /* the operands live in bc_read_stream.abbrev_operands */
        struct {
            int first_operand_offset;
            int num_operands;
        } abbrev;
    } e;

    enum EntryType type;
};
74
+
75
/* Which member of abbrev_operand.o is valid. */
enum OperandType {
    Literal,
    EncodingInfo
};

/*
 * One operand of an abbreviation definition: either a literal value or an
 * encoding (one of the OP_ENCODING_* codes) with its parameter.
 */
struct abbrev_operand
{
    union {
        long long literal_value;
        struct {
            unsigned char encoding;
            int value;
        } encoding_info;
    } o;

    enum OperandType type;
};
90
+
91
+ struct bc_read_stream
92
+ {
93
+ /* Values for the stream */
94
+ FILE *infile;
95
+ unsigned char *inmem;
96
+ uint32_t next_bits;
97
+ int num_next_bits;
98
+ int stream_err;
99
+ int stream_offset;
100
+
101
+ struct stream_stack_entry *old_block_metadata;
102
+
103
+ /* Values for the current block */
104
+ int abbrev_len;
105
+ int num_abbrevs;
106
+ struct stream_stack_entry *block_metadata;
107
+ struct blockinfo *blockinfo;
108
+
109
+ /* Values for the current record */
110
+ enum RecordType record_type;
111
+
112
+ /* - for data records */
113
+ int record_id;
114
+ int current_record_size;
115
+ int current_record_offset;
116
+ int record_buf_size;
117
+ uint64_t *record_buf;
118
+
119
+ /* - for StartBlock records */
120
+ int block_id;
121
+ int block_len;
122
+
123
+ /* - for DefineAbbrev records */
124
+ int record_size_abbrev;
125
+ int record_num_abbrev;
126
+ struct abbrev_operand *record_abbrev_operands;
127
+
128
+
129
+ /* The stream stack */
130
+ int stream_stack_size;
131
+ int stream_stack_len;
132
+ struct stream_stack_entry *stream_stack;
133
+
134
+ int abbrev_operands_size;
135
+ int abbrev_operands_len;
136
+ struct abbrev_operand *abbrev_operands;
137
+
138
+ /* Data about blockinfo records we have encountered */
139
+ int blockinfo_size;
140
+ int blockinfo_len;
141
+ struct blockinfo *blockinfos;
142
+ };
143
+
144
+ /*
145
+ void print_abbrev(struct abbrev_operand *operands, int num_operands)
146
+ {
147
+ printf("Abbrev: num_operands=%d\n", num_operands);
148
+ for(int i = 0; i < num_operands; i++)
149
+ {
150
+ struct abbrev_operand *o = &operands[i];
151
+ if(o->type == Literal)
152
+ {
153
+ printf(" Literal value: %llu\n", o->o.literal_value);
154
+ }
155
+ else if(o->type == EncodingInfo)
156
+ {
157
+ printf(" EncodingInfo: encoding=%u, value=%d\n", o->o.encoding_info.encoding,
158
+ o->o.encoding_info.value);
159
+ }
160
+ }
161
+ }
162
+
163
+ void dump_stack(struct bc_read_stream *s)
164
+ {
165
+ printf("Stream stack: %d entries\n", s->stream_stack_len);
166
+ for(int i = 0; i < s->stream_stack_len; i++)
167
+ {
168
+ printf("- ");
169
+ struct stream_stack_entry *e = &s->stream_stack[i];
170
+ if(e->type == Abbreviation)
171
+ {
172
+ print_abbrev(s->abbrev_operands + e->e.abbrev.first_operand_offset, e->e.abbrev.num_operands);
173
+ }
174
+ else if(e->type == BlockMetadata)
175
+ {
176
+ printf("BlockMetadata: abbrev_len=%d, block_id=%d\n", e->e.block_metadata.abbrev_len,
177
+ e->e.block_metadata.block_id);
178
+ }
179
+ }
180
+ }
181
+
182
+ void dump_blockinfo(struct blockinfo *bi)
183
+ {
184
+ if(bi)
185
+ {
186
+ printf("Blockinfo! BlockID: %u, Abbrevs:\n", bi->block_id);
187
+ for(int i = 0; i < bi->num_abbreviations; i++)
188
+ print_abbrev(bi->abbreviations[i].operands, bi->abbreviations[i].num_operands);
189
+ }
190
+ }
191
+ */
192
+
193
+ static int refill_next_bits(struct bc_read_stream *stream);
194
+ struct bc_read_stream *bc_read_stream_init();
195
+
196
+ struct bc_read_stream *bc_rs_open_mem(const char *data)
197
+ {
198
+ struct bc_read_stream *stream = bc_read_stream_init();
199
+ stream->inmem = (unsigned char *)data;
200
+ refill_next_bits(stream);
201
+ return stream;
202
+ }
203
+
204
+ struct bc_read_stream *bc_rs_open_file(const char *filename)
205
+ {
206
+ FILE *infile = fopen(filename, "r");
207
+
208
+ if(infile == NULL)
209
+ {
210
+ return NULL;
211
+ }
212
+
213
+ char magic[4];
214
+ int ret = fread(magic, 4, 1, infile);
215
+ if(ret < 1 || magic[0] != 'B' || magic[1] != 'C')
216
+ {
217
+ fclose(infile);
218
+ return NULL;
219
+ }
220
+
221
+ struct bc_read_stream *stream = bc_read_stream_init();
222
+ stream->infile = infile;
223
+ refill_next_bits(stream);
224
+ return stream;
225
+ }
226
+
227
+ struct bc_read_stream *bc_read_stream_init()
228
+ {
229
+ /* TODO: give the application a way to get the app-specific magic number */
230
+
231
+ struct bc_read_stream *stream = malloc(sizeof(*stream));
232
+ stream->infile = NULL;
233
+ stream->stream_err = 0;
234
+
235
+ stream->next_bits = 0;
236
+ stream->num_next_bits = 0;
237
+ stream->stream_offset = 0;
238
+
239
+ stream->abbrev_len = 2; /* its initial value according to the spec */
240
+ stream->num_abbrevs = 0;
241
+
242
+ stream->stream_stack_size = 8; /* enough for a few levels of nesting and a few abbrevs */
243
+ stream->stream_stack = malloc(stream->stream_stack_size*sizeof(*stream->stream_stack));
244
+
245
+ /* we create an outermose stack frame -- this exists mostly to store
246
+ * the abbrev length of the outermost scope, and to store a bogus
247
+ * block_id so that we'll never find a blockinfo for the outer scope */
248
+ stream->stream_stack_len = 1;
249
+ stream->block_metadata = &stream->stream_stack[0];
250
+ stream->block_metadata->type = BlockMetadata;
251
+ stream->block_metadata->e.block_metadata.abbrev_len = stream->abbrev_len;
252
+ stream->block_metadata->e.block_metadata.block_id = -1;
253
+
254
+ stream->record_type = DataRecord; /* anything besides Eof */
255
+
256
+ stream->abbrev_operands_size = 8;
257
+ stream->abbrev_operands_len = 0;
258
+ stream->abbrev_operands = malloc(stream->abbrev_operands_size*sizeof(*stream->abbrev_operands));
259
+
260
+ stream->blockinfo_size = 8;
261
+ stream->blockinfo_len = 0;
262
+ stream->blockinfos = malloc(stream->blockinfo_size*sizeof(*stream->blockinfos));
263
+
264
+ stream->record_buf_size = 8;
265
+ stream->record_buf = malloc(stream->record_buf_size*sizeof(*stream->record_buf));
266
+
267
+ stream->record_size_abbrev = 8;
268
+ stream->record_abbrev_operands = malloc(stream->record_size_abbrev*sizeof(*stream->record_abbrev_operands));
269
+
270
+ return stream;
271
+ }
272
+
273
+ void bc_rs_close_stream(struct bc_read_stream *stream)
274
+ {
275
+ free(stream->record_abbrev_operands);
276
+ free(stream->record_buf);
277
+ free(stream->abbrev_operands);
278
+ free(stream->stream_stack);
279
+
280
+ int i, j;
281
+
282
+ for(i = 0; i < stream->blockinfo_len; i++)
283
+ {
284
+ for(j = 0; j < stream->blockinfos[i].num_abbreviations; j++)
285
+ {
286
+ free(stream->blockinfos[i].abbreviations[j].operands);
287
+ }
288
+ free(stream->blockinfos[i].abbreviations);
289
+ }
290
+ free(stream->blockinfos);
291
+
292
+ if(stream->infile)
293
+ fclose(stream->infile);
294
+ free(stream);
295
+ }
296
+
297
+ uint64_t bc_rs_read_64(struct bc_read_stream *stream, int i)
298
+ {
299
+ if(i > stream->current_record_size)
300
+ {
301
+ stream->stream_err |= BITCODE_ERR_NO_SUCH_VALUE;
302
+ return 0;
303
+ }
304
+ else
305
+ {
306
+ return stream->record_buf[i];
307
+ }
308
+ }
309
+
310
+
311
+ #define GETTER_FUNC(type, bits) \
312
+ type bc_rs_read_ ## bits (struct bc_read_stream *stream, int i) \
313
+ { \
314
+ uint64_t val = bc_rs_read_64(stream, i); \
315
+ if(stream->record_buf[i] > ((1ULL << bits) - 1)) \
316
+ { \
317
+ stream->stream_err |= BITCODE_ERR_VALUE_TOO_LARGE; \
318
+ return 0; \
319
+ } \
320
+ else \
321
+ { \
322
+ return (type)val; \
323
+ } \
324
+ }
325
+
326
+ GETTER_FUNC(uint8_t, 8)
327
+ GETTER_FUNC(uint16_t, 16)
328
+ GETTER_FUNC(uint32_t, 32)
329
+
330
+ #define NEXT_GETTER_FUNC(type, bits) \
331
+ type bc_rs_read_next_ ## bits (struct bc_read_stream *stream) \
332
+ { \
333
+ return bc_rs_read_ ## bits(stream, stream->current_record_offset++); \
334
+ } \
335
+
336
+ NEXT_GETTER_FUNC(uint8_t, 8)
337
+ NEXT_GETTER_FUNC(uint16_t, 16)
338
+ NEXT_GETTER_FUNC(uint32_t, 32)
339
+ NEXT_GETTER_FUNC(uint64_t, 64)
340
+
341
+ static int refill_next_bits(struct bc_read_stream *stream)
342
+ {
343
+ unsigned char buf[4];
344
+
345
+ stream->stream_offset += 4;
346
+
347
+ if(stream->infile)
348
+ {
349
+ int ret = fread(buf, 4, 1, stream->infile);
350
+ if(ret < 1)
351
+ {
352
+ //if(feof(stream->infile))
353
+ // stream->record_type = Eof;
354
+
355
+ if(ferror(stream->infile))
356
+ stream->stream_err |= BITCODE_ERR_IO;
357
+
358
+ return -1;
359
+ }
360
+ }
361
+ else
362
+ {
363
+ memcpy(buf, stream->inmem + stream->stream_offset, 4);
364
+ }
365
+
366
+ stream->next_bits = buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
367
+ stream->num_next_bits = 32;
368
+
369
+ return 0;
370
+ }
371
+
372
/*
 * The low `num_bits` bits of the 32-bit value `bitfield`, for num_bits in
 * 0..32. A width of 0 (or less) yields 0 instead of shifting by 32, which is
 * undefined for a 32-bit operand. Both arguments are parenthesised so that
 * compound expressions can be passed; `num_bits` is evaluated twice.
 */
#define LOW_BITS(bitfield, num_bits) \
    ((num_bits) <= 0 ? 0U : ((bitfield) & (~0U >> (32-(num_bits)))))
373
+
374
+ static uint32_t read_fixed(struct bc_read_stream *stream, int num_bits)
375
+ {
376
+ uint32_t ret;
377
+
378
+ if(stream->num_next_bits >= num_bits)
379
+ {
380
+ /* next_bits already contains all the bits we need -- take them */
381
+ ret = LOW_BITS(stream->next_bits, num_bits);
382
+ stream->next_bits >>= num_bits;
383
+ stream->num_next_bits -= num_bits;
384
+ }
385
+ else
386
+ {
387
+ /* we need all of next_bits, and then some */
388
+ ret = stream->next_bits;
389
+ int bits_filled = stream->num_next_bits;
390
+ int bits_left = num_bits - bits_filled;
391
+
392
+ if(refill_next_bits(stream) < 0) return 0;
393
+
394
+ /* take bits_left bits from the next_bits */
395
+ ret |= LOW_BITS(stream->next_bits, bits_left) << bits_filled;
396
+
397
+ if(bits_left != 32)
398
+ stream->next_bits >>= bits_left;
399
+ else
400
+ stream->next_bits = 0;
401
+
402
+ stream->num_next_bits -= bits_left;
403
+ }
404
+
405
+ if(stream->num_next_bits == 0)
406
+ {
407
+ /* We could defer this, but doing it now makes our stream_offset more accurate */
408
+ refill_next_bits(stream);
409
+ }
410
+
411
+ return ret;
412
+ }
413
+
414
/*
 * Reads a fixed-width field of up to 64 bits. Fields wider than 32 bits are
 * read as two pieces, low 32 bits first.
 */
static uint64_t read_fixed_64(struct bc_read_stream *stream, int num_bits)
{
    if(num_bits > 32)
    {
        uint64_t low_half = read_fixed(stream, 32);
        uint64_t high_half = read_fixed(stream, num_bits-32);
        return low_half | (high_half << 32);
    }

    return read_fixed(stream, num_bits);
}
426
+
427
+ static uint64_t read_vbr_64(struct bc_read_stream *stream, int bits)
428
+ {
429
+ uint64_t val = 0;
430
+ int read_bits = 0;
431
+ int continuation_bit = 1 << (bits-1);
432
+ int value_bits = continuation_bit - 1;
433
+ int continues = 0;
434
+
435
+ do {
436
+ uint32_t next_bits = read_fixed(stream, bits);
437
+ continues = next_bits & continuation_bit;
438
+ val |= (next_bits & value_bits) << read_bits;
439
+ read_bits += bits-1;
440
+ } while(continues);
441
+
442
+ return val;
443
+ }
444
+
445
+ static uint32_t read_vbr(struct bc_read_stream *stream, int bits)
446
+ {
447
+ uint64_t val = read_vbr_64(stream, bits);
448
+ if(val >> 32)
449
+ {
450
+ stream->stream_err |= BITCODE_ERR_CORRUPT_INPUT;
451
+ return 0;
452
+ }
453
+ else
454
+ {
455
+ return (uint32_t)val;
456
+ }
457
+ }
458
+
459
/*
 * Maps a char6 code to its character: 0-25 to 'a'-'z', 26-51 to 'A'-'Z',
 * 52-61 to '0'-'9', 62 to '.', and 63 or anything larger to '_'.
 */
static uint8_t decode_char6(int num)
{
    if(num >= 63)
        return '_';
    if(num == 62)
        return '.';
    if(num >= 52)
        return '0' + (num-52);
    if(num >= 26)
        return 'A' + (num-26);

    return 'a' + num;
}
467
+
468
+ /* This can handle any abbreviated type except for arrays */
469
+ static uint64_t read_abbrev_value(struct bc_read_stream *stream, struct abbrev_operand *op)
470
+ {
471
+ if(op->type == Literal)
472
+ return op->o.literal_value;
473
+ else
474
+ {
475
+ switch(op->o.encoding_info.encoding) {
476
+ case OP_ENCODING_FIXED:
477
+ return read_fixed_64(stream, op->o.encoding_info.value);
478
+ case OP_ENCODING_VBR:
479
+ return read_vbr_64(stream, op->o.encoding_info.value);
480
+ case OP_ENCODING_CHAR6:
481
+ return decode_char6(read_fixed(stream, 6));
482
+ default:
483
+ stream->stream_err |= BITCODE_ERR_INTERNAL;
484
+ return 0;
485
+ }
486
+ }
487
+ }
488
+
489
+ static void append_value(struct bc_read_stream *stream, uint64_t val)
490
+ {
491
+ RESIZE_ARRAY_IF_NECESSARY(stream->record_buf, stream->record_buf_size, stream->current_record_size+1);
492
+ stream->record_buf[stream->current_record_size++] = val;
493
+ }
494
+
495
+ static void read_user_abbreviated_record(struct bc_read_stream *stream,
496
+ struct abbrev_operand *ops,
497
+ int num_operands)
498
+ {
499
+ stream->current_record_size = 0;
500
+ int i, j;
501
+
502
+ for(i = 0; i < num_operands; i++)
503
+ {
504
+ struct abbrev_operand *op = &ops[i];
505
+
506
+ if(op->type == EncodingInfo && op->o.encoding_info.encoding == OP_ENCODING_ARRAY)
507
+ {
508
+ int num_elements = read_vbr(stream, 6);
509
+ i += 1;
510
+ for(j = 0; j < num_elements; j++)
511
+ append_value(stream, read_abbrev_value(stream, &ops[i]));
512
+ }
513
+ else
514
+ {
515
+ uint64_t val = read_abbrev_value(stream, &ops[i]);
516
+ if(i == 0)
517
+ {
518
+ stream->record_id = val;
519
+ }
520
+ else
521
+ {
522
+ append_value(stream, val);
523
+ }
524
+ }
525
+ }
526
+ }
527
+
528
+ static int read_abbrev_op(struct bc_read_stream *stream, struct abbrev_operand *o, int array_ok)
529
+ {
530
+ int is_literal = read_fixed(stream, 1);
531
+ if(is_literal)
532
+ {
533
+ o->type = Literal;
534
+ o->o.literal_value = read_vbr(stream, 8);
535
+ }
536
+ else
537
+ {
538
+ o->type = EncodingInfo;
539
+ o->o.encoding_info.encoding = read_fixed(stream, 3);
540
+ switch(o->o.encoding_info.encoding)
541
+ {
542
+ case OP_ENCODING_FIXED:
543
+ case OP_ENCODING_VBR:
544
+ o->o.encoding_info.value = read_vbr(stream, 5);
545
+ break;
546
+
547
+ case OP_ENCODING_ARRAY:
548
+ if(!array_ok) return -1;
549
+ break;
550
+
551
+ case OP_ENCODING_CHAR6:
552
+ break;
553
+ }
554
+ }
555
+ return 0;
556
+ }
557
+
558
+
559
+ void align_32_bits(struct bc_read_stream *stream)
560
+ {
561
+ if(stream->num_next_bits != 32)
562
+ refill_next_bits(stream);
563
+ }
564
+
565
+ struct blockinfo *find_blockinfo(struct bc_read_stream *stream, uint32_t block_id)
566
+ {
567
+ int i;
568
+ for(i = 0; i < stream->blockinfo_len; i++)
569
+ if(stream->blockinfos[i].block_id == block_id)
570
+ return &stream->blockinfos[i];
571
+
572
+ return NULL;
573
+ }
574
+
575
+ struct blockinfo *find_or_create_blockinfo(struct bc_read_stream *stream, int block_id)
576
+ {
577
+ struct blockinfo *bi = find_blockinfo(stream, block_id);
578
+
579
+ if(bi)
580
+ {
581
+ return bi;
582
+ }
583
+ else
584
+ {
585
+ RESIZE_ARRAY_IF_NECESSARY(stream->blockinfos, stream->blockinfo_size, stream->blockinfo_len+1);
586
+
587
+ struct blockinfo *new_bi = &stream->blockinfos[stream->blockinfo_len++];
588
+
589
+ new_bi->block_id = block_id;
590
+ new_bi->num_abbreviations = 0;
591
+ new_bi->size_abbreviations = 8;
592
+ new_bi->abbreviations = malloc(new_bi->size_abbreviations * sizeof(*new_bi->abbreviations));
593
+
594
+ return new_bi;
595
+ }
596
+ }
597
+
598
+ static void pop_stack_frame(struct bc_read_stream *stream)
599
+ {
600
+ stream->stream_stack_len = stream->block_metadata - stream->stream_stack;
601
+ if(stream->stream_stack_len == 0)
602
+ {
603
+ stream->record_type = Eof;
604
+ return;
605
+ }
606
+
607
+ stream->num_abbrevs = 0;
608
+ stream->block_metadata--;
609
+ while(stream->block_metadata->type == Abbreviation)
610
+ {
611
+ stream->num_abbrevs++;
612
+ stream->block_metadata--;
613
+ }
614
+
615
+ stream->abbrev_len = stream->block_metadata->e.block_metadata.abbrev_len;
616
+ stream->block_id = stream->block_metadata->e.block_metadata.block_id;
617
+ stream->blockinfo = find_blockinfo(stream, stream->block_id);
618
+ }
619
+
620
+
621
+ void bc_rs_next_record(struct bc_read_stream *stream)
622
+ {
623
+ /* don't attempt to read past eof */
624
+ if(stream->record_type == Eof) return;
625
+
626
+ int abbrev_id = read_fixed(stream, stream->abbrev_len);
627
+ stream->current_record_offset = 0;
628
+ int i;
629
+
630
+ switch(abbrev_id) {
631
+ case ABBREV_ID_END_BLOCK:
632
+ stream->record_type = EndBlock;
633
+ stream->old_block_metadata = stream->block_metadata;
634
+
635
+ align_32_bits(stream);
636
+ pop_stack_frame(stream);
637
+
638
+ break;
639
+
640
+ case ABBREV_ID_ENTER_SUBBLOCK:
641
+ stream->block_id = read_vbr(stream, 8);
642
+ stream->abbrev_len = read_vbr(stream, 4);
643
+ align_32_bits(stream);
644
+ stream->block_len = read_fixed(stream, 32);
645
+ stream->record_type = StartBlock;
646
+
647
+ RESIZE_ARRAY_IF_NECESSARY(stream->stream_stack, stream->stream_stack_size,
648
+ stream->stream_stack_len+1);
649
+
650
+ stream->block_metadata = &stream->stream_stack[stream->stream_stack_len++];
651
+ stream->block_metadata->type = BlockMetadata;
652
+ stream->block_metadata->e.block_metadata.block_id = stream->block_id;
653
+ stream->block_metadata->e.block_metadata.abbrev_len = stream->abbrev_len;
654
+ stream->block_metadata->e.block_metadata.block_offset = stream->stream_offset;
655
+ stream->block_metadata->e.block_metadata.block_len = stream->block_len;
656
+
657
+ //printf("++ Entering block id=%d, offset=%d\n", stream->block_id, stream->stream_offset);
658
+
659
+ stream->blockinfo = find_or_create_blockinfo(stream, stream->block_id);
660
+ break;
661
+
662
+ case ABBREV_ID_DEFINE_ABBREV:
663
+ stream->record_type = DefineAbbrev;
664
+ stream->record_num_abbrev = read_vbr(stream, 5);
665
+
666
+ RESIZE_ARRAY_IF_NECESSARY(stream->record_abbrev_operands, stream->record_size_abbrev,
667
+ stream->record_num_abbrev);
668
+
669
+ for(i = 0; i < stream->record_num_abbrev; i++)
670
+ {
671
+ read_abbrev_op(stream, &stream->record_abbrev_operands[i], 0);
672
+ }
673
+
674
+ break;
675
+
676
+ case ABBREV_ID_UNABBREV_RECORD:
677
+ stream->record_type = DataRecord;
678
+ stream->record_id = read_vbr(stream, 6);
679
+
680
+ stream->current_record_size = read_vbr(stream, 6);
681
+
682
+ RESIZE_ARRAY_IF_NECESSARY(stream->record_buf, stream->record_buf_size,
683
+ stream->current_record_size+1);
684
+
685
+ for(i = 0; i < stream->current_record_size; i++)
686
+ stream->record_buf[i] = read_vbr(stream, 6);
687
+ break;
688
+
689
+ default:
690
+ {
691
+ /* This must be a user-defined abbreviation. It could come from the
692
+ * blockinfo-defined abbreviations or abbreviations defined in this
693
+ * block. */
694
+ stream->record_type = DataRecord;
695
+ int user_abbrev_id = abbrev_id - 4;
696
+ int num_blockinfo_abbrevs = stream->blockinfo ? stream->blockinfo->num_abbreviations : 0;
697
+ int block_abbrev_id = user_abbrev_id - num_blockinfo_abbrevs;
698
+ if(user_abbrev_id < num_blockinfo_abbrevs)
699
+ {
700
+ struct blockinfo_abbrev *a = &stream->blockinfo->abbreviations[user_abbrev_id];
701
+ read_user_abbreviated_record(stream, a->operands, a->num_operands);
702
+ }
703
+ else if(block_abbrev_id < stream->num_abbrevs)
704
+ {
705
+ struct stream_stack_entry *e = stream->block_metadata + block_abbrev_id + 1;
706
+ struct abbrev_operand *o = stream->abbrev_operands + e->e.abbrev.first_operand_offset;
707
+ read_user_abbreviated_record(stream, o, e->e.abbrev.num_operands);
708
+ }
709
+ else
710
+ {
711
+ stream->stream_err |= BITCODE_ERR_CORRUPT_INPUT;
712
+ }
713
+ break;
714
+ }
715
+ }
716
+ }
717
+
718
+ struct record_info bc_rs_next_data_record(struct bc_read_stream *stream)
719
+ {
720
+ int i;
721
+
722
+ while(1)
723
+ {
724
+ bc_rs_next_record(stream);
725
+
726
+ if(stream->record_type == DefineAbbrev)
727
+ {
728
+ int num_ops = stream->record_num_abbrev;
729
+
730
+ RESIZE_ARRAY_IF_NECESSARY(stream->stream_stack, stream->stream_stack_size,
731
+ stream->stream_stack_len+1);
732
+ RESIZE_ARRAY_IF_NECESSARY(stream->abbrev_operands, stream->abbrev_operands_size,
733
+ stream->abbrev_operands_len+num_ops+1);
734
+
735
+ struct stream_stack_entry *e = &stream->stream_stack[stream->stream_stack_len++];
736
+ e->type = Abbreviation;
737
+ e->e.abbrev.first_operand_offset = stream->abbrev_operands_len;
738
+ e->e.abbrev.num_operands = num_ops;
739
+ struct abbrev_operand *abbrev_operands = &stream->abbrev_operands[stream->abbrev_operands_len];
740
+ stream->abbrev_operands_len += num_ops;
741
+
742
+ for(i = 0; i < num_ops; i++)
743
+ abbrev_operands[i] = stream->record_abbrev_operands[i];
744
+
745
+ stream->num_abbrevs++;
746
+ }
747
+ else if(stream->record_type == StartBlock && stream->block_id == STDBLOCK_BLOCKINFO)
748
+ {
749
+ /* The first record must be a SETBID record */
750
+ bc_rs_next_record(stream);
751
+ struct blockinfo *bi = NULL;
752
+
753
+ while(1)
754
+ {
755
+ if(stream->record_type == EndBlock)
756
+ {
757
+ break;
758
+ }
759
+ else if(stream->record_type == Err || stream->record_type == Eof)
760
+ {
761
+ struct record_info ri;
762
+ ri.record_type = stream->record_type;
763
+ ri.id = 0;
764
+ return ri;
765
+ }
766
+ else if(stream->record_type == DataRecord)
767
+ {
768
+ if(stream->record_id == BLOCKINFO_BLOCK_SETBID)
769
+ {
770
+ if(stream->current_record_size != 1)
771
+ {
772
+ /* TODO */
773
+ stream->stream_err |= BITCODE_ERR_CORRUPT_INPUT;
774
+ }
775
+ bi = find_or_create_blockinfo(stream, stream->record_buf[0]);
776
+ }
777
+ }
778
+ else if(stream->record_type == DefineAbbrev)
779
+ {
780
+
781
+ if(bi == NULL)
782
+ {
783
+ /* TODO */
784
+ stream->stream_err |= BITCODE_ERR_CORRUPT_INPUT;
785
+ }
786
+
787
+ RESIZE_ARRAY_IF_NECESSARY(bi->abbreviations,
788
+ bi->size_abbreviations, bi->num_abbreviations+1);
789
+
790
+ struct blockinfo_abbrev *abbrev = &bi->abbreviations[bi->num_abbreviations++];
791
+ abbrev->num_operands = stream->record_num_abbrev;
792
+ abbrev->operands = malloc(sizeof(*abbrev->operands) * abbrev->num_operands);
793
+ for(i = 0; i < abbrev->num_operands; i++)
794
+ abbrev->operands[i] = stream->record_abbrev_operands[i];
795
+ }
796
+
797
+ bc_rs_next_record(stream);
798
+ }
799
+
800
+ }
801
+ else
802
+ {
803
+ struct record_info ri;
804
+ ri.record_type = stream->record_type;
805
+ ri.id = 0;
806
+
807
+ if(ri.record_type == StartBlock) ri.id = stream->block_id;
808
+ else if(ri.record_type == DataRecord) ri.id = stream->record_id;
809
+
810
+ return ri;
811
+ }
812
+ }
813
+ }
814
+
815
+ int bc_rs_get_error(struct bc_read_stream *stream)
816
+ {
817
+ return stream->stream_err;
818
+ }
819
+
820
+ int bc_rs_get_record_size(struct bc_read_stream *stream)
821
+ {
822
+ return stream->current_record_size;
823
+ }
824
+
825
+ int bc_rs_get_remaining_record_size(struct bc_read_stream *stream)
826
+ {
827
+ return stream->current_record_size - stream->current_record_offset;
828
+ }
829
+
830
+ void bc_rs_skip_block(struct bc_read_stream *stream)
831
+ {
832
+ int offset = stream->block_metadata->e.block_metadata.block_offset +
833
+ (stream->block_metadata->e.block_metadata.block_len * 4);
834
+
835
+ if(stream->infile)
836
+ fseek(stream->infile, offset, SEEK_SET);
837
+
838
+ stream->stream_offset = offset-4;
839
+ refill_next_bits(stream);
840
+ pop_stack_frame(stream);
841
+ }
842
+
843
+ void bc_rs_rewind_block(struct bc_read_stream *stream)
844
+ {
845
+ if(stream->record_type == EndBlock)
846
+ {
847
+ stream->num_abbrevs = stream->old_block_metadata - stream->block_metadata - 1;
848
+ stream->block_metadata = stream->old_block_metadata;
849
+ stream->abbrev_len = stream->block_metadata->e.block_metadata.abbrev_len;
850
+ stream->block_id = stream->block_metadata->e.block_metadata.block_id;
851
+ stream->blockinfo = find_or_create_blockinfo(stream, stream->block_id);
852
+ stream->stream_stack_len = stream->block_metadata - stream->stream_stack + 1;
853
+ }
854
+
855
+ int offset = stream->block_metadata->e.block_metadata.block_offset;
856
+
857
+ if(stream->infile)
858
+ fseek(stream->infile, offset, SEEK_SET);
859
+
860
+ stream->stream_offset = offset-4;
861
+ refill_next_bits(stream);
862
+ align_32_bits(stream);
863
+ }
864
+
865
+ /*
866
+ * Local Variables:
867
+ * c-file-style: "bsd"
868
+ * c-basic-offset: 4
869
+ * indent-tabs-mode: nil
870
+ * End:
871
+ * vim:et:sts=4:sw=4
872
+ */