RubyGems - smtlaissezfaire-gazelle - Versions diffs - 0.1.0 - Mend

smtlaissezfaire-gazelle 0.1.0

Files changed (23) hide show

data/Rakefile +4 -0
data/ext/gazelle_ruby_bindings/extconf.rb +6 -0
data/ext/gazelle_ruby_bindings/gazelle_ruby_bindings.c +119 -0
data/ext/gazelle_ruby_bindings/gazelle_ruby_bindings.h +20 -0
data/ext/gazelle_ruby_bindings/includes/bc_read_stream.c +872 -0
data/ext/gazelle_ruby_bindings/includes/load_grammar.c +563 -0
data/ext/gazelle_ruby_bindings/includes/parse.c +813 -0
data/lib/gazelle.rb +11 -0
data/lib/gazelle/gemspec.rb +39 -0
data/lib/gazelle/parser.rb +31 -0
data/spec/gazelle_integration_spec.rb +97 -0
data/spec/hello.gzc +0 -0
data/spec/hello.gzl +1 -0
data/spec/invalid_format.gzc +0 -0
data/spec/spec.opts +1 -0
data/spec/spec_helper.rb +5 -0
data/tasks/c_extensions.rake +4 -0
data/tasks/flog.rake +10 -0
data/tasks/gem.rake +8 -0
data/tasks/rspec.rake +20 -0
data/tasks/sloc.rake +16 -0
data/tasks/tags.rake +23 -0
metadata +75 -0

data/Rakefile ADDED Viewed

@@ -0,0 +1,4 @@
+# Load every file found beneath the tasks/ directory next to this Rakefile.
+task_pattern = "#{File.dirname(__FILE__)}/tasks/**/**"
+Dir.glob(task_pattern).each { |file| load file }

data/ext/gazelle_ruby_bindings/extconf.rb ADDED Viewed

@@ -0,0 +1,6 @@
+# Build configuration for the gazelle_ruby_bindings C extension.
+require 'mkmf'
+# Ask the compiler for extra warnings while building the extension.
+$CFLAGS += " -W -Wall"
+# Let the builder point at custom include/lib directories for this extension.
+dir_config("gazelle_ruby_bindings")
+# Write the Makefile that compiles the extension.
+create_makefile("gazelle_ruby_bindings")

data/ext/gazelle_ruby_bindings/gazelle_ruby_bindings.c ADDED Viewed

@@ -0,0 +1,119 @@
+#ifndef GAZELLE_RUBY_BINDINGS_C
+#define GAZELLE_RUBY_BINDINGS_C
+#include <stdbool.h>
+#include <ruby.h>
+#include <gazelle/dynarray.h>
+#include "includes/bc_read_stream.c"
+#include "includes/load_grammar.c"
+#include "includes/parse.c"
+#include "gazelle_ruby_bindings.h"
+/* ERROR FUNCTIONS */
+/* Set to 1 by error_terminal_callback during a parse; run_gazelle_parse
+ * reads it afterwards to decide between Qtrue and Qfalse.  It is a single
+ * file-scope flag shared by every parse. */
+static int terminal_error = 0;
+/* Installed as the grammar's error_char_cb; currently ignores the error. */
+static void error_char_callback() {
+  // TODO: do something intelligent here
+}
+/* Installed as the grammar's error_terminal_cb; records the failure. */
+static void error_terminal_callback() {
+  terminal_error = 1;
+}
+/* Clears the flag; run_grammar calls this before every parse. */
+static void reset_terminal_error() {
+  terminal_error = 0;
+}
+/* General Gazelle integration */
+/* Bind `state` to the grammar in `bg`, then hand the whole of `input` to
+ * Gazelle.  The length passed on counts the terminating NUL byte as well
+ * (strlen + 1). */
+static void rb_gzl_parse(char *input, ParseState *state, BoundGrammar *bg) {
+  size_t input_len;
+  gzl_init_parse_state(state, bg);
+  input_len = strlen(input) + 1;
+  gzl_parse(state, input, input_len);
+}
+/* Returns the Ruby parser object stored in the per-parse user data. */
+static VALUE user_data_obj(RbUserData *user_data) {
+  VALUE parser_object = user_data->self;
+  return parser_object;
+}
+/* Returns the input text stored in the per-parse user data. */
+static char *user_data_input(RbUserData *user_data) {
+  char *text = user_data->input;
+  return text;
+}
+/* End-of-rule hook (installed as end_rule_cb by run_grammar): looks up the
+ * rule on top of the parse stack and calls `run_rule(rule_name, input)` on
+ * the Ruby parser object kept in the parse state's user data. */
+static void end_rule_callback(ParseState *parse_state)
+{
+  struct gzl_parse_stack_frame *top = DYNARRAY_GET_TOP(parse_state->parse_stack);
+  VALUE receiver = user_data_obj(parse_state->user_data);
+  VALUE rule     = rb_str_new2(top->f.rtn_frame.rtn->name);
+  VALUE text     = rb_str_new2(user_data_input(parse_state->user_data));
+  rb_funcall(receiver, rb_intern("run_rule"), 2, rule, text);
+}
+/* Allocate the per-parse user data (Ruby receiver plus input text) and
+ * attach it to `state` so that parse callbacks can reach both.
+ * Raises NoMemoryError if the allocation fails.  The allocation is not
+ * released by this function. */
+static void mk_user_data(ParseState *state, VALUE self, char *input) {
+  /* sizeof(*data), not sizeof(RbUserData *): the pointer-sized request was
+   * too small for the two-field struct, so the stores below overran it. */
+  RbUserData *data = malloc(sizeof(*data));
+  if (data == NULL)
+    rb_raise(rb_eNoMemError, "failed to allocate gazelle user data");
+  data->self  = self;
+  data->input = input;
+  state->user_data = data;
+}
+/* Load the compiled grammar stored in `filename` and parse `input` with it.
+ *   self          - Ruby parser object made available to the callbacks
+ *   run_callbacks - when true, end_rule_callback is installed
+ * Returns 1 if the file cannot be opened as a bitcode stream, 0 otherwise.
+ * Terminal errors are reported through the terminal_error flag. */
+static int run_grammar(VALUE self, char *filename, char *input, bool run_callbacks) {
+  reset_terminal_error();
+  struct bc_read_stream *s = bc_rs_open_file(filename);
+  if (!s)
+    return 1; // should raise an invalid file format error in ruby instead
+  struct gzl_grammar *g = gzl_load_grammar(s);
+  bc_rs_close_stream(s);
+  ParseState *state = gzl_alloc_parse_state();
+  mk_user_data(state, self, input);
+  /* Members not named here (e.g. end_rule_cb) start out zeroed. */
+  BoundGrammar bg = {
+    .grammar           = g,
+    .error_char_cb     = error_char_callback,
+    .error_terminal_cb = error_terminal_callback
+  };
+  if (run_callbacks) {
+    bg.end_rule_cb = end_rule_callback;
+  }
+  rb_gzl_parse(input, state, &bg);
+  /* The user data was malloc'd by mk_user_data and nothing reads it once
+   * the parse has finished; release it instead of leaking it per call. */
+  free(state->user_data);
+  state->user_data = NULL;
+  /* NOTE(review): `g` and `state` are still never released here -- free
+   * them with the matching Gazelle routines once confirmed. */
+  return 0;
+}
+/* Shared implementation of Parser#parse and Parser#parse?.
+ * Reads the compiled grammar path from @filename, parses `input`, and
+ * returns Qtrue on success or Qfalse when the grammar could not be opened
+ * or a terminal error was reported.
+ * Raises TypeError when @filename or `input` is not string-like (they were
+ * previously dereferenced unchecked), and ArgumentError when @filename
+ * contains a NUL byte. */
+static VALUE run_gazelle_parse(VALUE self, VALUE input, bool run_callbacks) {
+  VALUE compiled_file_stream = rb_iv_get(self, "@filename");
+  char *filename     = StringValueCStr(compiled_file_stream);
+  char *input_string = StringValuePtr(input);
+  if (run_grammar(self, filename, input_string, run_callbacks))
+    return Qfalse;
+  return(terminal_error ? Qfalse : Qtrue);
+}
+/* Public Ruby methods */
+/* Parser#parse?(input): parse without invoking the end-of-rule callback. */
+static VALUE rb_gazelle_parse_p(VALUE self, VALUE input) {
+  const bool with_callbacks = false;
+  return run_gazelle_parse(self, input, with_callbacks);
+}
+/* Parser#parse(input): parse and invoke the end-of-rule callback. */
+static VALUE rb_gazelle_parse(VALUE self, VALUE input) {
+  const bool with_callbacks = true;
+  return run_gazelle_parse(self, input, with_callbacks);
+}
+/* Hook up the ruby methods.  Similar to lua's luaopen_(mod) functions */
+/* Looks up the already-defined Gazelle::Parser constant and attaches the
+ * C implementations of #parse and #parse? to it. */
+void Init_gazelle_ruby_bindings() {
+  VALUE gazelle_module = rb_const_get(rb_cObject, rb_intern("Gazelle"));
+  VALUE parser_class   = rb_const_get_at(gazelle_module, rb_intern("Parser"));
+  rb_define_method(parser_class, "parse",  rb_gazelle_parse, 1);
+  rb_define_method(parser_class, "parse?", rb_gazelle_parse_p, 1);
+}
+#endif /* GAZELLE_RUBY_BINDINGS_C */

data/ext/gazelle_ruby_bindings/gazelle_ruby_bindings.h ADDED Viewed

@@ -0,0 +1,20 @@
+#ifndef GAZELLE_RUBY_BINDINGS_H
+#define GAZELLE_RUBY_BINDINGS_H
+/* Pointer to the character data of a Ruby String VALUE.  Uses Ruby's own
+ * RSTRING_PTR accessor when the headers provide it and falls back to the
+ * direct field access otherwise.  The expansion is parenthesized so it can
+ * be used inside larger expressions. */
+#ifdef RSTRING_PTR
+#define RSTRING_TO_PTR(x) (RSTRING_PTR(x))
+#else
+#define RSTRING_TO_PTR(x) (RSTRING(x)->ptr)
+#endif
+/* Short aliases for the structs used throughout the bindings. */
+typedef struct gzl_parse_state   ParseState;
+typedef struct gzl_bound_grammar BoundGrammar;
+typedef struct rb_gzl_user_data  RbUserData;
+/* Per-parse context; mk_user_data stores a pointer to one of these in the
+ * parse state's user_data field so callbacks can find it. */
+struct rb_gzl_user_data {
+  /* The pointer to the current ruby parser object. */
+  VALUE self;
+  /* The input given to the parse function */
+  char *input;
+};
+/* Defines the parse and parse? methods on Gazelle::Parser. */
+void Init_gazelle_ruby_bindings();
+#endif /* GAZELLE_RUBY_BINDINGS_H */

data/ext/gazelle_ruby_bindings/includes/bc_read_stream.c ADDED Viewed

@@ -0,0 +1,872 @@
+/*********************************************************************
+  Gazelle: a system for building fast, reusable parsers
+  bc_read_stream.c
+  This file contains routines for reading files in Bitcode format.
+  It is a stream interface -- the stream keeps only one record in
+  memory at a time, and is designed to have a very small memory
+  footprint.
+  Copyright (c) 2007 Joshua Haberman.  See LICENSE for details.
+*********************************************************************/
+#include "gazelle/bc_read_stream.h"
+/* Operand encodings that may appear in an abbreviation definition
+ * (read_abbrev_op reads them from a 3-bit field). */
+#define OP_ENCODING_FIXED 1
+#define OP_ENCODING_VBR   2
+#define OP_ENCODING_ARRAY 3
+#define OP_ENCODING_CHAR6 4
+/* Abbreviation IDs with a built-in meaning; bc_rs_next_record treats every
+ * other ID as a user-defined abbreviation. */
+#define ABBREV_ID_END_BLOCK       0
+#define ABBREV_ID_ENTER_SUBBLOCK  1
+#define ABBREV_ID_DEFINE_ABBREV   2
+#define ABBREV_ID_UNABBREV_RECORD 3
+/* Block ID that bc_rs_next_data_record handles as a BLOCKINFO block. */
+#define STDBLOCK_BLOCKINFO 0
+/* Record ID, inside a BLOCKINFO block, whose single value selects the block
+ * ID that the following abbreviation definitions are stored under. */
+#define BLOCKINFO_BLOCK_SETBID 1
+#define RESIZE_ARRAY_IF_NECESSARY(ptr, size, desired_size) \
+    if(size < desired_size) \
+    { \
+        size *= 2; \
+        ptr = realloc(ptr, size*sizeof(*ptr)); \
+    }
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+/* Abbreviations recorded for one block ID while reading a BLOCKINFO block
+ * (see bc_rs_next_data_record). */
+struct blockinfo {
+    uint32_t block_id;
+    /* number of entries in use / allocated capacity of `abbreviations` */
+    int num_abbreviations;
+    int size_abbreviations;
+    struct blockinfo_abbrev {
+        int num_operands;
+        /* heap array of num_operands entries, owned by this abbreviation */
+        struct abbrev_operand *operands;
+    } *abbreviations;
+};
+/* One entry of the stream stack: either the metadata of an entered block,
+ * or an abbreviation defined inside the block whose metadata lies below it
+ * on the stack.  `type` says which union member is valid. */
+struct stream_stack_entry
+{
+    union {
+        struct block_metadata {
+            int abbrev_len;
+            int block_id;
+            /* stream_offset recorded when the block was entered */
+            int block_offset;
+            /* length from the block header; bc_rs_skip_block multiplies it by 4 */
+            int block_len;
+        } block_metadata;
+        struct {
+            /* index of the first operand within stream->abbrev_operands */
+            int first_operand_offset;
+            int num_operands;
+        } abbrev;
+    } e;
+    enum EntryType {
+        BlockMetadata,
+        Abbreviation
+    } type;
+};
+/* One operand of an abbreviation definition: either a literal value or a
+ * description of how to decode the value from the stream.  `type` says
+ * which union member is valid. */
+struct abbrev_operand
+{
+    union {
+        long long literal_value;
+        struct {
+            /* one of the OP_ENCODING_* constants */
+            unsigned char encoding;
+            /* bit width; only filled in for the FIXED and VBR encodings */
+            int value;
+        } encoding_info;
+    } o;
+    enum OperandType {
+        Literal,
+        EncodingInfo
+    } type;
+};
+/* Complete reader state for one bitcode stream.  Input comes from `infile`
+ * when it is non-NULL and from `inmem` otherwise (see refill_next_bits).
+ * Each `*_size` field is the allocated capacity of the array it accompanies
+ * and each `*_len` field the number of entries in use. */
+struct bc_read_stream
+{
+    /* Values for the stream */
+    FILE *infile;
+    unsigned char *inmem;
+    /* up to 32 buffered input bits, and how many of them are still unread */
+    uint32_t next_bits;
+    int num_next_bits;
+    /* bitwise OR of BITCODE_ERR_* flags */
+    int stream_err;
+    int stream_offset;
+    /* metadata of the block just left; used by bc_rs_rewind_block */
+    struct stream_stack_entry *old_block_metadata;
+    /* Values for the current block */
+    int abbrev_len;
+    int num_abbrevs;
+    /* points into stream_stack */
+    struct stream_stack_entry *block_metadata;
+    struct blockinfo *blockinfo;
+    /* Values for the current record */
+    enum RecordType record_type;
+    /*  - for data records */
+    int record_id;
+    int current_record_size;
+    int current_record_offset;
+    int record_buf_size;
+    uint64_t *record_buf;
+    /*  - for StartBlock records */
+    int block_id;
+    int block_len;
+    /*  - for DefineAbbrev records */
+    int record_size_abbrev;
+    int record_num_abbrev;
+    struct abbrev_operand *record_abbrev_operands;
+    /* The stream stack */
+    int stream_stack_size;
+    int stream_stack_len;
+    struct stream_stack_entry *stream_stack;
+    int abbrev_operands_size;
+    int abbrev_operands_len;
+    struct abbrev_operand *abbrev_operands;
+    /* Data about blockinfo records we have encountered */
+    int blockinfo_size;
+    int blockinfo_len;
+    struct blockinfo *blockinfos;
+};
+/*
+void print_abbrev(struct abbrev_operand *operands, int num_operands)
+{
+    printf("Abbrev: num_operands=%d\n", num_operands);
+    for(int i = 0; i < num_operands; i++)
+    {
+        struct abbrev_operand *o = &operands[i];
+        if(o->type == Literal)
+        {
+            printf("  Literal value: %llu\n", o->o.literal_value);
+        }
+        else if(o->type == EncodingInfo)
+        {
+            printf("  EncodingInfo: encoding=%u, value=%d\n", o->o.encoding_info.encoding,
+                                                            o->o.encoding_info.value);
+        }
+    }
+}
+void dump_stack(struct bc_read_stream *s)
+{
+    printf("Stream stack: %d entries\n", s->stream_stack_len);
+    for(int i = 0; i < s->stream_stack_len; i++)
+    {
+        printf("-  ");
+        struct stream_stack_entry *e = &s->stream_stack[i];
+        if(e->type == Abbreviation)
+        {
+            print_abbrev(s->abbrev_operands + e->e.abbrev.first_operand_offset, e->e.abbrev.num_operands);
+        }
+        else if(e->type == BlockMetadata)
+        {
+            printf("BlockMetadata: abbrev_len=%d, block_id=%d\n", e->e.block_metadata.abbrev_len,
+                                                                e->e.block_metadata.block_id);
+        }
+    }
+}
+void dump_blockinfo(struct blockinfo *bi)
+{
+    if(bi)
+    {
+        printf("Blockinfo! BlockID: %u,  Abbrevs:\n", bi->block_id);
+        for(int i = 0; i < bi->num_abbreviations; i++)
+            print_abbrev(bi->abbreviations[i].operands, bi->abbreviations[i].num_operands);
+    }
+}
+*/
+/* Forward declarations: both functions are used by the open routines
+ * before their definitions appear further down. */
+static int refill_next_bits(struct bc_read_stream *stream);
+struct bc_read_stream *bc_read_stream_init();
+/* Create a stream that reads bitcode from the memory at `data`; the buffer
+ * is referenced, not copied.  Because refill_next_bits advances the offset
+ * by 4 before it reads, the first word consumed starts at data + 4.
+ * No length is supplied, so reads are not bounds-checked. */
+struct bc_read_stream *bc_rs_open_mem(const char *data)
+{
+    struct bc_read_stream *mem_stream = bc_read_stream_init();
+    mem_stream->inmem = (unsigned char *)data;
+    refill_next_bits(mem_stream);
+    return mem_stream;
+}
+/* Open `filename` as a bitcode stream.
+ * Returns NULL when the file cannot be opened, is shorter than the 4-byte
+ * magic number, does not start with "BC", or the stream state cannot be
+ * allocated.  On success the stream owns the FILE handle;
+ * bc_rs_close_stream closes it. */
+struct bc_read_stream *bc_rs_open_file(const char *filename)
+{
+    /* "rb": bitcode is binary data and must not go through text-mode
+     * translation on platforms that distinguish the two. */
+    FILE *infile = fopen(filename, "rb");
+    if(infile == NULL)
+    {
+        return NULL;
+    }
+    char magic[4];
+    size_t ret = fread(magic, 4, 1, infile);
+    if(ret < 1 || magic[0] != 'B' || magic[1] != 'C')
+    {
+        fclose(infile);
+        return NULL;
+    }
+    struct bc_read_stream *stream = bc_read_stream_init();
+    if(stream == NULL)
+    {
+        fclose(infile);
+        return NULL;
+    }
+    stream->infile = infile;
+    refill_next_bits(stream);
+    return stream;
+}
+/* Allocate a stream with no input attached and all buffers at their
+ * initial capacity of 8 entries.
+ * Returns NULL if any allocation fails (nothing is leaked in that case). */
+struct bc_read_stream *bc_read_stream_init()
+{
+    /* TODO: give the application a way to get the app-specific magic number */
+    /* calloc zeroes every field.  Previously blockinfo, inmem,
+     * old_block_metadata, current_record_size and several other fields were
+     * left uninitialized although later code reads them. */
+    struct bc_read_stream *stream = calloc(1, sizeof(*stream));
+    if(stream == NULL)
+        return NULL;
+    stream->abbrev_len = 2;    /* its initial value according to the spec */
+    stream->stream_stack_size = 8;  /* enough for a few levels of nesting and a few abbrevs */
+    stream->abbrev_operands_size = 8;
+    stream->blockinfo_size = 8;
+    stream->record_buf_size = 8;
+    stream->record_size_abbrev = 8;
+    stream->stream_stack    = calloc(stream->stream_stack_size, sizeof(*stream->stream_stack));
+    stream->abbrev_operands = calloc(stream->abbrev_operands_size, sizeof(*stream->abbrev_operands));
+    stream->blockinfos      = calloc(stream->blockinfo_size, sizeof(*stream->blockinfos));
+    stream->record_buf      = calloc(stream->record_buf_size, sizeof(*stream->record_buf));
+    stream->record_abbrev_operands = calloc(stream->record_size_abbrev,
+                                            sizeof(*stream->record_abbrev_operands));
+    if(!stream->stream_stack || !stream->abbrev_operands || !stream->blockinfos ||
+       !stream->record_buf || !stream->record_abbrev_operands)
+    {
+        free(stream->record_abbrev_operands);
+        free(stream->record_buf);
+        free(stream->blockinfos);
+        free(stream->abbrev_operands);
+        free(stream->stream_stack);
+        free(stream);
+        return NULL;
+    }
+    /* we create an outermost stack frame -- this exists mostly to store
+     * the abbrev length of the outermost scope, and to store a bogus
+     * block_id so that we'll never find a blockinfo for the outer scope */
+    stream->stream_stack_len  = 1;
+    stream->block_metadata    = &stream->stream_stack[0];
+    stream->block_metadata->type = BlockMetadata;
+    stream->block_metadata->e.block_metadata.abbrev_len = stream->abbrev_len;
+    stream->block_metadata->e.block_metadata.block_id = -1;
+    stream->record_type = DataRecord;  /* anything besides Eof */
+    return stream;
+}
+/* Release everything owned by `stream`: the operand arrays of every stored
+ * blockinfo abbreviation, all growable buffers, the input file (if one is
+ * attached) and finally the stream itself. */
+void bc_rs_close_stream(struct bc_read_stream *stream)
+{
+    int bi;
+    for(bi = 0; bi < stream->blockinfo_len; bi++)
+    {
+        struct blockinfo *info = &stream->blockinfos[bi];
+        int ab;
+        for(ab = 0; ab < info->num_abbreviations; ab++)
+            free(info->abbreviations[ab].operands);
+        free(info->abbreviations);
+    }
+    free(stream->blockinfos);
+    free(stream->stream_stack);
+    free(stream->abbrev_operands);
+    free(stream->record_buf);
+    free(stream->record_abbrev_operands);
+    if(stream->infile)
+        fclose(stream->infile);
+    free(stream);
+}
+/* Return value number `i` (0-based) of the current record.
+ * If `i` is outside 0 .. current_record_size-1, sets
+ * BITCODE_ERR_NO_SUCH_VALUE and returns 0.  The old `i > size` test let
+ * i == size (one past the last value) and negative indexes through. */
+uint64_t bc_rs_read_64(struct bc_read_stream *stream, int i)
+{
+    if(i < 0 || i >= stream->current_record_size)
+    {
+        stream->stream_err |= BITCODE_ERR_NO_SUCH_VALUE;
+        return 0;
+    }
+    else
+    {
+        return stream->record_buf[i];
+    }
+}
+/* Defines bc_rs_read_<bits>(stream, i): value `i` of the current record
+ * narrowed to `type`.  Returns 0 and sets BITCODE_ERR_VALUE_TOO_LARGE when
+ * the value does not fit in `bits` bits.  The range test uses the value
+ * returned by bc_rs_read_64 so that an out-of-range index, already rejected
+ * there, never reaches record_buf. */
+#define GETTER_FUNC(type, bits) \
+  type bc_rs_read_ ## bits (struct bc_read_stream *stream, int i) \
+  {                                                            \
+      uint64_t val = bc_rs_read_64(stream, i);                 \
+      if(val > ((1ULL << bits) - 1))                           \
+      {                                                        \
+          stream->stream_err |= BITCODE_ERR_VALUE_TOO_LARGE;   \
+          return 0;                                            \
+      }                                                        \
+      else                                                     \
+      {                                                        \
+          return (type)val;                                    \
+      }                                                        \
+  }
+GETTER_FUNC(uint8_t, 8)
+GETTER_FUNC(uint16_t, 16)
+GETTER_FUNC(uint32_t, 32)
+/* Defines bc_rs_read_next_<bits>(stream): reads the value at the record
+ * cursor (current_record_offset) and advances the cursor by one.
+ * The closing brace must not end in a line continuation, otherwise the
+ * line after it is spliced into the macro body. */
+#define NEXT_GETTER_FUNC(type, bits) \
+  type bc_rs_read_next_ ## bits (struct bc_read_stream *stream)     \
+  {                                                                 \
+      return bc_rs_read_ ## bits(stream, stream->current_record_offset++); \
+  }
+NEXT_GETTER_FUNC(uint8_t, 8)
+NEXT_GETTER_FUNC(uint16_t, 16)
+NEXT_GETTER_FUNC(uint32_t, 32)
+NEXT_GETTER_FUNC(uint64_t, 64)
+/* Load the next four input bytes into next_bits as a little-endian 32-bit
+ * word and mark all 32 bits as available.  stream_offset is advanced by 4
+ * before the read, for files and memory input alike.
+ * Returns 0 on success, -1 when the file read comes up short (on an I/O
+ * error BITCODE_ERR_IO is also set).  Memory input is not bounds-checked. */
+static int refill_next_bits(struct bc_read_stream *stream)
+{
+    unsigned char buf[4];
+    stream->stream_offset += 4;
+    if(stream->infile)
+    {
+        int ret = fread(buf, 4, 1, stream->infile);
+        if(ret < 1)
+        {
+            //if(feof(stream->infile))
+            //    stream->record_type = Eof;
+            if(ferror(stream->infile))
+                stream->stream_err |= BITCODE_ERR_IO;
+            return -1;
+        }
+    }
+    else
+    {
+        memcpy(buf, stream->inmem + stream->stream_offset, 4);
+    }
+    /* Widen each byte to uint32_t before shifting: buf[3] << 24 on a
+     * promoted int shifts into the sign bit when the byte is >= 0x80. */
+    stream->next_bits = (uint32_t)buf[0]
+                      | ((uint32_t)buf[1] << 8)
+                      | ((uint32_t)buf[2] << 16)
+                      | ((uint32_t)buf[3] << 24);
+    stream->num_next_bits = 32;
+    return 0;
+}
+/* The low `num_bits` bits (0..32) of a 32-bit value.  A width of zero
+ * yields 0 instead of shifting by 32, which is undefined.  `num_bits` is
+ * evaluated twice, so pass an expression without side effects. */
+#define LOW_BITS(bitfield, num_bits) \
+    ((num_bits) <= 0 ? 0U : ((bitfield) & (~0U >> (32-(num_bits)))))
+/* Read a fixed-width field of `num_bits` bits (least significant bit first)
+ * from the stream.
+ * A width of zero or less reads nothing and returns 0.  A width above 32
+ * cannot be represented: it sets BITCODE_ERR_CORRUPT_INPUT and returns 0.
+ * Returns 0 as well when more input is needed and the refill fails. */
+static uint32_t read_fixed(struct bc_read_stream *stream, int num_bits)
+{
+    uint32_t ret;
+    if(num_bits <= 0)
+        return 0;
+    if(num_bits > 32)
+    {
+        stream->stream_err |= BITCODE_ERR_CORRUPT_INPUT;
+        return 0;
+    }
+    if(stream->num_next_bits >= num_bits)
+    {
+        /* next_bits already contains all the bits we need -- take them */
+        ret = LOW_BITS(stream->next_bits, num_bits);
+        /* shifting a 32-bit value by 32 is undefined, so clear it instead */
+        if(num_bits != 32)
+            stream->next_bits >>= num_bits;
+        else
+            stream->next_bits = 0;
+        stream->num_next_bits -= num_bits;
+    }
+    else
+    {
+        /* we need all of next_bits, and then some */
+        ret = stream->next_bits;
+        int bits_filled = stream->num_next_bits;
+        int bits_left = num_bits - bits_filled;
+        if(refill_next_bits(stream) < 0) return 0;
+        /* take bits_left bits from the next_bits */
+        ret |= LOW_BITS(stream->next_bits, bits_left) << bits_filled;
+        if(bits_left != 32)
+            stream->next_bits >>= bits_left;
+        else
+            stream->next_bits = 0;
+        stream->num_next_bits -= bits_left;
+    }
+    if(stream->num_next_bits == 0)
+    {
+        /* We could defer this, but doing it now makes our stream_offset more accurate */
+        refill_next_bits(stream);
+    }
+    return ret;
+}
+/* Read a fixed-width field of up to 64 bits.  Widths above 32 are read as
+ * a 32-bit low half followed by the remaining high bits. */
+static uint64_t read_fixed_64(struct bc_read_stream *stream, int num_bits)
+{
+    if(num_bits > 32)
+    {
+        uint64_t low  = read_fixed(stream, 32);
+        uint64_t high = read_fixed(stream, num_bits-32);
+        return low | (high << 32);
+    }
+    return read_fixed(stream, num_bits);
+}
+/* Read a variable-bit-rate integer made of `bits`-wide chunks: the top bit
+ * of each chunk says whether another chunk follows, the remaining bits
+ * carry the value, least significant chunk first.
+ * Sets BITCODE_ERR_CORRUPT_INPUT and returns 0 when the chunk width is
+ * outside 1..32 or the encoding runs past 64 bits of payload. */
+static uint64_t read_vbr_64(struct bc_read_stream *stream, int bits)
+{
+    uint64_t val = 0;
+    int read_bits = 0;
+    if(bits < 1 || bits > 32)
+    {
+        stream->stream_err |= BITCODE_ERR_CORRUPT_INPUT;
+        return 0;
+    }
+    uint32_t continuation_bit = (uint32_t)1 << (bits-1);
+    uint32_t value_bits = continuation_bit - 1;
+    uint32_t continues = 0;
+    do {
+        uint32_t next_bits = read_fixed(stream, bits);
+        continues = next_bits & continuation_bit;
+        if(read_bits >= 64)
+        {
+            /* a further chunk could only be shifted by an undefined amount */
+            stream->stream_err |= BITCODE_ERR_CORRUPT_INPUT;
+            return 0;
+        }
+        /* widen before shifting: the chunk used to be shifted as a 32-bit
+         * quantity, which dropped every bit above bit 31 */
+        val |= (uint64_t)(next_bits & value_bits) << read_bits;
+        read_bits += bits-1;
+    } while(continues);
+    return val;
+}
+/* Read a VBR integer that must fit in 32 bits; a larger value sets
+ * BITCODE_ERR_CORRUPT_INPUT and yields 0. */
+static uint32_t read_vbr(struct bc_read_stream *stream, int bits)
+{
+    uint64_t wide = read_vbr_64(stream, bits);
+    if((wide >> 32) == 0)
+        return (uint32_t)wide;
+    stream->stream_err |= BITCODE_ERR_CORRUPT_INPUT;
+    return 0;
+}
+/* Map a char6 code to its character: codes below 26 map onto 'a'..'z',
+ * 26-51 onto 'A'..'Z', 52-61 onto '0'..'9', 62 is '.', and everything from
+ * 63 upwards is '_'. */
+static uint8_t decode_char6(int num)
+{
+    if(num >= 63) return '_';
+    if(num == 62) return '.';
+    if(num >= 52) return '0' + (num-52);
+    if(num >= 26) return 'A' + (num-26);
+    return 'a' + num;
+}
+/* This can handle any abbreviated type except for arrays */
+/* Produce the value described by one abbreviation operand: the literal
+ * itself, or a fixed / VBR / char6 field decoded from the stream.
+ * This can handle any abbreviated type except for arrays; any other
+ * encoding sets BITCODE_ERR_INTERNAL and yields 0. */
+static uint64_t read_abbrev_value(struct bc_read_stream *stream, struct abbrev_operand *op)
+{
+    if(op->type == Literal)
+        return op->o.literal_value;
+    unsigned char encoding = op->o.encoding_info.encoding;
+    if(encoding == OP_ENCODING_FIXED)
+        return read_fixed_64(stream, op->o.encoding_info.value);
+    if(encoding == OP_ENCODING_VBR)
+        return read_vbr_64(stream, op->o.encoding_info.value);
+    if(encoding == OP_ENCODING_CHAR6)
+        return decode_char6(read_fixed(stream, 6));
+    stream->stream_err |= BITCODE_ERR_INTERNAL;
+    return 0;
+}
+/* Append `val` to the current record's value buffer, growing the buffer
+ * first when it has no room for one more element. */
+static void append_value(struct bc_read_stream *stream, uint64_t val)
+{
+    int needed = stream->current_record_size + 1;
+    RESIZE_ARRAY_IF_NECESSARY(stream->record_buf, stream->record_buf_size, needed);
+    stream->record_buf[stream->current_record_size] = val;
+    stream->current_record_size++;
+}
+/* Decode one data record laid out by the abbreviation `ops` (num_operands
+ * entries) into the stream's record buffer.
+ * The first operand, when it is a scalar, becomes the record ID; every
+ * other value is appended to the buffer.  An array operand reads a 6-bit
+ * VBR element count and decodes that many elements with the operand that
+ * follows it.  An array operand with no following element-type operand
+ * sets BITCODE_ERR_CORRUPT_INPUT and stops decoding (previously ops[] was
+ * read one entry past its end). */
+static void read_user_abbreviated_record(struct bc_read_stream *stream,
+                                         struct abbrev_operand *ops,
+                                         int num_operands)
+{
+    stream->current_record_size = 0;
+    int i, j;
+    for(i = 0; i < num_operands; i++)
+    {
+        struct abbrev_operand *op = &ops[i];
+        if(op->type == EncodingInfo && op->o.encoding_info.encoding == OP_ENCODING_ARRAY)
+        {
+            int num_elements = read_vbr(stream, 6);
+            /* step onto the operand that describes the array elements */
+            i += 1;
+            if(i >= num_operands)
+            {
+                stream->stream_err |= BITCODE_ERR_CORRUPT_INPUT;
+                return;
+            }
+            for(j = 0; j < num_elements; j++)
+                append_value(stream, read_abbrev_value(stream, &ops[i]));
+        }
+        else
+        {
+            uint64_t val = read_abbrev_value(stream, &ops[i]);
+            if(i == 0)
+            {
+                stream->record_id = val;
+            }
+            else
+            {
+                append_value(stream, val);
+            }
+        }
+    }
+}
+/* Read one operand of an abbreviation definition into `o`.
+ * A leading 1 bit introduces a literal (8-bit VBR); otherwise a 3-bit
+ * encoding follows, and for FIXED and VBR a 5-bit VBR width after it.
+ * Returns -1 when the encoding is ARRAY and `array_ok` is zero (the
+ * operand's type and encoding have been stored by then), 0 otherwise. */
+static int read_abbrev_op(struct bc_read_stream *stream, struct abbrev_operand *o, int array_ok)
+{
+    if(read_fixed(stream, 1))
+    {
+        o->type = Literal;
+        o->o.literal_value = read_vbr(stream, 8);
+        return 0;
+    }
+    o->type = EncodingInfo;
+    o->o.encoding_info.encoding = read_fixed(stream, 3);
+    unsigned char encoding = o->o.encoding_info.encoding;
+    if(encoding == OP_ENCODING_FIXED || encoding == OP_ENCODING_VBR)
+        o->o.encoding_info.value = read_vbr(stream, 5);
+    else if(encoding == OP_ENCODING_ARRAY && !array_ok)
+        return -1;
+    return 0;
+}
+/* Drop whatever remains of a partially consumed 32-bit word so that the
+ * next read starts on a word boundary.  Does nothing when a full word is
+ * already buffered. */
+void align_32_bits(struct bc_read_stream *stream)
+{
+    if(stream->num_next_bits == 32)
+        return;
+    refill_next_bits(stream);
+}
+/* Linear search for the blockinfo stored under `block_id`.
+ * Returns NULL when none has been recorded. */
+struct blockinfo *find_blockinfo(struct bc_read_stream *stream, uint32_t block_id)
+{
+    int idx = 0;
+    while(idx < stream->blockinfo_len)
+    {
+        struct blockinfo *candidate = &stream->blockinfos[idx];
+        if(candidate->block_id == block_id)
+            return candidate;
+        idx++;
+    }
+    return NULL;
+}
+/* Return the blockinfo stored under `block_id`, appending a fresh one with
+ * an empty, 8-slot abbreviation array when none exists yet.  Appending may
+ * move stream->blockinfos, so older pointers into that array can go stale. */
+struct blockinfo *find_or_create_blockinfo(struct bc_read_stream *stream, int block_id)
+{
+    struct blockinfo *existing = find_blockinfo(stream, block_id);
+    if(existing)
+        return existing;
+    RESIZE_ARRAY_IF_NECESSARY(stream->blockinfos, stream->blockinfo_size, stream->blockinfo_len+1);
+    struct blockinfo *created = &stream->blockinfos[stream->blockinfo_len];
+    stream->blockinfo_len++;
+    created->block_id = block_id;
+    created->num_abbreviations = 0;
+    created->size_abbreviations = 8;
+    created->abbreviations = malloc(created->size_abbreviations * sizeof(*created->abbreviations));
+    return created;
+}
+/* Leave the current block: discard its metadata entry together with the
+ * abbreviation entries stacked above it, then make the enclosing block
+ * current again. */
+static void pop_stack_frame(struct bc_read_stream *stream)
+{
+    /* truncate the stack just below the current block's metadata entry */
+    stream->stream_stack_len = stream->block_metadata - stream->stream_stack;
+    if(stream->stream_stack_len == 0)
+    {
+        /* the outermost frame itself was popped: nothing is left to read */
+        stream->record_type = Eof;
+        return;
+    }
+    /* walk down to the enclosing block's metadata, counting the
+     * abbreviation entries passed on the way */
+    stream->num_abbrevs = 0;
+    stream->block_metadata--;
+    while(stream->block_metadata->type == Abbreviation)
+    {
+        stream->num_abbrevs++;
+        stream->block_metadata--;
+    }
+    /* restore the enclosing block's settings */
+    stream->abbrev_len = stream->block_metadata->e.block_metadata.abbrev_len;
+    stream->block_id   = stream->block_metadata->e.block_metadata.block_id;
+    stream->blockinfo  = find_blockinfo(stream, stream->block_id);
+}
+/* Advance the stream by one raw record and set record_type accordingly
+ * (EndBlock, StartBlock, DefineAbbrev or DataRecord).  Does nothing once
+ * record_type is Eof.  Abbreviation definitions are only decoded into
+ * record_abbrev_operands here; bc_rs_next_data_record is what stores them. */
+void bc_rs_next_record(struct bc_read_stream *stream)
+{
+    /* don't attempt to read past eof */
+    if(stream->record_type == Eof) return;
+    int abbrev_id = read_fixed(stream, stream->abbrev_len);
+    stream->current_record_offset = 0;
+    int i;
+    switch(abbrev_id) {
+        case ABBREV_ID_END_BLOCK:
+            stream->record_type = EndBlock;
+            /* remembered so that bc_rs_rewind_block can re-enter the block */
+            stream->old_block_metadata = stream->block_metadata;
+            align_32_bits(stream);
+            pop_stack_frame(stream);
+            break;
+        case ABBREV_ID_ENTER_SUBBLOCK:
+            stream->block_id    = read_vbr(stream, 8);
+            stream->abbrev_len  = read_vbr(stream, 4);
+            align_32_bits(stream);
+            stream->block_len = read_fixed(stream, 32);
+            stream->record_type = StartBlock;
+            /* push a metadata entry for the new block onto the stream stack */
+            RESIZE_ARRAY_IF_NECESSARY(stream->stream_stack, stream->stream_stack_size,
+                                      stream->stream_stack_len+1);
+            stream->block_metadata = &stream->stream_stack[stream->stream_stack_len++];
+            stream->block_metadata->type = BlockMetadata;
+            stream->block_metadata->e.block_metadata.block_id   = stream->block_id;
+            stream->block_metadata->e.block_metadata.abbrev_len = stream->abbrev_len;
+            stream->block_metadata->e.block_metadata.block_offset = stream->stream_offset;
+            stream->block_metadata->e.block_metadata.block_len    = stream->block_len;
+            //printf("++ Entering block id=%d, offset=%d\n", stream->block_id, stream->stream_offset);
+            stream->blockinfo = find_or_create_blockinfo(stream, stream->block_id);
+            break;
+        case ABBREV_ID_DEFINE_ABBREV:
+            stream->record_type = DefineAbbrev;
+            stream->record_num_abbrev = read_vbr(stream, 5);
+            RESIZE_ARRAY_IF_NECESSARY(stream->record_abbrev_operands, stream->record_size_abbrev,
+                                      stream->record_num_abbrev);
+            for(i = 0; i < stream->record_num_abbrev; i++)
+            {
+                /* NOTE(review): array_ok is passed as 0 and the return value
+                 * is ignored, so an array operand is still stored as read */
+                read_abbrev_op(stream, &stream->record_abbrev_operands[i], 0);
+            }
+            break;
+        case ABBREV_ID_UNABBREV_RECORD:
+            /* record id, value count, then each value, all as 6-bit VBRs */
+            stream->record_type = DataRecord;
+            stream->record_id   = read_vbr(stream, 6);
+            stream->current_record_size = read_vbr(stream, 6);
+            RESIZE_ARRAY_IF_NECESSARY(stream->record_buf, stream->record_buf_size,
+                                      stream->current_record_size+1);
+            for(i = 0; i < stream->current_record_size; i++)
+                stream->record_buf[i] = read_vbr(stream, 6);
+            break;
+        default:
+        {
+            /* This must be a user-defined abbreviation.  It could come from the
+             * blockinfo-defined abbreviations or abbreviations defined in this
+             * block. */
+            stream->record_type = DataRecord;
+            /* IDs 0-3 are the built-in ones, so user abbreviations start at 4;
+             * blockinfo abbreviations are numbered before block-local ones */
+            int user_abbrev_id = abbrev_id - 4;
+            int num_blockinfo_abbrevs = stream->blockinfo ? stream->blockinfo->num_abbreviations : 0;
+            int block_abbrev_id = user_abbrev_id - num_blockinfo_abbrevs;
+            if(user_abbrev_id < num_blockinfo_abbrevs)
+            {
+                struct blockinfo_abbrev *a = &stream->blockinfo->abbreviations[user_abbrev_id];
+                read_user_abbreviated_record(stream, a->operands, a->num_operands);
+            }
+            else if(block_abbrev_id < stream->num_abbrevs)
+            {
+                /* block-local abbreviations sit directly above the block's
+                 * metadata entry on the stream stack */
+                struct stream_stack_entry *e = stream->block_metadata + block_abbrev_id + 1;
+                struct abbrev_operand *o = stream->abbrev_operands + e->e.abbrev.first_operand_offset;
+                read_user_abbreviated_record(stream, o, e->e.abbrev.num_operands);
+            }
+            else
+            {
+                stream->stream_err |= BITCODE_ERR_CORRUPT_INPUT;
+            }
+            break;
+        }
+    }
+}
+/* Advance to the next record the caller should see and describe it.
+ * Abbreviation definitions are stored on the stream stack and BLOCKINFO
+ * blocks are consumed internally; neither is returned.
+ * Returns the record type plus, for StartBlock, the block ID and, for
+ * DataRecord, the record ID (id is 0 for every other type).
+ * An abbreviation definition that appears in a BLOCKINFO block before any
+ * SETBID record sets BITCODE_ERR_CORRUPT_INPUT and is skipped. */
+struct record_info bc_rs_next_data_record(struct bc_read_stream *stream)
+{
+    int i;
+    while(1)
+    {
+        bc_rs_next_record(stream);
+        if(stream->record_type == DefineAbbrev)
+        {
+            int num_ops = stream->record_num_abbrev;
+            /* block_metadata points into stream_stack, which the resize
+             * below may move; remember its index and re-derive the pointer
+             * afterwards so it never dangles */
+            int metadata_index = (int)(stream->block_metadata - stream->stream_stack);
+            RESIZE_ARRAY_IF_NECESSARY(stream->stream_stack, stream->stream_stack_size,
+                                      stream->stream_stack_len+1);
+            stream->block_metadata = &stream->stream_stack[metadata_index];
+            RESIZE_ARRAY_IF_NECESSARY(stream->abbrev_operands, stream->abbrev_operands_size,
+                                      stream->abbrev_operands_len+num_ops+1);
+            struct stream_stack_entry *e = &stream->stream_stack[stream->stream_stack_len++];
+            e->type = Abbreviation;
+            e->e.abbrev.first_operand_offset = stream->abbrev_operands_len;
+            e->e.abbrev.num_operands = num_ops;
+            struct abbrev_operand *abbrev_operands = &stream->abbrev_operands[stream->abbrev_operands_len];
+            stream->abbrev_operands_len += num_ops;
+            for(i = 0; i < num_ops; i++)
+                abbrev_operands[i] = stream->record_abbrev_operands[i];
+            stream->num_abbrevs++;
+        }
+        else if(stream->record_type == StartBlock && stream->block_id == STDBLOCK_BLOCKINFO)
+        {
+            /* The first record must be a SETBID record */
+            bc_rs_next_record(stream);
+            /* blockinfo selected by the most recent SETBID record */
+            struct blockinfo *bi = NULL;
+            while(1)
+            {
+                if(stream->record_type == EndBlock)
+                {
+                    break;
+                }
+                else if(stream->record_type == Err || stream->record_type == Eof)
+                {
+                    struct record_info ri;
+                    ri.record_type = stream->record_type;
+                    ri.id = 0;
+                    return ri;
+                }
+                else if(stream->record_type == DataRecord)
+                {
+                    if(stream->record_id == BLOCKINFO_BLOCK_SETBID)
+                    {
+                        if(stream->current_record_size != 1)
+                        {
+                            /* TODO */
+                            stream->stream_err |= BITCODE_ERR_CORRUPT_INPUT;
+                        }
+                        bi = find_or_create_blockinfo(stream, stream->record_buf[0]);
+                    }
+                }
+                else if(stream->record_type == DefineAbbrev)
+                {
+                    if(bi == NULL)
+                    {
+                        /* no SETBID seen yet: there is no blockinfo to
+                         * attach this abbreviation to, so flag and skip it
+                         * rather than dereference NULL */
+                        stream->stream_err |= BITCODE_ERR_CORRUPT_INPUT;
+                    }
+                    else
+                    {
+                        RESIZE_ARRAY_IF_NECESSARY(bi->abbreviations,
+                                                  bi->size_abbreviations, bi->num_abbreviations+1);
+                        struct blockinfo_abbrev *abbrev = &bi->abbreviations[bi->num_abbreviations++];
+                        abbrev->num_operands = stream->record_num_abbrev;
+                        abbrev->operands = malloc(sizeof(*abbrev->operands) * abbrev->num_operands);
+                        for(i = 0; i < abbrev->num_operands; i++)
+                            abbrev->operands[i] = stream->record_abbrev_operands[i];
+                    }
+                }
+                bc_rs_next_record(stream);
+            }
+        }
+        else
+        {
+            struct record_info ri;
+            ri.record_type = stream->record_type;
+            ri.id = 0;
+            if(ri.record_type == StartBlock)      ri.id = stream->block_id;
+            else if(ri.record_type == DataRecord) ri.id = stream->record_id;
+            return ri;
+        }
+    }
+}
+/* Returns the BITCODE_ERR_* flags accumulated on the stream so far. */
+int bc_rs_get_error(struct bc_read_stream *stream)
+{
+    int err_flags = stream->stream_err;
+    return err_flags;
+}
+/* Returns the number of values held by the current record. */
+int bc_rs_get_record_size(struct bc_read_stream *stream)
+{
+    int value_count = stream->current_record_size;
+    return value_count;
+}
+/* Returns how many values of the current record lie at or beyond the
+ * cursor advanced by the bc_rs_read_next_* functions. */
+int bc_rs_get_remaining_record_size(struct bc_read_stream *stream)
+{
+    int total    = stream->current_record_size;
+    int consumed = stream->current_record_offset;
+    return total - consumed;
+}
+/* Jump past the remainder of the current block and leave it.  The target
+ * is the block's recorded start offset plus four times its recorded
+ * length.  The file position is only moved for file-backed streams. */
+void bc_rs_skip_block(struct bc_read_stream *stream)
+{
+    struct block_metadata *md = &stream->block_metadata->e.block_metadata;
+    int end_offset = md->block_offset + (md->block_len * 4);
+    if(stream->infile)
+        fseek(stream->infile, end_offset, SEEK_SET);
+    /* refill_next_bits adds 4 back before it reads */
+    stream->stream_offset = end_offset - 4;
+    refill_next_bits(stream);
+    pop_stack_frame(stream);
+}
+/* Move the read position back to the start of the current block.  When the
+ * last record read was that block's EndBlock, the block is first made
+ * current again using the metadata saved by bc_rs_next_record. */
+void bc_rs_rewind_block(struct bc_read_stream *stream)
+{
+    if(stream->record_type == EndBlock)
+    {
+        /* entries between the enclosing block's metadata and the saved one
+         * are the enclosing block's abbreviations */
+        stream->num_abbrevs = stream->old_block_metadata - stream->block_metadata - 1;
+        stream->block_metadata = stream->old_block_metadata;
+        stream->abbrev_len = stream->block_metadata->e.block_metadata.abbrev_len;
+        stream->block_id   = stream->block_metadata->e.block_metadata.block_id;
+        stream->blockinfo  = find_or_create_blockinfo(stream, stream->block_id);
+        /* re-extend the stack so that it ends with the restored entry */
+        stream->stream_stack_len = stream->block_metadata - stream->stream_stack + 1;
+    }
+    int offset = stream->block_metadata->e.block_metadata.block_offset;
+    if(stream->infile)
+        fseek(stream->infile, offset, SEEK_SET);
+    /* refill_next_bits adds 4 back before it reads */
+    stream->stream_offset = offset-4;
+    refill_next_bits(stream);
+    align_32_bits(stream);
+}
+/*
+ * Local Variables:
+ * c-file-style: "bsd"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim:et:sts=4:sw=4
+ */