RubyGems - yabfi - Versions diffs - 0.1.1 - Mend

yabfi 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

checksums.yaml +7 -0
data/.gitignore +10 -0
data/.rbenv-version +1 -0
data/.rspec +4 -0
data/.travis.yml +5 -0
data/Gemfile +3 -0
data/LICENSE.txt +21 -0
data/README.md +72 -0
data/Rakefile +53 -0
data/bin/yabfi +56 -0
data/example/brainfuck-to-c.b +31 -0
data/example/cat.b +1 -0
data/example/mandelbrot.b +144 -0
data/ext/yabfi/extconf.rb +7 -0
data/ext/yabfi/vm.c +340 -0
data/lib/yabfi/consumer.rb +131 -0
data/lib/yabfi/encoder.rb +21 -0
data/lib/yabfi/lexer.rb +73 -0
data/lib/yabfi/parser.rb +54 -0
data/lib/yabfi/unroll.rb +33 -0
data/lib/yabfi/version.rb +5 -0
data/lib/yabfi.rb +36 -0
data/spec/lib/yabfi/consumer_spec.rb +223 -0
data/spec/lib/yabfi/encoder_spec.rb +40 -0
data/spec/lib/yabfi/lexer_spec.rb +65 -0
data/spec/lib/yabfi/parser_spec.rb +13 -0
data/spec/lib/yabfi/unroll_spec.rb +46 -0
data/spec/lib/yabfi/vm_spec.rb +201 -0
data/spec/lib/yabfi_spec.rb +62 -0
data/spec/spec_helper.rb +21 -0
data/yabfi.gemspec +34 -0
metadata +219 -0

data/ext/yabfi/vm.c ADDED Viewed

@@ -0,0 +1,340 @@
+#include <ruby.h>
+/**
+ * Initially, 256 ints are allocated for the VM. Whenever the memory_cursor
+ * advances beyond that, the memory size is doubled until it reaches 32768, at
+ * which point it will only allocate chunks of that size.
+ */
+#define INITIAL_MEMORY_SIZE 256
+#define MAX_REALLOCATION 32768
+/**
+ * Size of the temporary buffer used by PUT instructions.
+ */
+#define INITIAL_BUFFER_SIZE 32
+/**
+ * Constants that map human readable names to instruction codes.
+ */
+#define INSTRUCTION_CHANGE_VALUE 0
+#define INSTRUCTION_CHANGE_POINTER 1
+#define INSTRUCTION_GET 2
+#define INSTRUCTION_PUT 3
+#define INSTRUCTION_BRANCH_IF_ZERO 4
+#define INSTRUCTION_BRANCH_NOT_ZERO 5
+/**
+ * This struct represents a single VM instruction: an opcode plus one integer
+ * operand whose meaning depends on the opcode.
+ */
+typedef struct {
+  int code;     /* one of the INSTRUCTION_* constants */
+  int argument; /* delta, repeat count or relative jump, depending on code */
+} instruction;
+/**
+ * This struct contains the state of the VM.
+ */
+typedef struct {
+  VALUE input;                /* object GET reads from via eof? and getbyte */
+  VALUE output;               /* object PUT writes to via write */
+  int eof;                    /* value stored in the current cell at end of input */
+  instruction *instructions;  /* program installed by vm_load */
+  size_t instructions_length; /* number of entries in instructions */
+  size_t program_counter;     /* index of the next instruction to execute */
+  int *memory;                /* cell tape, grown on demand by vm_execute */
+  size_t memory_length;       /* number of cells currently allocated */
+  size_t memory_cursor;       /* index of the current cell */
+} vm;
+/**
+ * Ruby classes in C!
+ */
+static VALUE rb_cYABFI;
+static VALUE rb_cBaseError;
+/**
+ * Document-class: YABFI::VM
+ *
+ * This class, which is implemented as a C extension, executes the
+ * instructions generated by the upstream ruby pipeline.
+ */
+static VALUE rb_cVM;
+/**
+ * Document-class: YABFI::VM::InvalidCommand
+ *
+ * Raised when an invalid command is received by the VM.
+ */
+static VALUE rb_cInvalidCommand;
+/**
+ * Document-class: YABFI::VM::MemoryOutOfBounds
+ *
+ * Raised when the memory cursor is moved below zero.
+ */
+static VALUE rb_cMemoryOutOfBounds;
+/**
+ * Free the allocated memory for the virtual machine.
+ *
+ * Registered in vm_alloc as the free callback of the wrapped struct, so it
+ * receives the vm pointer when the Ruby object is reclaimed.
+ */
+static void
+vm_free(void *p) {
+  vm *ptr = p;
+  /* free(NULL) is a no-op, so a VM that was never loaded needs no guard. */
+  free(ptr->instructions);
+  free(ptr->memory);
+  /* With a custom free callback Ruby does not release the wrapped struct
+   * itself, so it has to be done here. */
+  xfree(ptr);
+}
+/**
+ * Mark the Ruby objects referenced from the VM struct so the garbage
+ * collector keeps them alive for as long as the VM itself is alive.
+ */
+static void
+vm_mark(void *p) {
+  vm *ptr = p;
+  rb_gc_mark(ptr->input);
+  rb_gc_mark(ptr->output);
+}
+/**
+ * Allocate a new VM.
+ *
+ * The returned object wraps a vm struct with no program, no memory and nil
+ * input/output; vm_initialize and vm_load fill those in.
+ */
+static VALUE
+vm_alloc(VALUE klass) {
+  VALUE instance;
+  vm *ptr;
+  /* vm_mark is required because the struct stores VALUEs (input, output)
+   * that the garbage collector cannot otherwise see. */
+  instance = Data_Make_Struct(klass, vm, vm_mark, vm_free, ptr);
+  ptr->input = Qnil;
+  ptr->output = Qnil;
+  ptr->eof = 0;
+  ptr->instructions = NULL;
+  ptr->instructions_length = 0;
+  ptr->program_counter = 0;
+  ptr->memory = NULL;
+  ptr->memory_length = 0;
+  ptr->memory_cursor = 0;
+  return instance;
+}
+/**
+ * Initialize a new VM.
+ *
+ * @param input [IO] the input from which the VM reads.
+ * @param output [IO] the output to which the VM writes.
+ * @param eof [Fixnum] the value to return when EOF is reached.
+ * @raise [TypeError] when eof is not a Fixnum.
+ *
+ * @!parse [ruby]
+ *  class YABFI::VM
+ *    def initialize(input, output, eof)
+ *    end
+ *  end
+ */
+static VALUE
+vm_initialize(VALUE self, VALUE input, VALUE output, VALUE rb_eof) {
+  vm *ptr;
+  Check_Type(rb_eof, T_FIXNUM);
+  Data_Get_Struct(self, vm, ptr);
+  ptr->input = input;
+  ptr->output = output;
+  ptr->eof = NUM2INT(rb_eof);
+  return self;
+}
+/**
+ * Load the VM with new instructions.
+ *
+ * Resets the memory, the memory cursor and the program counter. The input is
+ * validated in full before anything is allocated or replaced, so a failed
+ * load! leaves the previously loaded program untouched.
+ *
+ * @param ary [Array<Object>] list of instructions to execute, each one a
+ *   two element Array of Fixnums: the instruction code and its argument.
+ * @raise [TypeError] when ary or one of its entries has the wrong type.
+ * @raise [InvalidCommand] when an entry is not a two element Array.
+ * @return [nil] unconditionally.
+ *
+ * @!parse [ruby]
+ *  class YABFI::VM
+ *    def load!(ary)
+ *    end
+ *  end
+ */
+static VALUE
+vm_load(VALUE self, VALUE ary) {
+  long iter;
+  long length;
+  vm *ptr;
+  int *memory;
+  instruction *instructions;
+  VALUE entry, code, arg;
+  Data_Get_Struct(self, vm, ptr);
+  Check_Type(ary, T_ARRAY);
+  length = RARRAY_LEN(ary);
+  /* First pass: validation only. Every check below may raise, and raising
+   * while holding freshly malloc'd buffers would leak them. */
+  for (iter = 0; iter < length; iter++) {
+    entry = rb_ary_entry(ary, iter);
+    Check_Type(entry, T_ARRAY);
+    if (RARRAY_LEN(entry) != 2) {
+      rb_raise(rb_cInvalidCommand, "Commands must be tuples");
+    }
+    code = rb_ary_entry(entry, 0);
+    arg = rb_ary_entry(entry, 1);
+    Check_Type(code, T_FIXNUM);
+    Check_Type(arg, T_FIXNUM);
+    /* Convert once here so that any range error the conversion can raise
+     * happens before allocation; the results are discarded. */
+    (void) FIX2INT(code);
+    (void) FIX2INT(arg);
+  }
+  memory = calloc(INITIAL_MEMORY_SIZE, sizeof(int));
+  /* An empty program keeps a NULL instruction pointer instead of relying on
+   * the implementation-defined result of malloc(0). */
+  instructions = NULL;
+  if (length > 0) {
+    instructions = malloc(sizeof(instruction) * (size_t) length);
+  }
+  if (memory == NULL || (length > 0 && instructions == NULL)) {
+    free(memory);
+    free(instructions);
+    rb_memerror();
+  }
+  /* Second pass: fill the instruction table from the validated input. */
+  for (iter = 0; iter < length; iter++) {
+    entry = rb_ary_entry(ary, iter);
+    instructions[iter].code = FIX2INT(rb_ary_entry(entry, 0));
+    instructions[iter].argument = FIX2INT(rb_ary_entry(entry, 1));
+  }
+  /* Release whatever an earlier load! left behind before replacing it. */
+  free(ptr->memory);
+  free(ptr->instructions);
+  ptr->memory = memory;
+  ptr->memory_length = INITIAL_MEMORY_SIZE;
+  ptr->memory_cursor = 0;
+  ptr->instructions = instructions;
+  ptr->instructions_length = (size_t) length;
+  ptr->program_counter = 0;
+  return Qnil;
+}
+/**
+ * Execute the instructions loaded into the VM.
+ *
+ * Runs from the stored program counter until it moves past the last
+ * instruction. No manually managed scratch memory is held while Ruby code
+ * runs, so an exception raised by the input or output object cannot leak.
+ *
+ * @raise [MemoryOutOfBounds] when the memory cursor goes below zero.
+ * @raise [InvalidCommand] when an invalid command is executed, or when a PUT
+ *   instruction carries a negative repeat count.
+ * @return [nil] unconditionally.
+ *
+ * @!parse [ruby]
+ *  class YABFI::VM
+ *    def execute!
+ *    end
+ *  end
+ */
+static VALUE
+vm_execute(VALUE self) {
+  vm *ptr;
+  int *tmp_memory;
+  size_t delta;
+  size_t target;
+  int iter;
+  instruction curr;
+  VALUE byte;
+  VALUE str;
+  Data_Get_Struct(self, vm, ptr);
+  while (ptr->program_counter < ptr->instructions_length) {
+    curr = ptr->instructions[ptr->program_counter];
+    switch (curr.code) {
+      case INSTRUCTION_CHANGE_VALUE:
+        ptr->memory[ptr->memory_cursor] += curr.argument;
+        ptr->program_counter++;
+        break;
+      case INSTRUCTION_CHANGE_POINTER:
+        if (((int) ptr->memory_cursor + curr.argument) < 0) {
+          rb_raise(rb_cMemoryOutOfBounds, "The memory cursor went below zero");
+        }
+        /* Grow the tape before committing the new cursor, so that a failed
+         * allocation never leaves the cursor pointing past the tape. */
+        target = ptr->memory_cursor + curr.argument;
+        while (target >= ptr->memory_length) {
+          delta = ptr->memory_length;
+          if (delta > MAX_REALLOCATION) {
+            delta = MAX_REALLOCATION;
+          }
+          tmp_memory = realloc(ptr->memory,
+              (ptr->memory_length + delta) * sizeof(int));
+          if (tmp_memory == NULL) {
+            /* The old block is still owned by ptr->memory at this point. */
+            rb_memerror();
+          }
+          /* realloc leaves the new tail uninitialised; cells start at 0. */
+          memset(tmp_memory + ptr->memory_length, 0, delta * sizeof(int));
+          ptr->memory = tmp_memory;
+          ptr->memory_length += delta;
+        }
+        ptr->memory_cursor = target;
+        ptr->program_counter++;
+        break;
+      case INSTRUCTION_BRANCH_IF_ZERO:
+        if (ptr->memory[ptr->memory_cursor] == 0) {
+          ptr->program_counter += curr.argument;
+        } else {
+          ptr->program_counter++;
+        }
+        break;
+      case INSTRUCTION_BRANCH_NOT_ZERO:
+        if (ptr->memory[ptr->memory_cursor] != 0) {
+          ptr->program_counter += curr.argument;
+        } else {
+          ptr->program_counter++;
+        }
+        break;
+      case INSTRUCTION_GET:
+        for (iter = 0; iter < curr.argument; iter++) {
+          /* rb_funcall returns a VALUE and nil is non-zero in C, so the
+           * result has to go through RTEST rather than a plain if. */
+          if (RTEST(rb_funcall(ptr->input, rb_intern("eof?"), 0))) {
+            ptr->memory[ptr->memory_cursor] = ptr->eof;
+          } else {
+            byte = rb_funcall(ptr->input, rb_intern("getbyte"), 0);
+            /* A nil byte is treated like end of input. */
+            ptr->memory[ptr->memory_cursor] =
+              NIL_P(byte) ? ptr->eof : NUM2INT(byte);
+          }
+        }
+        ptr->program_counter++;
+        break;
+      case INSTRUCTION_PUT:
+        if (curr.argument < 0) {
+          rb_raise(rb_cInvalidCommand, "Invalid repeat count: %i",
+              curr.argument);
+        }
+        /* Build the repeated byte directly in a Ruby String, which the
+         * garbage collector owns. */
+        str = rb_str_new(NULL, curr.argument);
+        memset(RSTRING_PTR(str), ptr->memory[ptr->memory_cursor],
+            curr.argument * sizeof(char));
+        rb_funcall(ptr->output, rb_intern("write"), 1, str);
+        ptr->program_counter++;
+        break;
+      default:
+        rb_raise(rb_cInvalidCommand, "Invalid command code: %i", curr.code);
+    }
+  }
+  return Qnil;
+}
+/**
+ * Return the VM's internal state -- used in testing and debugging.
+ *
+ * @return [Hash] with the keys :memory_cursor, :memory_length,
+ *   :program_counter and :current_value; :current_value is nil while the
+ *   cursor is outside the allocated memory (e.g. before load! is called).
+ */
+static VALUE
+vm_state(VALUE self) {
+  vm *ptr;
+  VALUE hash;
+  Data_Get_Struct(self, vm, ptr);
+  hash = rb_hash_new();
+  /* The *2NUM conversions are used instead of INT2FIX because INT2FIX does
+   * no range checking and these fields are size_t / int. */
+  rb_hash_aset(hash, ID2SYM(rb_intern("memory_cursor")),
+      SIZET2NUM(ptr->memory_cursor));
+  rb_hash_aset(hash, ID2SYM(rb_intern("memory_length")),
+      SIZET2NUM(ptr->memory_length));
+  rb_hash_aset(hash, ID2SYM(rb_intern("program_counter")),
+      SIZET2NUM(ptr->program_counter));
+  if (ptr->memory_cursor < ptr->memory_length) {
+    rb_hash_aset(hash, ID2SYM(rb_intern("current_value")),
+        INT2NUM(ptr->memory[ptr->memory_cursor]));
+  } else {
+    rb_hash_aset(hash, ID2SYM(rb_intern("current_value")), Qnil);
+  }
+  return hash;
+}
+/**
+ * Extension entry point: looks up the YABFI namespace and its BaseError,
+ * then defines YABFI::VM, its methods and its two error classes.
+ */
+void
+Init_vm(void) {
+  ID yabfi_id = rb_intern("YABFI");
+  ID base_error_id = rb_intern("BaseError");
+  /* Both constants are looked up rather than defined here. */
+  rb_cYABFI = rb_const_get(rb_cObject, yabfi_id);
+  rb_cBaseError = rb_const_get(rb_cYABFI, base_error_id);
+  /* The VM class and its public interface. */
+  rb_cVM = rb_define_class_under(rb_cYABFI, "VM", rb_cObject);
+  rb_define_alloc_func(rb_cVM, vm_alloc);
+  rb_define_method(rb_cVM, "initialize", vm_initialize, 3);
+  rb_define_method(rb_cVM, "load!", vm_load, 1);
+  rb_define_method(rb_cVM, "execute!", vm_execute, 0);
+  rb_define_method(rb_cVM, "state", vm_state, 0);
+  /* Errors raised by the VM, both subclasses of YABFI::BaseError. */
+  rb_cInvalidCommand =
+    rb_define_class_under(rb_cVM, "InvalidCommand", rb_cBaseError);
+  rb_cMemoryOutOfBounds =
+    rb_define_class_under(rb_cVM, "MemoryOutOfBounds", rb_cBaseError);
+}

data/lib/yabfi/consumer.rb ADDED Viewed

@@ -0,0 +1,131 @@
+module YABFI
+  # This class provides generic methods to declaratively consume an Array of
+  # input.
+  class Consumer
+    # Raised when the expected input does not match the given input.
+    Unsatisfied = Class.new(BaseError)
+    # Raised when the end of input is reached.
+    EndOfInput = Class.new(BaseError)
+    # @attr_reader [Array<Object>] tokens to consume.
+    attr_reader :tokens
+    # Create a new Consumer.
+    #
+    # @param tokens [Array<Object>] consumer input.
+    def initialize(tokens)
+      @tokens = tokens
+    end
+    # Lazily initialized @consume_index instance variable.
+    #
+    # @return [Integer] of the current index of the input consumption.
+    def consume_index
+      @consume_index ||= 0
+    end
+    # Seek to the given position.
+    #
+    # @param n [Integer] the index to seek to.
+    def seek(n)
+      @consume_index = n
+    end
+    # Test if the parse has completed.
+    #
+    # @return [true, false] whether or not the input has been fully consumed.
+    def end_of_input?
+      consume_index >= tokens.length
+    end
+    # Look at the next token of input without advancing the consumption.
+    #
+    # @return [Object] the next token in the parse.
+    # @raise [EndOfInput] if the parse has completed.
+    def peek
+      fail EndOfInput, '#peek: end of input' if end_of_input?
+      tokens[consume_index]
+    end
+    # Look at the next token of input and advance the parse by one element.
+    #
+    # @return [Object] the next token in the parse.
+    # @raise [EndOfInput] if the parse has completed.
+    def advance
+      peek.tap { seek(consume_index.succ) }
+    end
+    # Given an optional error message and predicate, test if the next token in
+    # the parse satisfies the predicate.
+    #
+    # @param message [String] error message to raise when the condition is not
+    #                         satisfied.
+    # @yieldparam token [Object] the token to test.
+    # @return [Object] the satisfied token.
+    # @raise [EndOfInput] if the parse has completed.
+    # @raise [Unsatisfied] if the condition is not met.
+    def satisfy(message = nil)
+      message ||= '#satisfy:'
+      tok = peek
+      fail Unsatisfied, "#{message} '#{tok}'" unless yield(tok)
+      seek(consume_index.succ)
+      tok
+    end
+    # Declare that the next token in the stream should be the given token.
+    #
+    # @param expected [Object] next expected object in the parse.
+    # @return [Object] the satisfied token.
+    # @raise [EndOfInput] if the parse has completed.
+    # @raise [Unsatisfied] if the token does not equal the argument.
+    def eq(expected)
+      satisfy("Expected #{expected}, got:") { |tok| tok == expected }
+    end
+    # Declare that the next token in the stream should match the given token.
+    #
+    # @param toks [Array<Object>] list of objects that could match.
+    # @return [Object] the satisfied token.
+    # @raise [EndOfInput] if the parse has completed.
+    # @raise [Unsatisfied] if the token cannot be matched.
+    def one_of(*toks)
+      satisfy("Expected one of #{toks}, got:") { |tok| toks.include?(tok) }
+    end
+    # Try a block of code, resetting the parse state on any StandardError.
+    #
+    # @return [Object, nil] the result of the block, or nil if the block fails.
+    def attempt
+      idx = consume_index
+      yield
+    rescue
+      seek(idx)
+      nil
+    end
+    # Consume 0 or more occurrences of the given block.
+    #
+    # @return [Array<Object>] results of the successful block calls (may be empty).
+    def many
+      idx = consume_index # method-level local so the rescue clause can see it
+      results = []
+      loop do
+        idx = consume_index # position to rewind to if this iteration fails
+        results << yield
+      end
+    rescue
+      seek(idx)
+      results
+    end
+    # Consume 1 or more occurrences of the given block.
+    #
+    # @return [Array<Object>] results of the block calls; Unsatisfied is raised if empty.
+    def many_one(&block)
+      many(&block).tap do |results|
+        fail Unsatisfied, '#many_one: got no results' if results.empty?
+      end
+    end
+  end
+end

data/lib/yabfi/encoder.rb ADDED Viewed

@@ -0,0 +1,21 @@
+module YABFI
+  # This module encodes the human-readable instruction names to integers.
+  module Encoder
+    # Mapping of human readable instruction names to their encoded integers.
+    INSTRUCTIONS = {
+      change_value: 0,
+      change_pointer: 1,
+      get: 2,
+      put: 3,
+      branch_if_zero: 4,
+      branch_not_zero: 5
+    }
+    module_function
+    # Encode a list of instructions into
+    def encode(ary)
+      ary.map { |(code, argument)| [INSTRUCTIONS[code], argument] }
+    end
+  end
+end

data/lib/yabfi/lexer.rb ADDED Viewed

@@ -0,0 +1,73 @@
+module YABFI
+  # This module consumes tokens produced by the Parser and produces an
+  # unoptimized syntax tree.
+  class Lexer < Consumer
+    # This Hash maps tokens to method names to optimize the performance of the
+    # lexer.
+    DISPATCH_TABLE = {
+      loop: :while_loop,
+      succ: :change_value,
+      pred: :change_value,
+      next: :change_pointer,
+      prev: :change_pointer,
+      get: :get,
+      put: :put
+    }
+    # Run the lexer on the given tokens.
+    #
+    # @param tokens [Array<Symbol>] the input tokens.
+    # @raise [Consumer::Error] when the lexing fails.
+    # @return [Array<Object>] the lexed syntax tree.
+    def self.run!(tokens)
+      new(tokens).send(:run!)
+    end
+    private
+    def run!
+      forest = commands
+      return forest if end_of_input?
+      fail Consumer::Unsatisfied, "Unexpected token #{peek}"
+    end
+    def commands
+      many { command }
+    end
+    def command
+      method = DISPATCH_TABLE[peek]
+      fail Consumer::Unsatisfied, "Unexpected token #{peek}" unless method
+      send(method)
+    end
+    def while_loop
+      eq(:loop)
+      inner = commands
+      eq(:end)
+      [:loop, inner]
+    end
+    def get
+      count = many_one { eq(:get) }.count
+      [:get, count]
+    end
+    def put
+      count = many_one { eq(:put) }.count
+      [:put, count]
+    end
+    def change_value
+      toks = many_one { one_of(:succ, :pred) }
+      total = toks.reduce(0) { |a, e| e == :succ ? a.succ : a.pred }
+      [:change_value, total]
+    end
+    def change_pointer
+      toks = many_one { one_of(:next, :prev) }
+      total = toks.reduce(0) { |a, e| e == :next ? a.succ : a.pred }
+      [:change_pointer, total]
+    end
+  end
+end

data/lib/yabfi/parser.rb ADDED Viewed

@@ -0,0 +1,54 @@
+module YABFI
+  # This module contains a set of functions that lazily parse an IO object and
+  # yield a symbol for each non-comment character that is read in.
+  module Parser
+    # Maximum number of bytes to read in from the IO object at a time.
+    DEFAULT_BUFFER_SIZE = 1_024
+    # Maps characters to human-readable Symbol command names.
+    COMMAND_MAPPINGS = {
+      '+' => :succ,
+      '-' => :pred,
+      '>' => :next,
+      '<' => :prev,
+      ',' => :get,
+      '.' => :put,
+      '[' => :loop,
+      ']' => :end
+    }
+    module_function
+    # Lazily parse an IO object while it still has input.
+    #
+    # @param io [IO] the object from which the parser lazily reads.
+    # @param buffer_size [Integer] maximum size to request from the IO at once.
+    # @yield [command] Symbol that represents the parsed command.
+    # @return [Enumator<Symbol>] of commands when no block is given.
+    def parse(io, buffer_size = DEFAULT_BUFFER_SIZE)
+      return enum_for(:parse, io, buffer_size) unless block_given?
+      loop do
+        buffer = read(io, buffer_size)
+        break unless buffer
+        buffer.each_char do |char|
+          command = COMMAND_MAPPINGS[char]
+          yield command if command
+        end
+      end
+    end
+    # Block waiting for the next set of commands.
+    #
+    # @param io [IO] the object from which the parser lazily reads.
+    # @param size [Integer] the maximum number of bytes to read in.
+    # @return [String, nil] the buffer of bytes read in, or nil on EOF.
+    def read(io, size)
+      io.read_nonblock(size)
+    rescue IO::WaitReadable
+      IO.select([io])
+      retry
+    rescue EOFError
+      nil
+    end
+  end
+end

data/lib/yabfi/unroll.rb ADDED Viewed

@@ -0,0 +1,33 @@
+module YABFI
+  # This module is used to transforms unrolls loops into multiple
+  # branch_if_zero and branch_not_zero instructions.
+  module Unroll
+    module_function
+    # Unroll an entire syntax forest.
+    #
+    # @param forest [Array<Object>] the forest to unroll.
+    # @return [Array<Object>] the unrolled commands.
+    def unroll(forest)
+      forest.each_with_object([]) do |(command, arg), ary|
+        if command == :loop
+          ary.push(*unroll_loop(arg))
+        else
+          ary.push([command, arg])
+        end
+      end
+    end
+    # Unroll a single loop of commands.
+    #
+    # @param commands [Array<Object>] the loop to unroll.
+    # @return [Array<Object>] the unrolled commands.
+    def unroll_loop(commands)
+      unroll(commands).tap do |unrolled|
+        offset = unrolled.length
+        unrolled.unshift([:branch_if_zero, offset + 2])
+        unrolled.push([:branch_not_zero, -1 * offset])
+      end
+    end
+  end
+end

data/lib/yabfi/version.rb ADDED Viewed

@@ -0,0 +1,5 @@
+# YABFI is the top level module for the gem.
+module YABFI
+  # Gem (semantic) version.
+  VERSION = '0.1.1'
+end

data/lib/yabfi.rb ADDED Viewed

@@ -0,0 +1,36 @@
+require 'stringio'
+# YABFI (Yet Another BrainFuck Interpreter) is the top level module for the gem.
+module YABFI
+  # This is the base error for the gem from which the rest of the errors
+  # subclass.
+  BaseError = Class.new(StandardError)
+  module_function
+  # Evaluate an IO of commands
+  #
+  # @param commands [String, IO] the commands to execute.
+  # @param input [IO] the input from which the commands read.
+  # @param output [IO] the output to which the commands write.
+  # @param eof [Integer] the value to set when EOF is reached.
+  # @raise [BaseError] when there is a compiling or execution error.
+  def eval!(commands, input: $stdin, output: $stdout, eof: 0)
+    io = commands.is_a?(String) ? StringIO.new(commands) : commands
+    tokens = Parser.parse(io)
+    lexed = Lexer.run!(tokens.to_a)
+    commands = Unroll.unroll(lexed)
+    encoded = Encoder.encode(commands)
+    vm = VM.new(input, output, eof)
+    vm.load!(encoded)
+    vm.execute!
+  end
+end
+require 'yabfi/version'
+require 'yabfi/consumer'
+require 'yabfi/parser'
+require 'yabfi/lexer'
+require 'yabfi/unroll'
+require 'yabfi/encoder'
+require 'yabfi/vm'