RubyGems - optimus-ep - Versions diffs - 0.5 - Mend

optimus-ep 0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

data/Rakefile +9 -0
data/bin/eprime2tabfile +165 -0
data/bin/stim.times +5 -0
data/bin/stim1.times +5 -0
data/bin/stim1_b.times +5 -0
data/bin/stim1_c.times +5 -0
data/bin/stim1_d.times +5 -0
data/bin/test_data.txt +278 -0
data/bin/test_data2.txt +277 -0
data/bin/test_eprime_stimfile.rb +20 -0
data/lib/calculator.rb +49 -0
data/lib/column_calculator.rb +308 -0
data/lib/eprime.rb +23 -0
data/lib/eprime_data.rb +154 -0
data/lib/eprime_reader.rb +105 -0
data/lib/eprimetab_parser.rb +21 -0
data/lib/excel_parser.rb +21 -0
data/lib/log_file_parser.rb +208 -0
data/lib/row_filter.rb +40 -0
data/lib/tabfile_parser.rb +55 -0
data/lib/tabfile_writer.rb +44 -0
data/lib/writers/stimtimes_writer.rb +97 -0
data/spec/calculator_spec.rb +56 -0
data/spec/column_calculator_spec.rb +368 -0
data/spec/eprime_data_spec.rb +202 -0
data/spec/eprime_reader_spec.rb +115 -0
data/spec/eprimetab_parser_spec.rb +23 -0
data/spec/excel_parser_spec.rb +26 -0
data/spec/log_file_parser_spec.rb +156 -0
data/spec/row_filter_spec.rb +32 -0
data/spec/samples/bad_excel_tsv.txt +4 -0
data/spec/samples/corrupt_log_file.txt +116 -0
data/spec/samples/eprime_tsv.txt +7 -0
data/spec/samples/excel_tsv.txt +5 -0
data/spec/samples/optimus_log.txt +110 -0
data/spec/samples/short_columns.txt +1 -0
data/spec/samples/sorted_columns.txt +1 -0
data/spec/samples/std_columns.txt +1 -0
data/spec/samples/unknown_type.txt +2 -0
data/spec/samples/unreadable_file +1 -0
data/spec/spec_helper.rb +98 -0
data/spec/tabfile_parser_spec.rb +62 -0
data/spec/tabfile_writer_spec.rb +91 -0
data/spec/writers/stimtimes_writer_spec.rb +16 -0
metadata +106 -0

data/lib/column_calculator.rb ADDED Viewed

@@ -0,0 +1,308 @@
+# Part of the Optimus package for managing E-Prime data
+#
+# Copyright (C) 2008 Board of Regents of the University of Wisconsin System
+#
+# Written by Nathan Vack <njvack@wisc.edu>, at the Waisman Laboratory for Brain
+# Imaging and Behavior, University of Wisconsin - Madison
+require 'calculator'
+module Eprime
+  # This implements columnwise and accumulator-style calculations for
+  # Eprime data. It generally allows four main kinds of columns:
+  # 1: Data columns -- columns backed directly by data
+  # 2: Computed columns -- columns computed by numerical operations of other columns in the same row
+  # 3: Copydown columns -- Columns equal to the last non-empty value of another column
+  # 4: Counter columns -- Columns that change value based on the contents of other columns -- generally to count.
+  #
+  # It's worth noting: columns may depend on other columns, as long as the dependency isn't circular.
+  # Currently, counter columns may behave strangely when used in and using computed columns -- a parser
+  # like the computed columns' parser is really needed.
+  class ColumnCalculator
+    attr_writer :data
+    attr_reader :columns
+    COLUMN_TYPES = %w(data_cols computed_cols copydown_cols counter_cols)
+    include Enumerable
+    def initialize
+      @columns = []
+      @columns_intern = []
+      @column_indexes = {}
+      @rows = []
+      COLUMN_TYPES.each do |type|
+        instance_variable_set("@#{type}", [])
+      end
+    end
+    def data=(data)
+      @data = data
+      @data_cols = []
+      @data.columns.each do |col_name|
+        @data_cols << DataColumn.new(col_name, @data)
+      end
+      set_columns!
+    end
+    def [](index)
+      compute_data! unless @computed
+      return @rows[index]
+    end
+    def column_index(col_id)
+      if col_id.is_a? Fixnum
+        return (col_id >= 0 and col_id < @columns.size) ? col_id : nil
+      end
+      return @column_indexes[col_id]
+    end
+    def column(col_id)
+      index = column_index(col_id)
+      raise IndexError.new("#{col_id} does not exist") if index.nil?
+      return @columns_intern[index]
+    end
+    def size
+      @data.size
+    end
+    def computed_column(name, expression)
+      @computed_cols << ComputedColumn.new(name, Expression.new(expression))
+      set_columns!
+    end
+    def copydown_column(name, copied_name)
+      @copydown_cols << CopydownColumn.new(name, copied_name)
+      set_columns!
+    end
+    def counter_column(name, options = {})
+      @counter_cols << CounterColumn.new(name, options)
+      set_columns!
+    end
+    def each
+      @data.each_index do |row_index|
+        yield self[row_index]
+      end
+      @rows
+    end
+    def self.compute(numeric_expression)
+      @@calculator.compute(numeric_expression)
+    end
+    private
+    def add_column(column)
+      # Raise an error if the column already exists
+      if @column_indexes[column.name]
+        raise ComputationError.new("#{column.name} already exists!")
+      end
+      # Save the index
+      @column_indexes[column.name] = @columns_intern.size
+      @columns_intern << column
+      @columns << column.name
+    end
+    def set_columns!
+      @columns = []
+      @columns_intern = []
+      @column_indexes = {}
+      COLUMN_TYPES.each do |type|
+        ar = instance_variable_get("@#{type}")
+        ar.each do |col|
+          add_column(col)
+        end
+      end
+      @computed = false
+    end
+    # Creates the infix calculator -- called at class instantiation time
+    def self.make_calculator
+      @@calculator = ::Eprime::Calculator.new
+    end
+    make_calculator
+    def compute_data!
+      @rows = []
+      @data.each_index do |row_index|
+        row = Row.new(self, @data[row_index])
+        COLUMN_TYPES.each do |type|
+          ar = instance_variable_get("@#{type}")
+          ar.each do |col|
+            row.compute(col.name)
+          end
+        end
+        @rows << row
+      end
+      @computed = true
+    end
+    class Column
+      attr_accessor :name
+      def initialize(name)
+        @name = name
+      end
+      # This should be overridden by subclasses
+      def compute(row, path = [])
+        return row[@name]
+      end
+    end
+    class DataColumn < Column
+      def initialize(name, data)
+        @data_index = data.find_column_index(name)
+        @data = data
+        super(name)
+      end
+    end
+    class CopydownColumn < Column
+      def initialize(name, copied_name)
+        super(name)
+        @last_val = ''
+        @copied_name = copied_name
+      end
+      def compute(row, path = [])
+        if !row[@copied_name].to_s.empty?
+          @last_val = row[@copied_name].to_s
+        end
+        return @last_val
+      end
+    end
+    class ComputedColumn < Column
+      def initialize(name, expression)
+        @expression = expression
+        super(name)
+      end
+      def compute(row, path = [])
+        return super(row) if super(row)
+        compute_str = @expression.to_s
+        if path.include?(@name)
+          raise ComputationError.new("#{compute_str} contains a loop with #{@name} -- can't compute")
+        end
+        column_names = @expression.columns
+        column_names.each do |col_name|
+          col = row.find_column(col_name)
+          val = col.compute(row, path+[@name])
+          if val.to_s.empty?
+            val = "0"
+          end
+          compute_str.gsub!("{#{col_name}}", val)
+        end
+        return ::Eprime::ColumnCalculator.compute(compute_str)
+      end
+    end
+    class CounterColumn < Column
+      STANDARD_OPTS = {
+        :start_value  => 0,
+        :count_by     => :succ,
+        :count_when   => lambda {|row| true},
+        :reset_when   => lambda {|row| false}
+      }
+      def initialize(name, options)
+        @options = STANDARD_OPTS.merge(options)
+        @start_value = @options[:start_value]
+        @count_by = @options[:count_by]
+        @count_when = @options[:count_when]
+        @reset_when = @options[:reset_when]
+        @current_value = @start_value
+        super(name)
+      end
+      def compute(row, path = [])
+        if @reset_when.call(row)
+          @current_value = @start_value
+        end
+        if @count_when.call(row)
+          if @count_by.is_a? Proc
+            @current_value = @count_by.call(@current_value)
+          elsif @count_by.is_a?(Symbol) || @count_by.is_a?(String)
+            @current_value = @current_value.send(@count_by)
+          else
+            @current_value = @current_value + @count_by
+          end
+        end
+        return @current_value
+      end
+    end
+    class Row
+      attr_reader :computed_data
+      def initialize(parent, rowdata)
+        @parent = parent
+        @rowdata = rowdata
+        @computed_data = []
+        # Add all the data columns to computed_data
+        rowdata.columns.each do |dcol_name|
+          index = @parent.column_index(dcol_name)
+          @computed_data[index] = rowdata[dcol_name]
+        end
+      end
+      def [](col_id)
+        if @parent.column_index(col_id).nil?
+          raise IndexError.new("#{col_id} does not exist")
+        end
+        return @computed_data[@parent.column_index(col_id)]
+      end
+      def find_column(column_name)
+        @parent.column(column_name)
+      end
+      # Recursively compute this column name and every column on which it depends
+      def compute(col_name)
+        raise ArgumentError.new("compute requires a column name") unless col_name.is_a? String
+        index = @parent.column_index(col_name)
+        col = @parent.column(col_name)
+        @computed_data[index] = col.compute(self)
+        return @computed_data[index]
+      end
+      private
+    end
+    class Expression
+      attr_reader :columns
+      COLUMN_FINDER = /\{([^}]*)\}/ # Finds strings like {foo} and {bar}
+      def initialize(expr_string)
+        @expr = expr_string
+        @columns = find_columns(expr_string).freeze
+      end
+      def to_s
+        @expr.dup
+      end
+      private
+      def find_columns(str)
+        return str.scan(COLUMN_FINDER).flatten
+      end
+    end
+    class ComputationError < Exception
+    end
+  end
+end

data/lib/eprime.rb ADDED Viewed

@@ -0,0 +1,23 @@
+# Part of the Optimus package for managing E-Prime data
+#
+# Copyright (C) 2008 Board of Regents of the University of Wisconsin System
+#
+# Written by Nathan Vack <njvack@wisc.edu>, at the Waisman Laboratory for Brain
+# Imaging and Behavior, University of Wisconsin - Madison
+# Add our lib to the search path
+$: << File.expand_path(File.join(File.dirname(__FILE__), "..", "lib"))
+require 'eprime_data'
+require 'tabfile_writer'
+require 'eprime_reader'
+module Eprime
+  # Raised whenever an input file's type can't be detemined by Eprime::Reader
+  class UnknownTypeError < Exception; end
+  # Raised whenever an input file seems to be damaged
+  class DamagedFileError < Exception; end
+end

data/lib/eprime_data.rb ADDED Viewed

@@ -0,0 +1,154 @@
+# Part of the Optimus package for managing E-Prime data
+#
+# Copyright (C) 2008 Board of Regents of the University of Wisconsin System
+#
+# Written by Nathan Vack <njvack@wisc.edu>, at the Waisman Laboratory for Brain
+# Imaging and Behavior, University of Wisconsin - Madison
+module Eprime
+  # Raised when columns were specified at initialization time, and a novel
+  # column is added. Generally, this is an indication that Something is Funny.
+  # The column has already been added when this is raised; its position is
+  # available through #index.
+  class ColumnAddedWarning < StandardError
+    # We want to be able to get the index out of this
+    attr_reader :index
+    def initialize(message, index)
+      @index = index
+      super(message)
+    end
+  end
+  # A generalized data structure for eprime files -- essentially just
+  # a table structure.
+  # I should be able to say:
+  # e_data = Eprime::Data.new
+  # e_data[0][0] for the first row / col
+  # e_data[0]['ExperimentName'] for the experiment name
+  # e_data[0][0] = "foo"
+  # e_data.add_row
+  # e_data[0]['kitteh'] = "cheezburger"
+  # For querying:
+  # Indexing numerically out of bounds should raise an exception
+  # Indexing textwise out of bounds should raise an exception
+  # For setting:
+  # Indexing numerically out of bounds should raise an exception
+  # Indexing textwise out of bounds should add a column
+  # So... you might reasonably do
+  # r = e_data.new_row()
+  # r['Stim.OnsetTime'] = '3521'
+  # One last thing: if you care about column ordering, but may be adding
+  # data in an arbitrary order (example: reading E-Prime log files),
+  # you can force a column order by passing an array of strings to
+  # Eprime::Data.new
+  class Data
+    attr_reader :columns
+    # columns -- optional array of column names that fixes the column order
+    # options -- hash; :ignore_warnings suppresses ColumnAddedWarning when a
+    #            column is added after columns were given here
+    def initialize(columns = [], options = {})
+      # Fall back to a Hash, not an Array: @options[:ignore_warnings] is read
+      # later and an Array cannot be indexed with a Symbol.
+      @options = options || {}
+      @rows = []
+      @columns = []
+      @column_hash = {}
+      @columns_set_in_initialize = false
+      if columns && !columns.empty?
+        columns.each { |col| find_or_add_column_index(col) }
+        @columns_set_in_initialize = true
+      end
+    end
+    # Returns a new Eprime::Data object containing the data from this
+    # and all other data sets
+    def merge(*datasets)
+      Eprime::Data.new.merge!(self, *datasets)
+    end
+    # Combine more Eprime::Data objects into this one, in-place
+    def merge!(*datasets)
+      datasets.each do |source|
+        source.each do |row|
+          new_row = add_row
+          source.columns.each do |col|
+            new_row[col] = row[col]
+          end
+        end
+      end
+      self
+    end
+    # We mostly delegate to our rows array
+    def method_missing(method, *args, &block)
+      @rows.send method, *args, &block
+    end
+    # Keeps respond_to? consistent with the delegation in method_missing.
+    def respond_to_missing?(method, include_private = false)
+      @rows.respond_to?(method, include_private) || super
+    end
+    # Appends an empty row and returns it.
+    def add_row
+      row = Row.new(self)
+      @rows << row
+      row
+    end
+    # Resolves a column name or numeric position to a numeric index.
+    # Returns nil for unknown names and for positions outside 0...columns.size.
+    def find_column_index(col_id)
+      # Integer (not Fixnum): Fixnum was folded into Integer in Ruby 2.4 and
+      # removed in 3.2; Integer also matches on older interpreters.
+      if col_id.is_a?(Integer)
+        # Negative positions are out of bounds too; accepting them would
+        # silently index from the end of a row's value array.
+        return (col_id >= 0 && col_id < @columns.size) ? col_id : nil
+      end
+      @column_hash[col_id]
+    end
+    # Like find_column_index, but an unknown column name is added as a new
+    # column. Raises ColumnAddedWarning (after adding the column) when columns
+    # were fixed at initialization and :ignore_warnings is not set.
+    def find_or_add_column_index(col_id)
+      index_id = find_column_index(col_id)
+      # If index_id was a string, nil means we may want to add it. If it's a
+      # numeric index, we want to return nil from here -- we're not gonna add unnamed
+      # indexes.
+      return index_id if index_id || col_id.is_a?(Integer)
+      # In this case, we're adding a column...
+      @columns << col_id
+      index = @columns.length - 1
+      @column_hash[col_id] = index
+      if @columns_set_in_initialize && !@options[:ignore_warnings]
+        raise ColumnAddedWarning.new("Error: Added column #{col_id} after specifying columns at init", index)
+      end
+      index
+    end
+    # A single row of values, addressed by column name or position.
+    class Row
+      def initialize(parent)
+        @data = []
+        @parent = parent
+      end
+      # Returns the value for a column name or position.
+      # Raises IndexError when the column does not exist.
+      def [](index)
+        num_index = @parent.find_column_index(index)
+        if num_index.nil?
+          # Report the requested column; the lookup result is nil here.
+          raise IndexError.new("Column #{index} does not exist")
+        end
+        @data[num_index]
+      end
+      # Stores a value; an unknown column name adds a new column, while an
+      # out-of-range numeric position raises IndexError.
+      def []=(index, value)
+        num_index = @parent.find_or_add_column_index(index)
+        if num_index.nil?
+          raise IndexError.new("Column #{index} does not exist")
+        end
+        @data[num_index] = value
+      end
+      def columns
+        @parent.columns
+      end
+      # Returns one value per parent column (nil where nothing was stored).
+      def values
+        vals = []
+        @parent.columns.each_index do |i|
+          vals[i] = @data[i]
+        end
+        vals
+      end
+    end
+  end
+end

data/lib/eprime_reader.rb ADDED Viewed

@@ -0,0 +1,105 @@
+# Part of the Optimus package for managing E-Prime data
+#
+# Copyright (C) 2008 Board of Regents of the University of Wisconsin System
+#
+# Written by Nathan Vack <njvack@wisc.edu>, at the Waisman Laboratory for Brain
+# Imaging and Behavior, University of Wisconsin - Madison
+require 'log_file_parser'
+require 'excel_parser'
+require 'eprimetab_parser'
+module Eprime
+  # A class that should open any type of E-Prime text file and read it into
+  # an E-Prime data structure.
+  # This class isn't yet used anywhere.
+  class Reader
+    attr_reader :type, :parser, :input
+    attr_accessor :options
+    TYPES = {:log => LogfileParser, :excel => ExcelParser, :eprime => EprimetabParser}
+    def initialize(input = nil, options = {})
+      @options = options || {}
+      set_input(input) unless input.nil?
+    end
+    def input=(input)
+      set_input(input)
+    end
+    def eprime_data
+      @eprime_data ||= @parser.to_eprime
+      return @eprime_data
+    end
+    def options=(options)
+      @options = options || {}
+      set_parser!
+    end
+    private
+    def set_input(input)
+      @input = input
+      read_input!
+    end
+    # Reads the input, sets @type and @parser.
+    def read_input!
+      begin
+        set_type(@input)
+      rescue Exception => e
+        raise UnknownTypeError.new(e.message)
+      end
+    end
+    # Sets @type to one of Eprime::Reader::TYPES or raises an Eprime::UnknownTypeError
+    # Does not change file position.
+    def set_type(file)
+      @file = file
+      original_pos = @file.pos
+      @file.rewind
+      first_lines = Array.new
+      # We can tell what kind of file this is from the first two lines
+      # If there aren't two lines, this can't be a good file.
+      2.times do
+        first_lines << @file.gets
+      end
+      @file.pos = original_pos
+      @type = determine_file_type(first_lines)
+      if @type.nil?
+        raise UnknownTypeError.new("Can't determine the type of #{file.path}")
+      end
+      set_parser!
+    end
+    def set_parser!
+      @eprime_data = nil
+      return unless @type && TYPES[@type]
+      @parser = TYPES[@type].new(@file, @options)
+    end
+    # Determines the type of an eprime file, based on its first two lines.
+    # Returns one of [:log, :eprime_csv, :excel_csv, nil]
+    def determine_file_type(first_lines)
+      # Log files start with *** Header Start ***
+      #
+      # Excel files have a filename on the first line (no tabs); the second line
+      # contains at least three elements, tab-delimted
+      #
+      # eprime CSV files will have at least three tab-delimited elements on the first line
+      if first_lines[0].index("*** Header Start ***")
+        return :log
+      elsif (first_lines[0]["\t"].nil? and first_lines[1].split("\t").size >= 3)
+        return :excel
+      elsif (first_lines[0].split("\t").size >= 3 and first_lines[1].split("\t").size >= 3)
+        return :eprime
+      end
+      # Don't know? Return nil.
+      return nil
+    end
+  end
+end

data/lib/eprimetab_parser.rb ADDED Viewed

@@ -0,0 +1,21 @@
+# Part of the Optimus package for managing E-Prime data
+#
+# Copyright (C) 2008 Board of Regents of the University of Wisconsin System
+#
+# Written by Nathan Vack <njvack@wisc.edu>, at the Waisman Laboratory for Brain
+# Imaging and Behavior, University of Wisconsin - Madison
+# This almost completely delegates to TabfileParser
+require 'tabfile_parser'
+module Eprime
+  class Reader
+    class EprimetabParser < TabfileParser
+      def initialize(file, options = {})
+        options = options.merge(:skip_lines => 3)
+        super(file, options)
+      end
+    end
+  end
+end

data/lib/excel_parser.rb ADDED Viewed

@@ -0,0 +1,21 @@
+# Part of the Optimus package for managing E-Prime data
+#
+# Copyright (C) 2008 Board of Regents of the University of Wisconsin System
+#
+# Written by Nathan Vack <njvack@wisc.edu>, at the Waisman Laboratory for Brain
+# Imaging and Behavior, University of Wisconsin - Madison
+# This almost completely delegates to TabfileParser
+require 'tabfile_parser'
+module Eprime
+  class Reader
+    class ExcelParser < TabfileParser
+      def initialize(file, options = {})
+        options = options.merge(:skip_lines => 1)
+        super(file, options)
+      end
+    end
+  end
+end