optimus-ep 0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. data/Rakefile +9 -0
  2. data/bin/eprime2tabfile +165 -0
  3. data/bin/stim.times +5 -0
  4. data/bin/stim1.times +5 -0
  5. data/bin/stim1_b.times +5 -0
  6. data/bin/stim1_c.times +5 -0
  7. data/bin/stim1_d.times +5 -0
  8. data/bin/test_data.txt +278 -0
  9. data/bin/test_data2.txt +277 -0
  10. data/bin/test_eprime_stimfile.rb +20 -0
  11. data/lib/calculator.rb +49 -0
  12. data/lib/column_calculator.rb +308 -0
  13. data/lib/eprime.rb +23 -0
  14. data/lib/eprime_data.rb +154 -0
  15. data/lib/eprime_reader.rb +105 -0
  16. data/lib/eprimetab_parser.rb +21 -0
  17. data/lib/excel_parser.rb +21 -0
  18. data/lib/log_file_parser.rb +208 -0
  19. data/lib/row_filter.rb +40 -0
  20. data/lib/tabfile_parser.rb +55 -0
  21. data/lib/tabfile_writer.rb +44 -0
  22. data/lib/writers/stimtimes_writer.rb +97 -0
  23. data/spec/calculator_spec.rb +56 -0
  24. data/spec/column_calculator_spec.rb +368 -0
  25. data/spec/eprime_data_spec.rb +202 -0
  26. data/spec/eprime_reader_spec.rb +115 -0
  27. data/spec/eprimetab_parser_spec.rb +23 -0
  28. data/spec/excel_parser_spec.rb +26 -0
  29. data/spec/log_file_parser_spec.rb +156 -0
  30. data/spec/row_filter_spec.rb +32 -0
  31. data/spec/samples/bad_excel_tsv.txt +4 -0
  32. data/spec/samples/corrupt_log_file.txt +116 -0
  33. data/spec/samples/eprime_tsv.txt +7 -0
  34. data/spec/samples/excel_tsv.txt +5 -0
  35. data/spec/samples/optimus_log.txt +110 -0
  36. data/spec/samples/short_columns.txt +1 -0
  37. data/spec/samples/sorted_columns.txt +1 -0
  38. data/spec/samples/std_columns.txt +1 -0
  39. data/spec/samples/unknown_type.txt +2 -0
  40. data/spec/samples/unreadable_file +1 -0
  41. data/spec/spec_helper.rb +98 -0
  42. data/spec/tabfile_parser_spec.rb +62 -0
  43. data/spec/tabfile_writer_spec.rb +91 -0
  44. data/spec/writers/stimtimes_writer_spec.rb +16 -0
  45. metadata +106 -0
@@ -0,0 +1,308 @@
1
+ # Part of the Optimus package for managing E-Prime data
2
+ #
3
+ # Copyright (C) 2008 Board of Regents of the University of Wisconsin System
4
+ #
5
+ # Written by Nathan Vack <njvack@wisc.edu>, at the Waisman Laboratory for Brain
6
+ # Imaging and Behavior, University of Wisconsin - Madison
7
+
8
+ require 'calculator'
9
+
10
module Eprime

  # This implements columnwise and accumulator-style calculations for
  # Eprime data. It generally allows four main kinds of columns:
  # 1: Data columns -- columns backed directly by data
  # 2: Computed columns -- columns computed by numerical operations of other
  #    columns in the same row
  # 3: Copydown columns -- columns equal to the last non-empty value of
  #    another column
  # 4: Counter columns -- columns that change value based on the contents of
  #    other columns -- generally to count.
  #
  # Columns may depend on other columns, as long as the dependency isn't
  # circular. Columns are laid out and evaluated in COLUMN_TYPES order, and
  # every column is evaluated at most once per row, so a counter or copydown
  # column referenced by a computed column advances only once for that row.
  class ColumnCalculator
    include Enumerable

    COLUMN_TYPES = %w(data_cols computed_cols copydown_cols counter_cols).freeze

    # Names of all columns, in index order.
    attr_reader :columns

    def initialize
      @columns = []
      @columns_intern = []
      @column_indexes = {}
      @rows = []
      @computed = false
      COLUMN_TYPES.each do |type|
        instance_variable_set("@#{type}", [])
      end
    end

    # Sets the backing data set. One data column is created per entry of
    # data.columns, and previously computed rows are invalidated.
    def data=(data)
      @data = data
      @data_cols = @data.columns.map do |col_name|
        DataColumn.new(col_name, @data)
      end
      set_columns!
    end

    # Returns the computed Row at +index+, computing all rows first if needed.
    def [](index)
      compute_data! unless @computed
      @rows[index]
    end

    # Translates a column name or numeric index into a numeric index.
    # Returns nil when the name is unknown or the number is out of range.
    def column_index(col_id)
      # Integer (rather than Fixnum) keeps this working on every Ruby version;
      # Fixnum no longer exists on current Rubies.
      if col_id.is_a?(Integer)
        return (col_id >= 0 && col_id < @columns.size) ? col_id : nil
      end
      @column_indexes[col_id]
    end

    # Returns the column object for a name or numeric index.
    # Raises IndexError if no such column exists.
    def column(col_id)
      index = column_index(col_id)
      raise IndexError.new("#{col_id} does not exist") if index.nil?
      @columns_intern[index]
    end

    # Number of rows, as reported by the backing data.
    def size
      @data.size
    end

    # Adds a column whose value is the result of +expression+, in which
    # {name} placeholders are replaced by the values of other columns.
    # Raises ComputationError if +name+ is already taken.
    def computed_column(name, expression)
      register_column(@computed_cols, ComputedColumn.new(name, Expression.new(expression)))
    end

    # Adds a column that repeats the last non-empty value of +copied_name+.
    # Raises ComputationError if +name+ is already taken.
    def copydown_column(name, copied_name)
      register_column(@copydown_cols, CopydownColumn.new(name, copied_name))
    end

    # Adds a counter column; see CounterColumn::STANDARD_OPTS for options.
    # Raises ComputationError if +name+ is already taken.
    def counter_column(name, options = {})
      register_column(@counter_cols, CounterColumn.new(name, options))
    end

    # Yields every computed row in order; returns the array of rows.
    def each
      @data.each_index do |row_index|
        yield self[row_index]
      end
      @rows
    end

    # Evaluates a numeric expression string with the shared calculator.
    def self.compute(numeric_expression)
      # Built on first use, so loading this class does not require the
      # calculator to be constructible yet.
      make_calculator unless defined?(@@calculator)
      @@calculator.compute(numeric_expression)
    end

    # Creates (or replaces) the shared infix calculator.
    def self.make_calculator
      @@calculator = ::Eprime::Calculator.new
    end

    private

    # Appends +column+ to +list+ and rebuilds the column tables. The name is
    # checked first so a rejected column never ends up in +list+.
    def register_column(list, column)
      if @column_indexes[column.name]
        raise ComputationError.new("#{column.name} already exists!")
      end
      list << column
      set_columns!
    end

    def add_column(column)
      # Raise an error if the column already exists
      if @column_indexes[column.name]
        raise ComputationError.new("#{column.name} already exists!")
      end
      # Save the index
      @column_indexes[column.name] = @columns_intern.size
      @columns_intern << column
      @columns << column.name
    end

    # Rebuilds the name list, column list and name->index table from the
    # per-type column arrays, and marks the computed rows as stale.
    def set_columns!
      @columns = []
      @columns_intern = []
      @column_indexes = {}
      COLUMN_TYPES.each do |type|
        instance_variable_get("@#{type}").each do |col|
          add_column(col)
        end
      end
      @computed = false
    end

    # Computes every column of every row. @columns_intern is already in
    # COLUMN_TYPES order because set_columns! builds it that way.
    def compute_data!
      # Stateful columns must start from scratch on every full pass,
      # otherwise a recompute continues from where the last pass ended.
      @columns_intern.each { |col| col.reset }
      @rows = []
      @data.each_index do |row_index|
        row = Row.new(self, @data[row_index])
        @columns_intern.each do |col|
          row.compute(col.name)
        end
        @rows << row
      end
      @computed = true
    end

    # Base column: its value is whatever the row already holds under its name.
    class Column
      attr_accessor :name

      def initialize(name)
        @name = name
      end

      # This should be overridden by subclasses
      def compute(row, path = [])
        row[@name]
      end

      # Clears per-pass state; stateless columns have nothing to clear.
      def reset
      end
    end

    # A column backed directly by the data set.
    class DataColumn < Column
      def initialize(name, data)
        @data_index = data.find_column_index(name)
        @data = data
        super(name)
      end
    end

    # A column equal to the last non-empty value seen in another column.
    class CopydownColumn < Column
      def initialize(name, copied_name)
        super(name)
        @last_val = ''
        @copied_name = copied_name
      end

      def compute(row, path = [])
        copied = row[@copied_name].to_s
        @last_val = copied unless copied.empty?
        @last_val
      end

      def reset
        @last_val = ''
      end
    end

    # A column whose value is a numeric expression over other columns.
    class ComputedColumn < Column

      def initialize(name, expression)
        @expression = expression
        super(name)
      end

      # Substitutes each referenced column's value into the expression and
      # evaluates it. Empty values count as "0".
      # Raises ComputationError when the expression depends on itself.
      def compute(row, path = [])
        existing = super(row)
        return existing if existing

        compute_str = @expression.to_s
        if path.include?(@name)
          raise ComputationError.new("#{compute_str} contains a loop with #{@name} -- can't compute")
        end

        @expression.columns.each do |col_name|
          # Going through the row means a dependency is evaluated only once
          # per row, even if it is stateful.
          val = row.compute(col_name, path + [@name]).to_s
          val = "0" if val.empty?
          # Block form inserts val literally (no backslash interpretation).
          compute_str.gsub!("{#{col_name}}") { val }
        end
        ::Eprime::ColumnCalculator.compute(compute_str)
      end
    end

    # A column that carries a running value from row to row.
    class CounterColumn < Column
      STANDARD_OPTS = {
        :start_value => 0,
        :count_by => :succ,
        :count_when => lambda {|row| true},
        :reset_when => lambda {|row| false}
      }

      # Options (defaults in STANDARD_OPTS):
      #   :start_value -- value before the first count and after a reset
      #   :count_by    -- a Proc called with the current value, a method name
      #                   sent to the current value, or a value added to it
      #   :count_when  -- callable taking the row; count when it returns true
      #   :reset_when  -- callable taking the row; reset when it returns true
      def initialize(name, options)
        @options = STANDARD_OPTS.merge(options || {})
        @start_value = @options[:start_value]
        @count_by = @options[:count_by]
        @count_when = @options[:count_when]
        @reset_when = @options[:reset_when]
        @current_value = @start_value
        super(name)
      end

      # The reset check runs before the count check, so a row can both
      # reset and count.
      def compute(row, path = [])
        @current_value = @start_value if @reset_when.call(row)
        @current_value = advance(@current_value) if @count_when.call(row)
        @current_value
      end

      def reset
        @current_value = @start_value
      end

      private

      def advance(value)
        case @count_by
        when Proc then @count_by.call(value)
        when Symbol, String then value.send(@count_by)
        else value + @count_by
        end
      end
    end

    # One row of results: data values plus whatever has been computed so far.
    class Row
      attr_reader :computed_data

      def initialize(parent, rowdata)
        @parent = parent
        @rowdata = rowdata
        @computed_data = []
        @finished = {}
        # Add all the data columns to computed_data
        rowdata.columns.each do |dcol_name|
          index = @parent.column_index(dcol_name)
          @computed_data[index] = rowdata[dcol_name]
        end
      end

      # Returns the stored value for a column name or index.
      # Raises IndexError if the column does not exist.
      def [](col_id)
        index = @parent.column_index(col_id)
        raise IndexError.new("#{col_id} does not exist") if index.nil?
        @computed_data[index]
      end

      def find_column(column_name)
        @parent.column(column_name)
      end

      # Recursively compute this column name and every column on which it
      # depends. The result is stored, and later calls for the same column
      # return the stored value instead of evaluating the column again.
      # +path+ lists the computed columns currently being evaluated and is
      # used for loop detection.
      def compute(col_name, path = [])
        raise ArgumentError.new("compute requires a column name") unless col_name.is_a? String

        col = @parent.column(col_name)
        index = @parent.column_index(col_name)
        unless @finished[index]
          @computed_data[index] = col.compute(self, path)
          @finished[index] = true
        end
        @computed_data[index]
      end
    end

    # An expression string together with the column names it references.
    class Expression
      attr_reader :columns

      COLUMN_FINDER = /\{([^}]*)\}/ # Finds strings like {foo} and {bar}

      def initialize(expr_string)
        @expr = expr_string
        @columns = find_columns(expr_string).freeze
      end

      # Returns a fresh copy, so callers may modify it in place.
      def to_s
        @expr.dup
      end

      private

      def find_columns(str)
        str.scan(COLUMN_FINDER).flatten
      end
    end

    # Raised for duplicate column names and circular computed columns.
    # Inherits from StandardError so an ordinary `rescue` catches it.
    class ComputationError < StandardError

    end
  end
end
data/lib/eprime.rb ADDED
@@ -0,0 +1,23 @@
1
+ # Part of the Optimus package for managing E-Prime data
2
+ #
3
+ # Copyright (C) 2008 Board of Regents of the University of Wisconsin System
4
+ #
5
+ # Written by Nathan Vack <njvack@wisc.edu>, at the Waisman Laboratory for Brain
6
+ # Imaging and Behavior, University of Wisconsin - Madison
7
+
8
+ # Add our lib to the search path
9
+ $: << File.expand_path(File.join(File.dirname(__FILE__), "..", "lib"))
10
+
11
+ require 'eprime_data'
12
+ require 'tabfile_writer'
13
+ require 'eprime_reader'
14
+
15
module Eprime

  # Raised whenever an input file's type can't be determined by Eprime::Reader.
  # Inherits from StandardError so an ordinary `rescue` catches it.
  class UnknownTypeError < StandardError; end

  # Raised whenever an input file seems to be damaged.
  # Inherits from StandardError so an ordinary `rescue` catches it.
  class DamagedFileError < StandardError; end

end
@@ -0,0 +1,154 @@
1
+ # Part of the Optimus package for managing E-Prime data
2
+ #
3
+ # Copyright (C) 2008 Board of Regents of the University of Wisconsin System
4
+ #
5
+ # Written by Nathan Vack <njvack@wisc.edu>, at the Waisman Laboratory for Brain
6
+ # Imaging and Behavior, University of Wisconsin - Madison
7
+
8
module Eprime

  # Raised when columns were specified at initialization time, and a novel
  # column is added. Generally, this is an indication that Something is Funny.
  # The column has already been added when this is raised; #index tells the
  # rescuer where it landed.
  class ColumnAddedWarning < StandardError
    # We want to be able to get the index out of this
    attr_reader :index

    def initialize(message, index)
      @index = index
      super(message)
    end
  end

  # A generalized data structure for eprime files -- essentially just
  # a table structure.
  # I should be able to say:
  # e_data = Eprime::Data.new
  # e_data[0][0] for the first row / col
  # e_data[0]['ExperimentName'] for the experiment name
  # e_data[0][0] = "foo"
  # e_data.add_row
  # e_data[0]['kitteh'] = "cheezburger"
  # For querying:
  # Indexing numerically out of bounds should raise an exception
  # Indexing textwise out of bounds should raise an exception
  # For setting:
  # Indexing numerically out of bounds should raise an exception
  # Indexing textwise out of bounds should add a column
  # So... you might reasonably do
  # r = e_data.add_row
  # r['Stim.OnsetTime'] = '3521'
  # One last thing: if you care about column ordering, but may be adding
  # data in an arbitrary order (example: reading E-Prime log files),
  # you can force a column order by passing an array of strings to
  # Eprime::Data.new
  class Data

    # Column names, in index order.
    attr_reader :columns

    # columns:: optional array of column names fixing the column order
    # options:: hash; :ignore_warnings => true suppresses ColumnAddedWarning.
    #           nil is treated as an empty hash.
    def initialize(columns = [], options = {})
      @options = options || {}
      @rows = []
      @columns = []
      @column_hash = {}
      @columns_set_in_initialize = false
      if columns && columns.length > 0
        columns.each { |col| find_or_add_column_index(col) }
        # Only set after the initial columns are in, so adding them does not
        # itself trigger ColumnAddedWarning.
        @columns_set_in_initialize = true
      end
    end

    # Returns a new Eprime::Data object containing the data from this
    # and all other data sets
    def merge(*datasets)
      Eprime::Data.new.merge!(self, *datasets)
    end

    # Combine more Eprime::Data objects into this one, in-place
    def merge!(*datasets)
      datasets.each do |source|
        source.each do |row|
          target = add_row
          source.columns.each do |col|
            target[col] = row[col]
          end
        end
      end
      self
    end

    # We mostly delegate to our rows array
    def method_missing(method, *args, &block)
      @rows.send(method, *args, &block)
    end

    # Keeps respond_to? truthful about the methods delegated to the rows array.
    def respond_to_missing?(method, include_private = false)
      @rows.respond_to?(method, include_private) || super
    end

    # Appends an empty row and returns it.
    def add_row
      row = Row.new(self)
      @rows << row
      row
    end

    # Returns the numeric index for a column name or number, or nil when the
    # name is unknown or the number is outside 0...columns.size.
    def find_column_index(col_id)
      # Integer (rather than Fixnum) keeps this working on every Ruby version.
      if col_id.is_a?(Integer)
        # Negative numbers are out of bounds too; letting them through would
        # silently index from the end of a row's value array.
        return (col_id >= 0 && col_id < @columns.size) ? col_id : nil
      end
      @column_hash[col_id]
    end

    # Like find_column_index, but an unknown *name* is added as a new column.
    # Raises ColumnAddedWarning (after adding the column) when columns were
    # fixed at construction time and :ignore_warnings is not set.
    def find_or_add_column_index(col_id)
      index_id = find_column_index(col_id)
      # If index_id was a string, nil means we may want to add it. If it's a
      # numeric index, we want to return nil from here -- we're not gonna add
      # unnamed indexes.
      return index_id if index_id || col_id.is_a?(Integer)
      # In this case, we're adding a column...
      @columns << col_id
      index = @columns.length - 1
      @column_hash[col_id] = index
      if @columns_set_in_initialize && !@options[:ignore_warnings]
        raise ColumnAddedWarning.new("Error: Added column #{col_id} after specifying columns at init", index)
      end
      index
    end

    # One row of the table. Values are stored by column index; the owning
    # Data object resolves names to indexes.
    class Row
      def initialize(parent)
        @data = []
        @parent = parent
      end

      # Reads a value by column name or index.
      # Raises IndexError if the column does not exist.
      def [](index)
        num_index = @parent.find_column_index(index)
        if num_index.nil?
          raise IndexError.new("Column #{index} does not exist")
        end
        @data[num_index]
      end

      # Writes a value by column name or index. An unknown name adds a
      # column; an out-of-range number raises IndexError.
      def []=(index, value)
        num_index = @parent.find_or_add_column_index(index)
        if num_index.nil?
          raise IndexError.new("Column #{index} does not exist")
        end
        @data[num_index] = value
      end

      def columns
        @parent.columns
      end

      # Values for every column of the parent, nil where this row has none.
      def values
        vals = []
        @parent.columns.each_index do |i|
          vals[i] = @data[i]
        end
        vals
      end

    end
  end

end
@@ -0,0 +1,105 @@
1
+ # Part of the Optimus package for managing E-Prime data
2
+ #
3
+ # Copyright (C) 2008 Board of Regents of the University of Wisconsin System
4
+ #
5
+ # Written by Nathan Vack <njvack@wisc.edu>, at the Waisman Laboratory for Brain
6
+ # Imaging and Behavior, University of Wisconsin - Madison
7
+
8
+ require 'log_file_parser'
9
+ require 'excel_parser'
10
+ require 'eprimetab_parser'
11
+
12
module Eprime

  # A class that should open any type of E-Prime text file and read it into
  # an E-Prime data structure.
  # This class isn't yet used anywhere.
  class Reader

    attr_reader :type, :parser, :input, :options

    # Maps a detected file type to the parser class that handles it.
    TYPES = {:log => LogfileParser, :excel => ExcelParser, :eprime => EprimetabParser}

    # input::   an IO-like object (must support pos, pos=, rewind and gets);
    #           may be nil and supplied later via #input=
    # options:: hash handed to the parser; nil is treated as an empty hash
    # Raises Eprime::UnknownTypeError when the type of +input+ can't be determined.
    def initialize(input = nil, options = {})
      @options = options || {}
      set_input(input) unless input.nil?
    end

    # Replaces the input and re-detects its type.
    # Raises Eprime::UnknownTypeError when the type can't be determined.
    def input=(input)
      set_input(input)
    end

    # Returns the parser's to_eprime result, computed once per parser.
    def eprime_data
      @eprime_data ||= @parser.to_eprime
    end

    # Replaces the options and rebuilds the parser (if a type is known),
    # discarding any previously parsed data.
    def options=(options)
      @options = options || {}
      set_parser!
    end

    private

    def set_input(input)
      @input = input
      read_input!
    end

    # Reads the input, sets @type and @parser. Any ordinary error during
    # detection is reported as an UnknownTypeError with the same message;
    # interrupts and exit requests are deliberately not intercepted.
    def read_input!
      set_type(@input)
    rescue UnknownTypeError
      raise
    rescue StandardError => e
      raise UnknownTypeError.new(e.message)
    end

    # Sets @type to one of the keys of Eprime::Reader::TYPES or raises an
    # Eprime::UnknownTypeError.
    # Does not change file position.
    def set_type(file)
      @file = file
      original_pos = @file.pos
      begin
        @file.rewind
        # We can tell what kind of file this is from the first two lines.
        # gets returns nil past end-of-file, so short files yield nils here.
        first_lines = Array.new(2) { @file.gets }
      ensure
        # Restore the position even if reading failed.
        @file.pos = original_pos
      end
      @type = determine_file_type(first_lines)
      if @type.nil?
        raise UnknownTypeError.new("Can't determine the type of #{describe_input(file)}")
      end
      set_parser!
    end

    # Not every IO-like input has a path (StringIO, for example).
    def describe_input(file)
      file.respond_to?(:path) ? file.path : file.inspect
    end

    def set_parser!
      @eprime_data = nil
      return unless @type && TYPES[@type]
      @parser = TYPES[@type].new(@file, @options)
    end

    # Determines the type of an eprime file, based on its first two lines.
    # Returns one of [:log, :excel, :eprime, nil]
    def determine_file_type(first_lines)
      # Log files start with *** Header Start ***
      #
      # Excel files have a filename on the first line (no tabs); the second line
      # contains at least three elements, tab-delimited
      #
      # eprime CSV files will have at least three tab-delimited elements on
      # each of the first two lines
      first, second = first_lines
      return nil if first.nil?
      return :log if first.index("*** Header Start ***")

      # Both remaining types need a second line with at least three fields.
      return nil if second.nil?
      return nil unless second.split("\t").size >= 3

      return :excel unless first.include?("\t")
      return :eprime if first.split("\t").size >= 3
      # Don't know? Return nil.
      nil
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # Part of the Optimus package for managing E-Prime data
2
+ #
3
+ # Copyright (C) 2008 Board of Regents of the University of Wisconsin System
4
+ #
5
+ # Written by Nathan Vack <njvack@wisc.edu>, at the Waisman Laboratory for Brain
6
+ # Imaging and Behavior, University of Wisconsin - Madison
7
+
8
+ # This almost completely delegates to TabfileParser
9
+
10
+ require 'tabfile_parser'
11
+
12
module Eprime
  class Reader
    # Parser for tab-delimited E-Prime exports. All of the work is done by
    # TabfileParser; this class only forces :skip_lines to 3.
    class EprimetabParser < TabfileParser
      # file::    passed through to TabfileParser unchanged
      # options:: hash passed on to TabfileParser; any :skip_lines given by
      #           the caller is overridden. nil is treated as an empty hash,
      #           matching how Eprime::Reader treats nil options.
      def initialize(file, options = {})
        super(file, (options || {}).merge(:skip_lines => 3))
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # Part of the Optimus package for managing E-Prime data
2
+ #
3
+ # Copyright (C) 2008 Board of Regents of the University of Wisconsin System
4
+ #
5
+ # Written by Nathan Vack <njvack@wisc.edu>, at the Waisman Laboratory for Brain
6
+ # Imaging and Behavior, University of Wisconsin - Madison
7
+
8
+ # This almost completely delegates to TabfileParser
9
+
10
+ require 'tabfile_parser'
11
+
12
module Eprime
  class Reader
    # Parser for Excel-style tab-delimited exports. All of the work is done
    # by TabfileParser; this class only forces :skip_lines to 1.
    class ExcelParser < TabfileParser
      # file::    passed through to TabfileParser unchanged
      # options:: hash passed on to TabfileParser; any :skip_lines given by
      #           the caller is overridden. nil is treated as an empty hash,
      #           matching how Eprime::Reader treats nil options.
      def initialize(file, options = {})
        super(file, (options || {}).merge(:skip_lines => 1))
      end
    end
  end
end