RubyGems - datafarming - Versions diffs - 1.0.0 - Mend

datafarming 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

checksums.yaml +7 -0
data/README.md +49 -0
data/datafarming.gemspec +24 -0
data/exe/augment_design.rb +66 -0
data/exe/batchrunner.rb +46 -0
data/exe/blank2csv.rb +40 -0
data/exe/cat.rb +36 -0
data/exe/convert_line_endings.rb +38 -0
data/exe/cross.rb +44 -0
data/exe/csv2blank.rb +39 -0
data/exe/mser.rb +71 -0
data/exe/pool_files.rb +70 -0
data/exe/rundesign_general.rb +85 -0
data/exe/stack_nolhs.rb +172 -0
data/exe/stripheaderdups.rb +53 -0
data/exe/stripheaders.rb +49 -0
data/lib/datafarming/cross.rb +18 -0
data/lib/datafarming/error_handling.rb +27 -0
data/lib/datafarming/nolh_designs.rb +901 -0
metadata +117 -0

data/exe/pool_files.rb ADDED Viewed

@@ -0,0 +1,70 @@
+#!/usr/bin/env ruby -w
+# Ruby script to pool the columns of multiple csv files
+require 'colorize'
+String.disable_colorization false
+require 'optparse'
+require 'datafarming/error_handling'
+# Help text handed to ErrorHandling.clean_abort when help is requested or
+# fewer than two filenames are given.  It is an array of lines; the
+# .blue/.yellow/.green calls color individual fragments.
+# NOTE(review): the final entries say input may be delimited by semicolons,
+# colons, or whitespace, but the header-labeling code in this script splits
+# on ',' only -- confirm which is intended.
+help_msg = [
+  'Pool the output from two or more CSV files to a single output file.', '',
+  'The first line of output is the list of filenames that were the',
+  'source files of the data to be merged.  Subsequent lines are the',
+  'contents of those files, and are assumed to be in CSV format.',
+  'Output is written to ' + 'stdout'.blue + ' in CSV format.', '',
+  'Syntax:',
+  "\n\t#{ErrorHandling.prog_name} [--help] ".yellow +
+    '[--no-labels] filenames...'.yellow, '',
+  "Arguments in square brackets are optional.  A vertical bar '|'",
+  'indicates valid alternatives for invoking the option.  Prefix',
+  'the command with "' + 'ruby'.yellow +
+  '" if it is not on your PATH.', '',
+  '  --help | -h | -? | ?'.green,
+  "\tProduce this help message.",
+  '  --no-labels | -n'.green,
+  "\tSpecify that individual files do not have labels.",
+  '  filenames...'.green,
+  "\tThe names of two or more files containing data to be pooled.",
+  "\tInput file data can be delimited by commas, semicolons,",
+  "\tcolons, or whitespace."
+]
+# Command-line options.  -n/--no-labels records that the input files have no
+# header labels; the help flags hand the help text to
+# ErrorHandling.clean_abort.
+no_labels = false
+OptionParser.new do |opts|
+  # Banner mirrors the syntax line of the help text (the closing bracket was
+  # previously mistyped and --no-labels was missing).
+  opts.banner = "Usage: #{$PROGRAM_NAME} [-h|--help] [-n|--no-labels] filenames..."
+  opts.on('-h', '-?', '--help') { ErrorHandling.clean_abort help_msg }
+  opts.on('-n', '--no-labels') { no_labels = true }
+end.parse!
+# A bare '?' also requests help, and at least two input files are required.
+ErrorHandling.clean_abort help_msg if ARGV[0] == '?' || ARGV.length < 2
+# Read every line of every input file through ARGF, collecting one array of
+# stripped lines per file.  A change in ARGF.filename marks the first line
+# of a new file.
+old_filename = nil
+line_set = nil
+allfiles = []
+ARGF.each do |line|
+  if ARGF.filename == old_filename
+    line_set << line.strip
+  else
+    old_filename = ARGF.filename
+    allfiles << line_set if line_set
+    # Unless --no-labels was given, prefix each comma-separated label on the
+    # first line with "<filename>::" so pooled columns stay distinguishable.
+    first_line = line.strip
+    unless no_labels
+      first_line = first_line.split(',').map { |elt| "#{old_filename}::#{elt}" }.join(',')
+    end
+    line_set = [first_line]
+  end
+end
+allfiles << line_set if line_set
+# Guard: if no input produced any line there is no line_set at all, and the
+# padding step below would fail on nil.  Nothing to pool, so stop quietly.
+exit if allfiles.empty?
+# Equalize all vectors to same length by padding with nils if needed...
+# NOTE(review): a short file is padded with a single empty cell per row even
+# when that file has several columns -- confirm this is acceptable.
+max_length = allfiles.map(&:length).max
+allfiles.each { |v| v.fill(nil, v.length...max_length) }
+# ...and output all the data
+allfiles.transpose.each { |row| puts row.join(',') }

data/exe/rundesign_general.rb ADDED Viewed

@@ -0,0 +1,85 @@
+#!/usr/bin/env ruby -w
+require 'colorize'
+String.disable_colorization false
+require 'optparse'
+require 'datafarming/error_handling'
+# Help text handed to ErrorHandling.clean_abort on a help request, on a wrong
+# argument count, or after a runtime error has been reported.  It is an
+# array of lines; the .yellow/.green/.red/.blue calls color individual
+# fragments.
+help_msg = [
+  'Run control to apply a designed experiment to a model with replication.', '',
+  'This script assumes that the model uses command-line arguments to',
+  'set factor values at run-time.', '',
+  'Syntax:',
+  "\n\t#{ErrorHandling.prog_name} [OPTIONS] ".yellow +
+    "'CMD' DOE_FILE #REPS OUTPUT_FILE".yellow, '',
+    "Arguments in square brackets are optional.  A vertical bar '|'",
+    'indicates valid alternatives for invoking the option.  Prefix',
+    'the command with "' + 'ruby'.yellow +
+    '" if it is not on your PATH.', '',
+  '  --help | -h | -? | ?'.green,
+  "\tProduce this help message.",
+  '  --print | -p'.green,
+  "\tPrint generated commands rather than executing them,",
+  "\tuseful for debugging.",
+  '  --destructive | -d'.green,
+  "\tOverwrite any prior contents in the output file.  Default",
+  "\tbehavior is to append new results to an existing output file.", '',
+  'Required arguments are:', '',
+  "  'CMD'".green,
+  "\tThe command to run the model.  " +
+    'MUST be placed in single quotes'.red,
+  "\tif the command contains any white space or special characters.",
+  "\tExample: " + "'java MyModel.jar'".blue,
+  '  DOE_FILE'.green,
+  "\tThe name of a text file containing the experimental design",
+  "\tto be used.  The design file should have one line per design",
+  "\tpoint with factor settings separated by white space.  Factor",
+  "\tsettings must be provided in the order expected by the model.",
+  '  #REPS'.green,
+  "\tAn integer specifying the number of times each design point",
+  "\tshould be replicated.  All design points are completed before",
+  "\tmoving to the next replication to minimize the risk of missing",
+  "\tdesign points if the run gets interrupted for any reason.",
+  '  OUTPUT_FILE'.green,
+  "\tThe name of a text file to which all output will be written."
+]
+print_cmds = false # default is to run rather than print
+destructive = false # default is non-destructive for output file
+OptionParser.new do |opts|
+  # Banner describes this script's own arguments; the previous text was
+  # copied from a filename-based script and had a mistyped closing bracket.
+  opts.banner = "Usage: #{$PROGRAM_NAME} [-h|--help] [-p|--print] " \
+                "[-d|--destructive] 'CMD' DOE_FILE #REPS OUTPUT_FILE"
+  opts.on('-h', '-?', '--help') { ErrorHandling.clean_abort help_msg }
+  opts.on('-p', '--print') { print_cmds = true }
+  opts.on('-d', '--destructive') { destructive = true }
+end.parse!
+# A bare '?' also requests help; exactly four positional arguments are
+# required (command, design file, replication count, output file).
+ErrorHandling.clean_abort help_msg if ARGV[0] == '?' || ARGV.length != 4
+# Run (or, with --print, just display) the model command once per design
+# point per replication.  Any StandardError is reported and followed by the
+# help text.
+begin
+  # What shall we run today?
+  cmd = ARGV.shift
+  # Read all the design points from the file named by the next argument and
+  # trim surrounding whitespace.  Non-bang strip is required: strip! returns
+  # nil when there is nothing to trim (e.g. a final line without a trailing
+  # newline), which would silently replace that design point with nil.
+  design_pts = File.readlines(ARGV.shift).map(&:strip)
+  # How many times do we want to do this?
+  reps = ARGV.shift.to_i
+  # Where do the results go?
+  output_file_name = ARGV.shift
+  File.delete(output_file_name) if destructive && File.exist?(output_file_name)
+  # Outer loop is the replication, so every design point is completed before
+  # the next replication starts.
+  reps.times do
+    design_pts.each do |design_pt|
+      # The shell appends the command's stdout to the output file.
+      exe_line = "#{cmd} #{design_pt} >> #{output_file_name}"
+      if print_cmds
+        puts exe_line
+      else
+        result = `#{exe_line}`
+        STDERR.puts result if result =~ /\S/
+      end
+    end
+  end
+rescue StandardError => e
+  ErrorHandling.message [e.message.red]
+  ErrorHandling.clean_abort help_msg
+end

data/exe/stack_nolhs.rb ADDED Viewed

@@ -0,0 +1,172 @@
+#!/usr/bin/env ruby -w
+require 'colorize'
+String.disable_colorization false
+require 'datafarming/error_handling'
+require 'datafarming/nolh_designs'
+# Help text handed to ErrorHandling.clean_abort on a help request, an unknown
+# argument, or an input error.  It is an array of lines; the program name in
+# the syntax line is the last path component of $PROGRAM_NAME (split on '/'
+# or '\').
+help_msg = [
+  'Generate scaled Latin hypercube designs with shifting and stacking. ',
+  'Results are a white-space delimited NOLH design written to ' +
+    'stdout'.light_blue + '.', '',
+  'Syntax:',
+  "\n\t#{$PROGRAM_NAME.split(%r{/|\\})[-1]} [--help]".yellow +
+    " [--stack #] [--levels #] [-e] [file_name]\n".yellow,
+  "Arguments in square brackets are optional.  A vertical bar '|'",
+  'indicates valid alternatives for invoking the option.  Prefix',
+  'the command with "' + 'ruby'.yellow +
+  '" if it is not on your PATH.', '',
+  '  --help | -h | -? | ?'.green,
+  "\tProduce this help message.  Supersedes any other choices.",
+  '  --stack # | -s #'.green,
+  "\t# specifies the number of stackings. A value of 1 means print the",
+  "\tbase design.  If this option is not specified the number of stackings",
+  "\tdefaults to the number of columns in the design.  The specified value",
+  "\tcannot exceed the number of columns in the design being used.",
+  '  --levels # | -l #'.green,
+  "\t# specifies the desired number of levels in the NOLH (17, 33, 65, 129,",
+  "\tor 257).  Defaults to the smallest design which can accommodate the",
+  "\tnumber of factors if this option is not specified.",
+  '  --excel-style-input | -e'.green,
+  "\tSpecify factor ranges and decimals as in the NOLH spreadsheet, i.e.,",
+  "\tthe first line is the set of minimum range values for each factor;",
+  "\tthe second line is maximum range values; and the third is the number",
+  "\tof decimal places to use for the range scaling.  Without this option,",
+  "\tthe default input format is one line per factor, comprised of the min,",
+  "\tmax, and number of decimal places separated by commas or whitespace.",
+  '  file_name'.green,
+  "\tThe name of a file containing the factor specifications.  If no",
+  "\tfilename is given, the user can enter the values interactively in",
+  "\tthe desired form or use file redirection with '<'.", '',
+  'Options may be given in any order, but must come before the file name',
+  'if one is provided.'
+]
+# Scaler objects will rescale a Latin Hypercube design from standard units
+# to a range as specified by min, max, and num_decimals
+class Scaler
+  # min, max     - endpoints of the target range
+  # num_decimals - number of decimal places kept by #scale
+  # lh_max       - highest level in the design (levels run 1..lh_max)
+  def initialize(min, max, num_decimals, lh_max = 17)
+    @min = min
+    @scale_factor = 10.to_r**num_decimals
+    # Size of one level step within the target range.
+    @step = (max - min) / (lh_max - 1).to_r
+  end
+
+  # Map a design level onto the target range: level 1 lands on min and
+  # level lh_max on max.  Returns an Integer when num_decimals is 0,
+  # otherwise a Float rounded to num_decimals places.
+  def scale(value)
+    levels_above_min = value.to_r - 1
+    unrounded = @min + @step * levels_above_min
+    return unrounded.round if @scale_factor == 1
+
+    ((@scale_factor * unrounded).round / @scale_factor).to_f
+  end
+end
+excel_style_inputs = false
+# Consume leading option arguments.  The first argument that does not start
+# with '-' or '?' ends option processing and is left in ARGV for ARGF.
+while ARGV[0] && ARGV[0].start_with?('-', '?')
+  current_value = ARGV.shift
+  case current_value
+  when '--stack', '-s'
+    num_stackings = ARGV.shift.to_i
+    # to_i maps a missing or non-numeric value to 0; a non-positive count
+    # would otherwise silently produce no output at all.
+    if num_stackings < 1
+      ErrorHandling.clean_abort [
+        "Invalid number of stackings: #{num_stackings}".red,
+        'Use a positive integer.'.yellow
+      ]
+    end
+  when '--levels', '-l'
+    lh_levels = ARGV.shift.to_i
+    unless NOLH::DESIGN_TABLE.keys.include?(lh_levels)
+      ErrorHandling.clean_abort [
+        "Invalid number of levels for Latin hypercube: #{lh_levels}".red,
+        'Use 17, 33, 65, 129, or 257.'.yellow
+      ]
+    end
+  when '--excel-style-input', '-e'
+    excel_style_inputs = true
+  when '--help', '-h', '-help', '-?', '?'
+    ErrorHandling.clean_abort help_msg
+  else
+    ErrorHandling.message ['Unknown argument: '.red + current_value.yellow]
+    ErrorHandling.clean_abort help_msg
+  end
+end
+# Read the factor specifications (min, max, #decimals per factor) through
+# ARGF, i.e. from the named file or from stdin.  Values on a line may be
+# separated by commas, semicolons, colons, or whitespace.
+delimiter = /\s*[,;:]\s*|\s+/
+begin
+  if excel_style_inputs
+    # Spreadsheet layout: one line of minimums, one of maximums, one of
+    # decimal counts.
+    if ARGV.empty?
+      STDERR.puts  'Enter one line of min values, one of max values,'.green +
+      ' and one of #decimals.'.green
+    end
+    min_values = ARGF.gets.strip.split(delimiter).map(&:to_f)
+    max_values = ARGF.gets.strip.split(delimiter).map(&:to_f)
+    decimals = ARGF.gets.strip.split(delimiter).map(&:to_i)
+  else
+    # Default layout: one line per factor holding min, max, #decimals.
+    if ARGV.empty?
+      STDERR.puts  'To terminate input enter '.green + 'ctrl-d'.cyan +
+        ' (Mac/Unix/Linux)'.green + ' or '.green + 'ctrl-z'.cyan +
+        ' (Windows).'.green
+      STDERR.puts  'Enter ranges for each factor on a separate line.'.green
+      STDERR.puts  "\nMIN\tMAX\t#DIGITS".cyan
+    end
+    min_values = []
+    max_values = []
+    decimals = []
+    while (line = ARGF.gets)
+      values = line.strip.split(delimiter)
+      # A blank line describes no factor.  Without this guard nil.to_f and
+      # nil.to_i would add a bogus factor with range 0..0.
+      next if values.empty?
+
+      min_values << values.shift.to_f
+      max_values << values.shift.to_f
+      decimals << values.shift.to_i
+    end
+  end
+rescue StandardError => e
+  ErrorHandling.message [e.message.red]
+  ErrorHandling.clean_abort help_msg
+end
+# n is the number of factors; the three input lists must agree in length.
+n = min_values.size
+if max_values.size != n || decimals.size != n
+  ErrorHandling.message ['Unequal counts for min, max, and decimals'.red]
+  ErrorHandling.clean_abort help_msg
+end
+# Smallest number of levels this script accepts for the given factor count;
+# fewer than 1 or more than 29 factors is rejected.
+minimal_size = case min_values.size
+               when 1..7
+                 17
+               when 8..11
+                 33
+               when 12..16
+                 65
+               when 17..22
+                 129
+               when 23..29
+                 257
+               else
+                 ErrorHandling.message ['invalid number of factors'.red]
+                 ErrorHandling.clean_abort help_msg
+               end
+# Use the level count requested with --levels, if any; otherwise the minimum.
+lh_levels ||= minimal_size
+if lh_levels < minimal_size
+  ErrorHandling.clean_abort [
+    "Latin hypercube with #{lh_levels} levels is too small for #{n} factors.".red
+  ]
+end
+# One Scaler per factor, mapping design levels onto that factor's range.
+factor = Array.new(n) do |i|
+  Scaler.new(min_values[i], max_values[i], decimals[i], lh_levels)
+end
+design = NOLH::DESIGN_TABLE[lh_levels]
+num_columns = design[0].length
+# Without --stack, stack once per design column.
+num_stackings ||= num_columns
+if num_stackings > num_columns
+  ErrorHandling.clean_abort [
+    'Requested stacking exceeds number of columns in latin hypercube '.red +
+    "(#{num_columns})".red
+  ]
+end
+# Integer division: index of the middle row of the design.
+mid_range = lh_levels / 2
+num_stackings.times do |stack_num|
+  design.each_with_index do |dp, i|
+    # Scale the first n columns of this design point, one Scaler per column.
+    scaled_dp = dp.slice(0, n).map.with_index { |x, k| factor[k].scale(x) }
+    # The middle row is printed for the first stack only -- presumably
+    # because it would repeat unchanged in later stacks; TODO confirm.
+    puts scaled_dp.join "\t" unless stack_num > 0 && i == mid_range
+    # Rotate this row's columns by one for the next stack.  This replaces
+    # the row inside the array obtained from NOLH::DESIGN_TABLE.
+    design[i] = dp.rotate
+  end
+end

data/exe/stripheaderdups.rb ADDED Viewed

@@ -0,0 +1,53 @@
+#! /usr/bin/env ruby -w
+# Strip duplicate headers out of file(s)
+require 'colorize'
+String.disable_colorization false
+require 'optparse'
+require 'datafarming/error_handling'
+# Help text handed to ErrorHandling.clean_abort when help is requested.  It
+# is an array of lines; the .blue/.yellow/.green calls color individual
+# fragments.
+help_msg = [
+  'Strip duplicate headers out of one or more files.', '',
+  'If filenames are specified, a backup is made for each file with',
+  "suffix '.orig' appended to the original filename and changes will",
+  'be made in-place in the original file.  If no filenames are given,',
+  'the script reads from ' + 'stdin'.blue + ' and writes to ' +
+    'stdout'.blue + '.  In either case,',
+  'all occurrences of lines which duplicate the first line in each',
+  'file are removed.', '',
+  'Syntax:',
+  "\n\t#{ErrorHandling.prog_name} [--help] [filenames...]".yellow, '',
+  "Arguments in square brackets are optional.  A vertical bar '|'",
+  'indicates valid alternatives for invoking the option.  Prefix',
+  'the command with "' + 'ruby'.yellow +
+  '" if it is not on your PATH.', '',
+  '  --help | -h | -? | ?'.green,
+  "\tProduce this help message.",
+  '  filenames...'.green,
+  "\tThe name[s] of the file[s] to be converted."
+]
+# The only options are the help flags, which hand the help text to
+# ErrorHandling.clean_abort; a bare '?' argument does the same.
+OptionParser.new do |opts|
+  # Closing bracket of the optional filename list was previously mistyped.
+  opts.banner = "Usage: #{$PROGRAM_NAME} [-h|--help] [filenames...]"
+  opts.on('-h', '-?', '--help') { ErrorHandling.clean_abort help_msg }
+end.parse!
+ErrorHandling.clean_abort help_msg if ARGV[0] == '?'
+# Edit files in place, keeping a backup with this suffix.
+$-i = '.orig'
+current_file = ''
+first_line = ''
+ARGF.each do |line|
+  if ARGF.filename != current_file
+    # First line of a new file: remember it as that file's header.
+    current_file = ARGF.filename
+    first_line = line
+  elsif line == first_line
+    # A later line identical to the header is dropped.
+    next
+  end
+  # The header itself and every non-duplicate line are copied through.
+  puts line
+end

data/exe/stripheaders.rb ADDED Viewed

@@ -0,0 +1,49 @@
+#! /usr/bin/env ruby -w
+# Strip header line out of file(s)
+require 'colorize'
+String.disable_colorization false
+require 'optparse'
+require 'datafarming/error_handling'
+# Help text handed to ErrorHandling.clean_abort when help is requested.  It
+# is an array of lines; the .blue/.yellow/.green calls color individual
+# fragments.
+help_msg = [
+  'Strip headers out of one or more file(s) to convert them to data-only.', '',
+  'If filenames are specified, a backup is made for each file with',
+  "suffix '.orig' appended to the original filename and changes will",
+  'be made in-place in the original file.  If no filenames are given,',
+  'the script reads from ' + 'stdin'.blue + ' and writes to ' +
+    'stdout'.blue + '.  In either case,',
+  'the first line of each input file is removed.', '',
+  'Syntax:',
+  "\n\t#{ErrorHandling.prog_name} [--help] [filenames...]".yellow, '',
+  "Arguments in square brackets are optional.  A vertical bar '|'",
+  'indicates valid alternatives for invoking the option.  Prefix',
+  'the command with "' + 'ruby'.yellow +
+  '" if it is not on your PATH.', '',
+  '  --help | -h | -? | ?'.green,
+  "\tProduce this help message.",
+  '  filenames...'.green,
+  "\tThe name[s] of the file[s] to be converted."
+]
+# The only options are the help flags, which hand the help text to
+# ErrorHandling.clean_abort; a bare '?' argument does the same.
+OptionParser.new do |opts|
+  # Closing bracket of the optional filename list was previously mistyped.
+  opts.banner = "Usage: #{$PROGRAM_NAME} [-h|--help] [filenames...]"
+  opts.on('-h', '-?', '--help') { ErrorHandling.clean_abort help_msg }
+end.parse!
+ErrorHandling.clean_abort help_msg if ARGV[0] == '?'
+# Edit files in place, keeping a backup with this suffix.
+$-i = '.orig'
+current_file = ''
+ARGF.each do |line|
+  unless ARGF.filename == current_file
+    # First line of a new file: note the file and drop the line.
+    current_file = ARGF.filename
+    next
+  end
+  # Every later line of the same file is copied through unchanged.
+  puts line
+end

data/lib/datafarming/cross.rb ADDED Viewed

@@ -0,0 +1,18 @@
+#!/usr/bin/env ruby -w
+module CrossedDesigns
+  # The "cross" method creates a large combinatorial design by crossing all
+  # combinations of individual smaller designs.
+  #
+  # inputs   - an array of arrays, where each sub-array contains a single
+  #            component design (itself an array of design points).
+  # idx      - index of the first component design to cross (default 0).
+  # tmp      - design-point prefix placed in front of every combination.
+  # solution - array that receives the combinations; it is also returned.
+  #
+  # Combinations are built one component at a time: every partial design
+  # point collected so far is extended by each design point of the next
+  # component, so the earliest component varies slowest in the result.
+  def self.cross(inputs, idx = 0, tmp = [], solution = [])
+    combinations = inputs.drop(idx).reduce([tmp]) do |partials, component|
+      partials.flat_map do |prefix|
+        component.map { |dp| prefix + dp }
+      end
+    end
+    solution.concat(combinations)
+  end
+end