datafarming 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/exe/pool_files.rb ADDED
@@ -0,0 +1,70 @@
1
+ #!/usr/bin/env ruby -w
2
+
3
+ # Ruby script to pool the columns of multiple csv files
4
+
5
+ require 'colorize'
6
+
7
+ String.disable_colorization false
8
+
9
+ require 'optparse'
10
+ require 'datafarming/error_handling'
11
+
12
# Help text passed to ErrorHandling.clean_abort for --help, for a lone '?',
# and when fewer than two input files are given.
help_msg = [
  'Pool the output from two or more CSV files to a single output file.', '',
  'The first line of output is the list of filenames that were the',
  'source files of the data to be merged. Subsequent lines are the',
  'contents of those files, and are assumed to be in CSV format.',
  'Output is written to ' + 'stdout'.blue + ' in CSV format.', '',
  'Syntax:',
  "\n\t#{ErrorHandling.prog_name} [--help] ".yellow +
    '[--no-labels] filenames...'.yellow, '',
  "Arguments in square brackets are optional. A vertical bar '|'",
  'indicates valid alternatives for invoking the option. Prefix',
  'the command with "' + 'ruby'.yellow +
    '" if it is not on your PATH.', '',
  ' --help | -h | -? | ?'.green,
  "\tProduce this help message.",
  ' --no-labels | -n'.green,
  "\tSpecify that individual files do not have labels.",
  ' filenames...'.green,
  "\tThe names of two or more files containing data to be pooled.",
  "\tInput file data can be delimited by commas, semicolons,",
  "\tcolons, or whitespace."
]

# Set by --no-labels: the first line of each file is then copied through
# unchanged instead of being prefixed with the file name.
no_labels = false
begin
  OptionParser.new do |opts|
    opts.banner = "Usage: #{$PROGRAM_NAME} [-h|--help] [-n|--no-labels] filenames..."
    opts.on('-h', '-?', '--help') { ErrorHandling.clean_abort help_msg }
    opts.on('-n', '--no-labels') { no_labels = true }
  end.parse!
rescue OptionParser::ParseError => e
  # Unknown or malformed options: report them and show the help text
  # instead of letting the exception surface as a backtrace.
  ErrorHandling.message [e.message.red]
  ErrorHandling.clean_abort help_msg
end

ErrorHandling.clean_abort help_msg if ARGV[0] == '?' || ARGV.length < 2
43
+
44
# Each input file becomes one "column": an array holding that file's lines.
old_filename = nil
line_set = nil
allfiles = []

# Read all data from all files, starting a fresh line_set whenever ARGF
# moves on to a new file.
ARGF.each_line do |line|
  if ARGF.filename == old_filename
    line_set << line.strip
    next
  end

  # First line of a new file. The column is registered as soon as it is
  # created, so no trailing append is needed after the loop.
  old_filename = ARGF.filename
  line_set = []
  allfiles << line_set
  line_set << if no_labels
                line.strip
              else
                # Prefix every comma-separated label with its source file name.
                line.strip.split(',').map { |elt| "#{old_filename}::#{elt}" }.join(',')
              end
end

# Nothing was read (e.g. every input file was empty): report it rather than
# failing later on a nil column.
if allfiles.empty?
  ErrorHandling.clean_abort ['No data found in the input files.'.red]
end

# Equalize all columns to the same length by padding with nils if needed...
max_length = allfiles.map(&:length).max
allfiles.each { |column| column.concat(Array.new(max_length - column.length)) }
# ...and output all the data, one pooled row per line (nil pads print as
# empty fields).
allfiles.transpose.each { |row| puts row.join(',') }
@@ -0,0 +1,85 @@
1
+ #!/usr/bin/env ruby -w
2
+
3
+ require 'colorize'
4
+
5
+ String.disable_colorization false
6
+
7
+ require 'optparse'
8
+ require 'datafarming/error_handling'
9
+
10
# Help text passed to ErrorHandling.clean_abort for --help, for a lone '?',
# and when the number of positional arguments is not exactly four.
help_msg = [
  'Run control to apply a designed experiment to a model with replication.', '',
  'This script assumes that the model uses command-line arguments to',
  'set factor values at run-time.', '',
  'Syntax:',
  "\n\t#{ErrorHandling.prog_name} [OPTIONS] ".yellow +
    "'CMD' DOE_FILE #REPS OUTPUT_FILE".yellow, '',
  "Arguments in square brackets are optional. A vertical bar '|'",
  'indicates valid alternatives for invoking the option. Prefix',
  'the command with "' + 'ruby'.yellow +
    '" if it is not on your PATH.', '',
  ' --help | -h | -? | ?'.green,
  "\tProduce this help message.",
  ' --print | -p'.green,
  "\tPrint generated commands rather than executing them,",
  "\tuseful for debugging.",
  ' --destructive | -d'.green,
  "\tOverwrite any prior contents in the output file. Default",
  "\tbehavior is to append new results to an existing output file.", '',
  'Required arguments are:', '',
  " 'CMD'".green,
  "\tThe command to run the model. " +
    'MUST be placed in single quotes'.red,
  "\tif the command contains any white space or special characters.",
  "\tExample: " + "'java MyModel.jar'".blue,
  ' DOE_FILE'.green,
  "\tThe name of a text file containing the experimental design",
  "\tto be used. The design file should have one line per design",
  "\tpoint with factor settings separated by white space. Factor",
  "\tsettings must be provided in the order expected by the model.",
  ' #REPS'.green,
  "\tAn integer specifying the number of times each design point",
  "\tshould be replicated. All design points are completed before",
  "\tmoving to the next replication to minimize the risk of missing",
  "\tdesign points if the run gets interrupted for any reason.",
  ' OUTPUT_FILE'.green,
  "\tThe name of a text file to which all output will be written."
]

print_cmds = false  # default is to run rather than print
destructive = false # default is non-destructive for output file
begin
  OptionParser.new do |opts|
    opts.banner = "Usage: #{$PROGRAM_NAME} [-h|--help] [-p|--print] " \
                  "[-d|--destructive] 'CMD' DOE_FILE #REPS OUTPUT_FILE"
    opts.on('-h', '-?', '--help') { ErrorHandling.clean_abort help_msg }
    opts.on('-p', '--print') { print_cmds = true }
    opts.on('-d', '--destructive') { destructive = true }
  end.parse!
rescue OptionParser::ParseError => e
  # Unknown or malformed options: report them and show the help text
  # instead of letting the exception surface as a backtrace.
  ErrorHandling.message [e.message.red]
  ErrorHandling.clean_abort help_msg
end

ErrorHandling.clean_abort help_msg if ARGV[0] == '?' || ARGV.length != 4
59
+
60
# Run (or print) the model command once per design point per replication.
# Any StandardError raised along the way is reported, followed by the help
# text.
begin
  # What shall we run today?
  cmd = ARGV.shift
  # Read all the design points from the file named by the next argument.
  # Non-bang strip is required here: strip! returns nil for a line that
  # needs no stripping (e.g. a final line without a trailing newline), which
  # would silently drop that design point's factor settings. Blank lines are
  # not design points and are skipped.
  design_pts = File.readlines(ARGV.shift).map(&:strip).reject(&:empty?)
  # How many times do we want to do this? Integer() rejects non-numeric
  # input instead of quietly treating it as zero replications.
  reps = Integer(ARGV.shift, 10)
  raise ArgumentError, "Number of replications must be at least 1 (got #{reps})" if reps < 1

  # Where do the results go?
  output_file_name = ARGV.shift
  # --print is a dry run, so prior results are only removed when commands
  # will really be executed.
  if destructive && !print_cmds && File.exist?(output_file_name)
    File.delete(output_file_name)
  end
  # All design points are completed before moving on to the next replication.
  reps.times do
    design_pts.each do |design_pt|
      exe_line = "#{cmd} #{design_pt} >> #{output_file_name}"
      if print_cmds
        puts exe_line
      else
        result = `#{exe_line}`
        # Standard output is appended to the output file by the shell; pass
        # along anything non-blank that the backticks still captured.
        STDERR.puts result if result =~ /\S/
      end
    end
  end
rescue StandardError => e
  ErrorHandling.message [e.message.red]
  ErrorHandling.clean_abort help_msg
end
@@ -0,0 +1,172 @@
1
+ #!/usr/bin/env ruby -w
2
+
3
+ require 'colorize'
4
+ String.disable_colorization false
5
+
6
+ require 'datafarming/error_handling'
7
+ require 'datafarming/nolh_designs'
8
+
9
# Help text passed to ErrorHandling.clean_abort when help is requested or
# when the arguments or input cannot be processed. The program name in the
# syntax line comes from ErrorHandling.prog_name, the same helper the other
# datafarming scripts use for their syntax lines.
help_msg = [
  'Generate scaled Latin hypercube designs with shifting and stacking. ',
  'Results are a white-space delimited NOLH design written to ' +
    'stdout'.light_blue + '.', '',
  'Syntax:',
  "\n\t#{ErrorHandling.prog_name} [--help]".yellow +
    " [--stack #] [--levels #] [-e] [file_name]\n".yellow,
  "Arguments in square brackets are optional. A vertical bar '|'",
  'indicates valid alternatives for invoking the option. Prefix',
  'the command with "' + 'ruby'.yellow +
    '" if it is not on your PATH.', '',
  ' --help | -h | -? | ?'.green,
  "\tProduce this help message. Supersedes any other choices.",
  ' --stack # | -s #'.green,
  "\t# specifies the number of stackings. A value of 1 means print the",
  "\tbase design. If this option is not specified the number of stackings",
  "\tdefaults to the number of columns in the design. The specified value",
  "\tcannot exceed the number of columns in the design being used.",
  ' --levels # | -l #'.green,
  "\t# specifies the desired number of levels in the NOLH (17, 33, 65, 129,",
  "\tor 257). Defaults to the smallest design which can accommodate the",
  "\tnumber of factors if this option is not specified.",
  ' --excel-style-input | -e'.green,
  "\tSpecify factor ranges and decimals as in the NOLH spreadsheet, i.e.,",
  "\tthe first line is the set of minimum range values for each factor;",
  "\tthe second line is maximum range values; and the third is the number",
  "\tof decimal places to use for the range scaling. Without this option,",
  "\tthe default input format is one line per factor, comprised of the min,",
  "\tmax, and number of decimal places separated by commas or whitespace.",
  ' file_name'.green,
  "\tThe name of a file containing the factor specifications. If no",
  "\tfilename is given, the user can enter the values interactively in",
  "\tthe desired form or use file redirection with '<'.", '',
  'Options may be given in any order, but must come before the file name',
  'if one is provided.'
]
45
+
46
# Scaler objects rescale a Latin hypercube design from standard units
# (levels 1 through lh_max) to a range as specified by min, max, and
# num_decimals.
class Scaler
  # min::          value that level 1 is mapped to
  # max::          value that level lh_max is mapped to
  # num_decimals:: number of decimal places kept when rounding scaled values
  # lh_max::       number of levels in the design (default 17)
  #
  # Raises ArgumentError when lh_max is below 2, since a single level leaves
  # no interval to spread the range over (the step would divide by zero).
  def initialize(min, max, num_decimals, lh_max = 17)
    raise ArgumentError, "lh_max must be at least 2 (got #{lh_max})" if lh_max < 2

    @min = min
    # Size of the step between two adjacent levels.
    @range = (max - min) / (lh_max - 1).to_r
    @scale_factor = 10.to_r**num_decimals
  end

  # Maps a design level onto the target range.
  #
  # Returns an Integer when zero decimal places were requested; otherwise a
  # Float rounded to the requested number of decimal places.
  def scale(value)
    new_value = @min + @range * (value.to_r - 1.to_r)
    return new_value.round if @scale_factor == 1

    ((@scale_factor * new_value).round / @scale_factor).to_f
  end
end
64
+
65
# Consume leading option arguments (anything starting with '-' or '?'),
# leaving an optional file name in ARGV for ARGF to read.
excel_style_inputs = false
while ARGV[0] && ARGV[0].start_with?('-', '?')
  current_value = ARGV.shift
  case current_value
  when '--stack', '-s'
    stack_arg = ARGV.shift
    num_stackings = stack_arg.to_i
    # A missing or non-numeric value converts to 0, which would otherwise
    # make the script print nothing at all without explanation.
    if num_stackings < 1
      ErrorHandling.clean_abort [
        "Invalid number of stackings: #{stack_arg.inspect}".red,
        'Use a positive integer.'.yellow
      ]
    end
  when '--levels', '-l'
    lh_levels = ARGV.shift.to_i
    unless NOLH::DESIGN_TABLE.key?(lh_levels)
      ErrorHandling.clean_abort [
        "Invalid number of levels for Latin hypercube: #{lh_levels}".red,
        'Use 17, 33, 65, 129, or 257.'.yellow
      ]
    end
  when '--excel-style-input', '-e'
    excel_style_inputs = true
  when '--help', '-h', '-help', '-?', '?'
    ErrorHandling.clean_abort help_msg
  else
    ErrorHandling.message ['Unknown argument: '.red + current_value.yellow]
    ErrorHandling.clean_abort help_msg
  end
end
88
+
89
# Splits one input line into fields delimited by commas, semicolons, colons,
# or whitespace.
split_fields = ->(text) { text.strip.split(/\s*[,;:]\s*|\s+/) }

# Read the factor specifications into three parallel arrays: min_values,
# max_values, and decimals. A prompt is written to stderr when no file name
# was given on the command line.
begin
  if excel_style_inputs
    if ARGV.empty?
      STDERR.puts 'Enter one line of min values, one of max values,'.green +
                  ' and one of #decimals.'.green
    end
    # Exactly three lines are expected: mins, maxes, and decimal places.
    # Running out of input is reported explicitly rather than as a
    # NoMethodError on nil.
    min_fields, max_fields, decimal_fields = Array.new(3) do
      line = ARGF.gets
      raise EOFError, 'Expected three lines of input: mins, maxes, and #decimals.' unless line

      split_fields.call(line)
    end
    min_values = min_fields.map(&:to_f)
    max_values = max_fields.map(&:to_f)
    decimals = decimal_fields.map(&:to_i)
  else
    if ARGV.empty?
      STDERR.puts 'To terminate input enter '.green + 'ctrl-d'.cyan +
                  ' (Mac/Unix/Linux)'.green + ' or '.green + 'ctrl-z'.cyan +
                  ' (Windows).'.green
      STDERR.puts 'Enter ranges for each factor on a separate line.'.green
      STDERR.puts "\nMIN\tMAX\t#DIGITS".cyan
    end
    min_values = []
    max_values = []
    decimals = []
    # One factor per line: min, max, and number of decimal places.
    ARGF.each_line do |line|
      values = split_fields.call(line)
      # A blank line (e.g. a trailing newline) is not a factor; without this
      # guard it would be recorded as a factor with min = max = 0.
      next if values.empty?

      min_values << values.shift.to_f
      max_values << values.shift.to_f
      decimals << values.shift.to_i
    end
  end
rescue StandardError => e
  ErrorHandling.message [e.message.red]
  ErrorHandling.clean_abort help_msg
end
120
+
121
# Number of factors; the three specification arrays must agree on it.
n = min_values.size
if max_values.size != n || decimals.size != n
  ErrorHandling.message ['Unequal counts for min, max, and decimals'.red]
  ErrorHandling.clean_abort help_msg
end

# Smallest number of levels whose design can accommodate n factors.
minimal_size = case n
               when 1..7 then 17
               when 8..11 then 33
               when 12..16 then 65
               when 17..22 then 129
               when 23..29 then 257
               else
                 ErrorHandling.message ['invalid number of factors'.red]
                 ErrorHandling.clean_abort help_msg
               end

# Use the requested number of levels if one was given, else the smallest fit.
lh_levels ||= minimal_size

if lh_levels < minimal_size
  ErrorHandling.clean_abort [
    "Latin hypercube with #{lh_levels} levels is too small for #{n} factors.".red
  ]
end

# One Scaler per factor, in input order.
factor = Array.new(n) do |i|
  Scaler.new(min_values[i], max_values[i], decimals[i], lh_levels)
end

design = NOLH::DESIGN_TABLE[lh_levels]

num_columns = design[0].length
num_stackings ||= num_columns
if num_stackings > num_columns
  ErrorHandling.clean_abort [
    'Requested stacking exceeds number of columns in latin hypercube '.red +
      "(#{num_columns})".red
  ]
end

# Index of the middle design point. It is printed only in the first stack.
# NOTE(review): presumably because it is the same point in every rotation;
# confirm against the design tables.
mid_range = lh_levels / 2
num_stackings.times do |stack_num|
  design.each_with_index do |dp, i|
    next if stack_num > 0 && i == mid_range

    # Stack number k uses each design point rotated left k times. Rotating a
    # copy on the fly gives the same rows as rotating the stored row once per
    # pass, without modifying the rows held in NOLH::DESIGN_TABLE.
    shifted = dp.rotate(stack_num).first(n)
    scaled_dp = shifted.each_with_index.map { |x, k| factor[k].scale(x) }
    puts scaled_dp.join("\t")
  end
end
@@ -0,0 +1,53 @@
1
+ #! /usr/bin/env ruby -w
2
+
3
+ # Strip duplicate headers out of file(s)
4
+
5
+ require 'colorize'
6
+
7
+ String.disable_colorization false
8
+
9
+ require 'optparse'
10
+ require 'datafarming/error_handling'
11
+
12
# Help text passed to ErrorHandling.clean_abort for --help and for a lone '?'.
help_msg = [
  'Strip duplicate headers out of one or more files.', '',
  'If filenames are specified, a backup is made for each file with',
  "suffix '.orig' appended to the original filename and changes will",
  'be made in-place in the original file. If no filenames are given,',
  'the script reads from ' + 'stdin'.blue + ' and writes to ' +
    'stdout'.blue + '. In either case,',
  'all occurrences of lines which duplicate the first line in each',
  'file are removed.', '',
  'Syntax:',
  "\n\t#{ErrorHandling.prog_name} [--help] [filenames...]".yellow, '',
  "Arguments in square brackets are optional. A vertical bar '|'",
  'indicates valid alternatives for invoking the option. Prefix',
  'the command with "' + 'ruby'.yellow +
    '" if it is not on your PATH.', '',
  ' --help | -h | -? | ?'.green,
  "\tProduce this help message.",
  ' filenames...'.green,
  "\tThe name[s] of the file[s] to be converted."
]

begin
  OptionParser.new do |opts|
    opts.banner = "Usage: #{$PROGRAM_NAME} [-h|--help] [filenames...]"
    opts.on('-h', '-?', '--help') { ErrorHandling.clean_abort help_msg }
  end.parse!
rescue OptionParser::ParseError => e
  # Unknown or malformed options: report them and show the help text
  # instead of letting the exception surface as a backtrace.
  ErrorHandling.message [e.message.red]
  ErrorHandling.clean_abort help_msg
end

ErrorHandling.clean_abort help_msg if ARGV[0] == '?'
39
+
40
$-i = '.orig' # specify backup suffix

oldfilename = ''
header = ''

# Copy every line through, except later lines that repeat the first line
# (the header) of the file they belong to.
ARGF.each do |line|
  if ARGF.filename != oldfilename # if it's a different file
    oldfilename = ARGF.filename   # make it the old file
    # Remember its header without the line terminator, so that a repeated
    # header on a final line lacking a trailing newline is still recognized.
    header = line.chomp
    puts line                     # and copy it just this once
  elsif line.chomp != header      # same file:
    puts line                     # copy non-header lines
  end
end
@@ -0,0 +1,49 @@
1
+ #! /usr/bin/env ruby -w
2
+
3
+ # Strip header line out of file(s)
4
+
5
+ require 'colorize'
6
+
7
+ String.disable_colorization false
8
+
9
+ require 'optparse'
10
+ require 'datafarming/error_handling'
11
+
12
# Help text passed to ErrorHandling.clean_abort for --help and for a lone '?'.
help_msg = [
  'Strip headers out of one or more file(s) to convert them to data-only.', '',
  'If filenames are specified, a backup is made for each file with',
  "suffix '.orig' appended to the original filename and changes will",
  'be made in-place in the original file. If no filenames are given,',
  'the script reads from ' + 'stdin'.blue + ' and writes to ' +
    'stdout'.blue + '. In either case,',
  'the first line of each input file is removed.', '',
  'Syntax:',
  "\n\t#{ErrorHandling.prog_name} [--help] [filenames...]".yellow, '',
  "Arguments in square brackets are optional. A vertical bar '|'",
  'indicates valid alternatives for invoking the option. Prefix',
  'the command with "' + 'ruby'.yellow +
    '" if it is not on your PATH.', '',
  ' --help | -h | -? | ?'.green,
  "\tProduce this help message.",
  ' filenames...'.green,
  "\tThe name[s] of the file[s] to be converted."
]

begin
  OptionParser.new do |opts|
    opts.banner = "Usage: #{$PROGRAM_NAME} [-h|--help] [filenames...]"
    opts.on('-h', '-?', '--help') { ErrorHandling.clean_abort help_msg }
  end.parse!
rescue OptionParser::ParseError => e
  # Unknown or malformed options: report them and show the help text
  # instead of letting the exception surface as a backtrace.
  ErrorHandling.message [e.message.red]
  ErrorHandling.clean_abort help_msg
end

ErrorHandling.clean_abort help_msg if ARGV[0] == '?'
38
+
39
$-i = '.orig' # backup suffix used when files are named on the command line

# Name of the file whose first line has already been dropped.
seen_file = ''

ARGF.each_line do |text|
  if ARGF.filename != seen_file
    # First line of a new file: note the file and skip the line.
    seen_file = ARGF.filename
    next
  end
  puts text # every later line of the same file is copied through
end
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/env ruby -w
2
+
3
module CrossedDesigns
  # The "cross" method creates a large combinatorial design by crossing all
  # combinations of individual smaller designs.
  #
  # inputs::   an array of component designs; each component design is an
  #            array of design points, and each design point is an array of
  #            factor settings.
  # idx::      index of the first component design to cross (default 0).
  # tmp::      settings prepended to every combined design point (default
  #            empty).
  # solution:: array that the combined design points are appended to
  #            (default: a new array).
  #
  # Returns solution, holding one entry per combination of one design point
  # from each component design from idx onward. Each entry is tmp followed
  # by the chosen points' settings, concatenated in component order, with
  # the last component varying fastest. Crossing zero designs yields a
  # single empty combination; an empty component design yields none.
  def self.cross(inputs, idx = 0, tmp = [], solution = [])
    first, *rest = inputs.drop(idx)
    # Array#product enumerates the combinations; with no designs left there
    # is exactly one (empty) combination.
    combos = first ? first.product(*rest) : [[]]
    # Each combo is a list of design points; flatten(1) joins their settings
    # into a single combined design point.
    combos.each { |combo| solution << tmp + combo.flatten(1) }
    solution
  end
end