datafarming 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/exe/pool_files.rb ADDED
@@ -0,0 +1,70 @@
1
#!/usr/bin/env ruby -w

# Ruby script to pool the columns of multiple csv files.
#
# Each input file contributes one group of output columns: output row i is
# line i of every file, joined with commas. Files with fewer lines than the
# longest one are padded with empty cells. Unless --no-labels is given, every
# comma-separated field on a file's first line is prefixed with
# "<filename>::" so pooled columns can be traced back to their source.

require 'colorize'

String.disable_colorization false

require 'optparse'
require 'datafarming/error_handling'

help_msg = [
  'Pool the output from two or more CSV files to a single output file.', '',
  'The first line of output is the list of filenames that were the',
  'source files of the data to be merged. Subsequent lines are the',
  'contents of those files, and are assumed to be in CSV format.',
  'Output is written to ' + 'stdout'.blue + ' in CSV format.', '',
  'Syntax:',
  "\n\t#{ErrorHandling.prog_name} [--help] ".yellow +
    '[--no-labels] filenames...'.yellow, '',
  "Arguments in square brackets are optional. A vertical bar '|'",
  'indicates valid alternatives for invoking the option. Prefix',
  'the command with "' + 'ruby'.yellow +
    '" if it is not on your PATH.', '',
  ' --help | -h | -? | ?'.green,
  "\tProduce this help message.",
  ' --no-labels | -n'.green,
  "\tSpecify that individual files do not have labels.",
  ' filenames...'.green,
  "\tThe names of two or more files containing data to be pooled.",
  "\tInput file data can be delimited by commas, semicolons,",
  "\tcolons, or whitespace."
]

no_labels = false
parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{$PROGRAM_NAME} [-h|--help] [-n|--no-labels] filenames..."
  opts.on('-h', '-?', '--help') { ErrorHandling.clean_abort help_msg }
  opts.on('-n', '--no-labels') { no_labels = true }
end

begin
  parser.parse!
rescue OptionParser::ParseError => e
  # Unknown or malformed options are usage errors: report them and show the
  # help text rather than dying with a backtrace.
  ErrorHandling.message [e.message.red]
  ErrorHandling.clean_abort help_msg
end

ErrorHandling.clean_abort help_msg if ARGV[0] == '?' || ARGV.length < 2

current_file = nil
column = nil
columns = []

# Read in all data from all files, starting a fresh column whenever ARGF
# moves on to a new file.
ARGF.each do |line|
  if ARGF.filename == current_file
    column << line.strip
  else
    current_file = ARGF.filename
    columns << column if column
    first_line = line.strip
    unless no_labels
      # NOTE(review): the help text advertises semicolon-, colon- and
      # whitespace-delimited input, but labels are only split on commas.
      first_line = first_line.split(',').map { |label| "#{current_file}::#{label}" }.join(',')
    end
    column = [first_line]
  end
end
# The last file's column is still pending; it is nil only when every input
# was empty, in which case there is nothing to output.
columns << column if column

# Equalize all columns to the same length by padding with nils if needed
# (nil renders as an empty cell when joined)...
max_length = columns.map(&:length).max
columns.each { |col| col.concat(Array.new(max_length - col.length)) }
# ...and output all the data
columns.transpose.each { |row| puts row.join(',') }
@@ -0,0 +1,85 @@
1
#!/usr/bin/env ruby -w

# Run control for a designed experiment: for each replication, invoke the
# model command once per design point, passing the design point's factor
# settings as command-line arguments and appending the model's standard
# output to the output file.

require 'colorize'

String.disable_colorization false

require 'optparse'
require 'datafarming/error_handling'

help_msg = [
  'Run control to apply a designed experiment to a model with replication.', '',
  'This script assumes that the model uses command-line arguments to',
  'set factor values at run-time.', '',
  'Syntax:',
  "\n\t#{ErrorHandling.prog_name} [OPTIONS] ".yellow +
    "'CMD' DOE_FILE #REPS OUTPUT_FILE".yellow, '',
  "Arguments in square brackets are optional. A vertical bar '|'",
  'indicates valid alternatives for invoking the option. Prefix',
  'the command with "' + 'ruby'.yellow +
    '" if it is not on your PATH.', '',
  ' --help | -h | -? | ?'.green,
  "\tProduce this help message.",
  ' --print | -p'.green,
  "\tPrint generated commands rather than executing them,",
  "\tuseful for debugging.",
  ' --destructive | -d'.green,
  "\tOverwrite any prior contents in the output file. Default",
  "\tbehavior is to append new results to an existing output file.", '',
  'Required arguments are:', '',
  " 'CMD'".green,
  "\tThe command to run the model. " +
    'MUST be placed in single quotes'.red,
  "\tif the command contains any white space or special characters.",
  "\tExample: " + "'java MyModel.jar'".blue,
  ' DOE_FILE'.green,
  "\tThe name of a text file containing the experimental design",
  "\tto be used. The design file should have one line per design",
  "\tpoint with factor settings separated by white space. Factor",
  "\tsettings must be provided in the order expected by the model.",
  ' #REPS'.green,
  "\tAn integer specifying the number of times each design point",
  "\tshould be replicated. All design points are completed before",
  "\tmoving to the next replication to minimize the risk of missing",
  "\tdesign points if the run gets interrupted for any reason.",
  ' OUTPUT_FILE'.green,
  "\tThe name of a text file to which all output will be written."
]

print_cmds = false # default is to run rather than print
destructive = false # default is non-destructive for output file
parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{$PROGRAM_NAME} [-h|--help] [-p|--print] " \
                "[-d|--destructive] 'CMD' DOE_FILE #REPS OUTPUT_FILE"
  opts.on('-h', '-?', '--help') { ErrorHandling.clean_abort help_msg }
  opts.on('-p', '--print') { print_cmds = true }
  opts.on('-d', '--destructive') { destructive = true }
end

begin
  parser.parse!
rescue OptionParser::ParseError => e
  # Unknown or malformed options are usage errors, not crashes.
  ErrorHandling.message [e.message.red]
  ErrorHandling.clean_abort help_msg
end

ErrorHandling.clean_abort help_msg if ARGV[0] == '?' || ARGV.length != 4

begin
  # What shall we run today?
  cmd = ARGV.shift
  # Read all the design points from the file named by the next argument.
  # Use the non-destructive strip: strip! returns nil for a line that has
  # nothing to strip (e.g. a final line without a newline), which would turn
  # that design point into nil. Blank lines are not design points.
  design_pts = File.readlines(ARGV.shift).map(&:strip).reject(&:empty?)
  # How many times do we want to do this? Integer() rejects non-numeric
  # input instead of silently treating it as zero replications.
  reps = Integer(ARGV.shift, 10)
  # Where do the results go?
  output_file_name = ARGV.shift
  # Printing commands is a dry run, so never discard existing results then.
  if destructive && !print_cmds && File.exist?(output_file_name)
    File.delete(output_file_name)
  end
  # Complete every design point before starting the next replication.
  reps.times do
    design_pts.each do |design_pt|
      exe_line = "#{cmd} #{design_pt} >> #{output_file_name}"
      if print_cmds
        puts exe_line
      else
        # The command's stdout is appended to the output file by the shell
        # redirection; echo anything non-blank that the backticks capture.
        result = `#{exe_line}`
        STDERR.puts result if result =~ /\S/
      end
    end
  end
rescue StandardError => e
  ErrorHandling.message [e.message.red]
  ErrorHandling.clean_abort help_msg
end
@@ -0,0 +1,172 @@
1
+ #!/usr/bin/env ruby -w
2
+
3
+ require 'colorize'
4
+ String.disable_colorization false
5
+
6
+ require 'datafarming/error_handling'
7
+ require 'datafarming/nolh_designs'
8
+
9
# Usage text for the stacked NOLH generator: an array of display lines,
# with colorized option names and examples.
help_msg = [
  'Generate scaled Latin hypercube designs with shifting and stacking. ',
  "Results are a white-space delimited NOLH design written to #{'stdout'.light_blue}.",
  '',
  'Syntax:',
  "\n\t#{$PROGRAM_NAME.split(%r{/|\\}).last} [--help]".yellow +
    " [--stack #] [--levels #] [-e] [file_name]\n".yellow,
  "Arguments in square brackets are optional. A vertical bar '|'",
  'indicates valid alternatives for invoking the option. Prefix',
  %(the command with "#{'ruby'.yellow}" if it is not on your PATH.),
  '',
  ' --help | -h | -? | ?'.green,
  "\tProduce this help message. Supersedes any other choices.",
  ' --stack # | -s #'.green,
  "\t# specifies the number of stackings. A value of 1 means print the",
  "\tbase design. If this option is not specified the number of stackings",
  "\tdefaults to the number of columns in the design. The specified value",
  "\tcannot exceed the number of columns in the design being used.",
  ' --levels # | -l #'.green,
  "\t# specifies the desired number of levels in the NOLH (17, 33, 65, 129,",
  "\tor 257). Defaults to the smallest design which can accommodate the",
  "\tnumber of factors if this option is not specified.",
  ' --excel-style-input | -e'.green,
  "\tSpecify factor ranges and decimals as in the NOLH spreadsheet, i.e.,",
  "\tthe first line is the set of minimum range values for each factor;",
  "\tthe second line is maximum range values; and the third is the number",
  "\tof decimal places to use for the range scaling. Without this option,",
  "\tthe default input format is one line per factor, comprised of the min,",
  "\tmax, and number of decimal places separated by commas or whitespace.",
  ' file_name'.green,
  "\tThe name of a file containing the factor specifications. If no",
  "\tfilename is given, the user can enter the values interactively in",
  "\tthe desired form or use file redirection with '<'.",
  '',
  'Options may be given in any order, but must come before the file name',
  'if one is provided.'
]
45
+
46
# A Scaler maps Latin hypercube levels, expressed in standard units
# 1..lh_max, linearly onto the interval [min, max] and rounds the result
# to num_decimals decimal places.
class Scaler
  # min, max::      values produced for levels 1 and lh_max respectively
  # num_decimals::  number of decimal places kept by #scale
  # lh_max::        highest level of the design (defaults to 17)
  def initialize(min, max, num_decimals, lh_max = 17)
    @min = min
    # Width of one level step, measured in output units.
    @range = (max - min) / Rational(lh_max - 1)
    @scale_factor = Rational(10)**num_decimals
  end

  # Convert a level in 1..lh_max to the scaled range. Returns the result of
  # Numeric#round when zero decimals were requested, otherwise a Float
  # rounded to the requested number of places.
  def scale(value)
    scaled = @min + @range * (value.to_r - 1)
    return scaled.round if @scale_factor == 1

    shifted = (@scale_factor * scaled).round
    (shifted / @scale_factor).to_f
  end
end
64
+
65
# Fields may be separated by a comma, semicolon or colon (with optional
# surrounding whitespace) or by plain whitespace.
field_separator = /\s*[,;:]\s*|\s+/

excel_style_inputs = false
num_stackings = nil
lh_levels = nil

# Consume the value that follows an option. A missing or non-integer value is
# a usage error and is reported as one.
integer_option = lambda do |option|
  begin
    Integer(ARGV.shift, 10)
  rescue ArgumentError, TypeError
    ErrorHandling.message ["Option #{option} requires an integer value".red]
    ErrorHandling.clean_abort help_msg
  end
end

# Hand-rolled option parsing: consume leading arguments that look like
# options; whatever remains is treated as the input file name.
while ARGV[0] && ARGV[0].start_with?('-', '?')
  current_value = ARGV.shift
  case current_value
  when '--stack', '-s'
    num_stackings = integer_option.call(current_value)
    if num_stackings < 1
      ErrorHandling.clean_abort [
        "Invalid number of stackings: #{num_stackings}".red,
        'Use a value of at least 1.'.yellow
      ]
    end
  when '--levels', '-l'
    lh_levels = integer_option.call(current_value)
    unless NOLH::DESIGN_TABLE.key?(lh_levels)
      ErrorHandling.clean_abort [
        "Invalid number of levels for Latin hypercube: #{lh_levels}".red,
        'Use 17, 33, 65, 129, or 257.'.yellow
      ]
    end
  when '--excel-style-input', '-e'
    excel_style_inputs = true
  when '--help', '-h', '-help', '-?', '?'
    ErrorHandling.clean_abort help_msg
  else
    ErrorHandling.message ['Unknown argument: '.red + current_value.yellow]
    ErrorHandling.clean_abort help_msg
  end
end

# Read the factor specifications, prompting on stderr when input is
# interactive (no file name given).
begin
  if excel_style_inputs
    if ARGV.empty?
      STDERR.puts 'Enter one line of min values, one of max values,'.green +
                  ' and one of #decimals.'.green
    end
    # Three rows: all minimums, all maximums, all decimal counts.
    min_values = ARGF.gets.strip.split(field_separator).map(&:to_f)
    max_values = ARGF.gets.strip.split(field_separator).map(&:to_f)
    decimals = ARGF.gets.strip.split(field_separator).map(&:to_i)
  else
    if ARGV.empty?
      STDERR.puts 'To terminate input enter '.green + 'ctrl-d'.cyan +
                  ' (Mac/Unix/Linux)'.green + ' or '.green + 'ctrl-z'.cyan +
                  ' (Windows).'.green
      STDERR.puts 'Enter ranges for each factor on a separate line.'.green
      STDERR.puts "\nMIN\tMAX\t#DIGITS".cyan
    end
    min_values = []
    max_values = []
    decimals = []
    # One row per factor: min, max, number of decimals.
    while (line = ARGF.gets)
      values = line.strip.split(field_separator)
      # A blank line is not a factor; without this it would be read as a
      # factor with min 0.0, max 0.0 and 0 decimals.
      next if values.empty?

      min_values << values.shift.to_f
      max_values << values.shift.to_f
      decimals << values.shift.to_i
    end
  end
rescue StandardError => e
  ErrorHandling.message [e.message.red]
  ErrorHandling.clean_abort help_msg
end

n = min_values.size
if max_values.size != n || decimals.size != n
  ErrorHandling.message ['Unequal counts for min, max, and decimals'.red]
  ErrorHandling.clean_abort help_msg
end

# Smallest design able to accommodate n factors.
minimal_size = case n
               when 1..7
                 17
               when 8..11
                 33
               when 12..16
                 65
               when 17..22
                 129
               when 23..29
                 257
               else
                 ErrorHandling.message ['invalid number of factors'.red]
                 ErrorHandling.clean_abort help_msg
               end

lh_levels ||= minimal_size

if lh_levels < minimal_size
  ErrorHandling.clean_abort [
    "Latin hypercube with #{lh_levels} levels is too small for #{n} factors.".red
  ]
end

# One Scaler per factor, each mapping levels 1..lh_levels onto that
# factor's range.
factor = Array.new(n) do |i|
  Scaler.new(min_values[i], max_values[i], decimals[i], lh_levels)
end

design = NOLH::DESIGN_TABLE[lh_levels]

num_columns = design[0].length
num_stackings ||= num_columns
if num_stackings > num_columns
  ErrorHandling.clean_abort [
    'Requested stacking exceeds number of columns in latin hypercube '.red +
      "(#{num_columns})".red
  ]
end

# The row at index lh_levels / 2 is printed for the first stack only.
# NOTE(review): presumably this is the design's center point, which column
# rotation leaves unchanged; confirm against NOLH::DESIGN_TABLE.
mid_range = lh_levels / 2
num_stackings.times do |stack_num|
  design.each_with_index do |dp, i|
    next if stack_num > 0 && i == mid_range

    # Stack k uses every design row rotated left by k columns. Rotating a
    # copy leaves the shared NOLH::DESIGN_TABLE entry untouched.
    shifted_dp = dp.rotate(stack_num)
    scaled_dp = shifted_dp.slice(0, n).map.with_index { |x, k| factor[k].scale(x) }
    puts scaled_dp.join("\t")
  end
end
@@ -0,0 +1,53 @@
1
#! /usr/bin/env ruby -w

# Strip duplicate headers out of file(s).
#
# The first line of each input is treated as its header; it is copied once
# and every later line equal to it is dropped. Named files are edited in
# place (the original is kept with a '.orig' suffix); with no file names
# the script filters stdin to stdout.

require 'colorize'

String.disable_colorization false

require 'optparse'
require 'datafarming/error_handling'

help_msg = [
  'Strip duplicate headers out of one or more files.', '',
  'If filenames are specified, a backup is made for each file with',
  "suffix '.orig' appended to the original filename and changes will",
  'be made in-place in the original file. If no filenames are given,',
  'the script reads from ' + 'stdin'.blue + ' and writes to ' +
    'stdout'.blue + '. In either case,',
  'all occurrences of lines which duplicate the first line in each',
  'file are removed.', '',
  'Syntax:',
  "\n\t#{ErrorHandling.prog_name} [--help] [filenames...]".yellow, '',
  "Arguments in square brackets are optional. A vertical bar '|'",
  'indicates valid alternatives for invoking the option. Prefix',
  'the command with "' + 'ruby'.yellow +
    '" if it is not on your PATH.', '',
  ' --help | -h | -? | ?'.green,
  "\tProduce this help message.",
  ' filenames...'.green,
  "\tThe name[s] of the file[s] to be converted."
]

parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{$PROGRAM_NAME} [-h|--help] [filenames...]"
  opts.on('-h', '-?', '--help') { ErrorHandling.clean_abort help_msg }
end

begin
  parser.parse!
rescue OptionParser::ParseError => e
  # Unknown or malformed options are usage errors, not crashes.
  ErrorHandling.message [e.message.red]
  ErrorHandling.clean_abort help_msg
end

ErrorHandling.clean_abort help_msg if ARGV[0] == '?'

# Edit named files in place, keeping a '.orig' backup. In-place editing does
# not apply to stdin, so leave it off when no file names were given.
ARGF.inplace_mode = '.orig' unless ARGV.empty?

current_file = nil
header = nil

ARGF.each do |line|
  if ARGF.filename == current_file     # still inside the same file:
    # Compare without the line terminator so that a duplicate header on a
    # final line lacking a newline is still recognized.
    puts line unless line.chomp == header
  else                                 # first line of a new file:
    current_file = ARGF.filename
    header = line.chomp                # remember its header
    puts line                          # and copy it just this once
  end
end
@@ -0,0 +1,49 @@
1
#! /usr/bin/env ruby -w

# Strip header line out of file(s).
#
# The first line of each input is dropped and every other line is copied
# through unchanged. Named files are edited in place (the original is kept
# with a '.orig' suffix); with no file names the script filters stdin to
# stdout.

require 'colorize'

String.disable_colorization false

require 'optparse'
require 'datafarming/error_handling'

help_msg = [
  'Strip headers out of one or more file(s) to convert them to data-only.', '',
  'If filenames are specified, a backup is made for each file with',
  "suffix '.orig' appended to the original filename and changes will",
  'be made in-place in the original file. If no filenames are given,',
  'the script reads from ' + 'stdin'.blue + ' and writes to ' +
    'stdout'.blue + '. In either case,',
  'the first line of each input file is removed.', '',
  'Syntax:',
  "\n\t#{ErrorHandling.prog_name} [--help] [filenames...]".yellow, '',
  "Arguments in square brackets are optional. A vertical bar '|'",
  'indicates valid alternatives for invoking the option. Prefix',
  'the command with "' + 'ruby'.yellow +
    '" if it is not on your PATH.', '',
  ' --help | -h | -? | ?'.green,
  "\tProduce this help message.",
  ' filenames...'.green,
  "\tThe name[s] of the file[s] to be converted."
]

parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{$PROGRAM_NAME} [-h|--help] [filenames...]"
  opts.on('-h', '-?', '--help') { ErrorHandling.clean_abort help_msg }
end

begin
  parser.parse!
rescue OptionParser::ParseError => e
  # Unknown or malformed options are usage errors, not crashes.
  ErrorHandling.message [e.message.red]
  ErrorHandling.clean_abort help_msg
end

ErrorHandling.clean_abort help_msg if ARGV[0] == '?'

# Edit named files in place, keeping a '.orig' backup. In-place editing does
# not apply to stdin, so leave it off when no file names were given.
ARGF.inplace_mode = '.orig' unless ARGV.empty?

current_file = nil

ARGF.each do |line|
  if ARGF.filename == current_file # Same file as the previous line:
    puts line                      # copy the line through.
  else                             # First line of a new file: remember the
    current_file = ARGF.filename   # file but do not copy its header line.
  end
end
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/env ruby -w
2
+
3
module CrossedDesigns
  # Builds the full combinatorial design obtained by crossing every design
  # point of each component design with every design point of all the others.
  #
  # +inputs+ is an array of component designs, each itself an array of
  # design points (arrays of factor settings). The remaining parameters carry
  # the state of the recursion and are normally left at their defaults:
  # +idx+ is the component currently being expanded, +tmp+ the partial design
  # point built from components before +idx+, and +solution+ the accumulator
  # that receives finished design points.
  #
  # Returns +solution+, whose entries are ordered with the first component
  # varying slowest.
  def self.cross(inputs, idx = 0, tmp = [], solution = [])
    # Every component has contributed: tmp is a complete design point.
    return solution << tmp if idx >= inputs.size

    # Otherwise extend tmp with each point of the current component in turn.
    inputs[idx].each_with_object(solution) do |design_point, collected|
      cross(inputs, idx + 1, tmp + design_point, collected)
    end
  end
end