batch_experiment 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/examples/sample_batch.rb +48 -0
- data/examples/ukp_batch.rb +39 -0
- data/lib/batch_experiment/extractor.rb +29 -0
- data/lib/batch_experiment/sample_extractors.rb +49 -0
- data/lib/batch_experiment.rb +345 -0
- metadata +64 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4d594a77b4909c00eb2c07a05dbc6c4f0cc68c9a
|
4
|
+
data.tar.gz: 979f0d1800486061aff5d9a1728e55790e675e34
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: cc0d896402fc7820e7c1f2741ae9e07ffdea6ab14edc3156e9bdcd5d3f4c24b71e753087e1980fdb97c8b85006cc2c7ef1bbb3bdad3a181b0853a7c94f76ea82
|
7
|
+
data.tar.gz: bfde027e77f78360a9728eacca5075a00a2b59e47cd0224e692c2e44bc6118e942c93c4913a0bd65a5861612191f92deb4402052440037e416391c9433d2629c
|
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/env ruby

require 'batch_experiment'
require 'batch_experiment/sample_extractors'

# One hash per command template. Every template is run once for each entry of
# 'files' (see below).
comms_info = [{
  # String with the command to be executed. Must have 'pattern' as a substring.
  command: 'sleep 1 && echo X X',
  # Substring present in 'command'. Often replaced by the instance filename.
  pattern: 'X',
  # Extractor object. Receives the output of the command and returns
  # the most important fields.
  extractor: SampleExtractor.new,
  # String used to identify the command. Will be used to prefix the return of
  # extractor.names.
  prefix: 'doubled',
}, {
  command: 'sleep 3 && echo "banana X"',
  pattern: 'X',
  extractor: SampleExtractor.new,
  prefix: 'banana',
}, {
  # Sleeps for longer than the timeout configured below, so this command is
  # expected to be killed before it prints anything.
  command: 'sleep 100 && echo "never gonna happen X"',
  pattern: 'X',
  extractor: SampleExtractor.new,
  prefix: 'timeout',
}]

execution_info = {
  # IDs of the CPU cores that can be used for executing tests.
  cpus_available: [1, 2, 3],
  # Maximum number of seconds that a command can run. After this a kill command
  # (TERM signal) will be issued.
  timeout: 5,
  # Maximum number of seconds that a command can run after a kill command was
  # issued. After this a kill -9 command (KILL signal) will be issued.
  post_timeout: 1,
}

conf = {
  # The name of the file where the CSV data will be written.
  csvfname: 'sample.csv',
}

files = ['apple', 'orange'] # Applejack would be proud

BatchExperiment.experiment(comms_info, execution_info, conf, files)
|
48
|
+
|
@@ -0,0 +1,39 @@
|
|
1
|
+
#!/usr/bin/env ruby

# The library files live in the gem's lib/ folder, not next to this example,
# so they are loaded through the load path instead of require_relative.
require 'batch_experiment'
require 'batch_experiment/sample_extractors'

# I run the three lines below in the console to disable hyperthreading cores on
# my computer before examining the cores with the top command.
# for i in 4 5 6 7; do
#   sudo sh -c "echo 0 > /sys/devices/system/cpu/cpu$i/online";
# done

comms_info = [{
  command: 'pyasukpt -src INST_FILE',
  pattern: 'INST_FILE',
  extractor: PyaExtractor.new,
  prefix: 'PYAsUKP',
}, {
  command: 'run_ukp5.out INST_FILE',
  pattern: 'INST_FILE',
  extractor: UKP5Extractor.new,
  prefix: 'UKP5',
}]

execution_info = {
  cpus_available: [1, 2, 3],
  timeout: 10,
  post_timeout: 5,
}

conf = { csvfname: 'pya_site8.csv' }

files = ['corepb.ukp', 'exnsd18.ukp', 'exnsd26.ukp', 'exnsdbis18.ukp', 'exnsd16.ukp', 'exnsd20.ukp', 'exnsdbis10.ukp', 'exnsds12.ukp']
# If you don't execute the script from the ukp files folder you need to put the
# folder relative or absolute path here (with trailing slash).
path = ''
files.map! { |f| path + f }

# 'experiment' is a module method of BatchExperiment, so it has to be called
# through the module.
BatchExperiment.experiment(comms_info, execution_info, conf, files)
|
39
|
+
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Mixin that defines the interface of the objects used to extract values from
# the saved output of a command. Including classes must implement #names and
# #extract_from_lines; #extract is provided on top of the latter.
module Extractor
  # For when there's a field whose value is after '<field>: '.
  #
  # @param lines [Array<String>] The lines of the command output.
  # @param field [String] The literal field name (regex metacharacters in it
  #   are matched literally).
  # @return [String] What follows '<field>: ' (and any extra blanks) on the
  #   first line that begins with it, or '' if there is no such line.
  def self.get_field(lines, field)
    pattern = /^#{Regexp.escape(field)}: [\t ]*(.*)/
    lines.each do |line|
      found = pattern.match(line)
      return found[1] if found
    end
    ''
  end

  # For when there's a field whose value is in the next line.
  #
  # @param lines [Array<String>] The lines of the command output.
  # @param field [String] The exact content of the header line.
  # @return [String] The line right after the first line equal to 'field', or
  #   '' if there is no such line (or nothing comes after it).
  def self.get_hfield(lines, field)
    ix = lines.find_index(field)
    return '' unless ix

    lines[ix + 1] || ''
  end

  # Return the field names for each of the elements returned by
  # extract. Ex.: ['Time', 'Max Mem Use', 'opt', ... ]
  def names
    fail 'This method should have been overwritten by a subclass.'
  end

  # Split the command output in lines (without line terminators) and hand
  # them to #extract_from_lines.
  #
  # @param content [String] The whole output of a command.
  # @return [Array] Whatever #extract_from_lines returns.
  def extract(content)
    # String#chomp! returns nil when there is nothing to remove, which would
    # turn a last line without a trailing newline into nil; chomp never does.
    extract_from_lines(content.lines.map(&:chomp))
  end

  # Extract an array of values from the command output. This array has the same
  # size as the one returned by names.
  def extract_from_lines(lines)
    fail 'This method should have been overwritten by a subclass.'
  end
end
|
29
|
+
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'require_relative'
|
2
|
+
require_relative './extractor.rb'
|
3
|
+
|
4
|
+
# Sample extractors used at https://github.com/henriquebecker91/masters, where
|
5
|
+
# this code had its beginning. This file contains the code used to extract info
|
6
|
+
# from the different outputs generated by UKP solving programs.
|
7
|
+
|
8
|
+
# Extractor used by the sample_batch.rb example: reports the first two words
# of the output plus the 'ext_time' and 'ext_mem' fields.
class SampleExtractor
  include Extractor

  # @return [Array<String>] The column names, one for each value returned by
  #   #extract_from_lines.
  def names
    ['first word', 'second word', 'ext_time', 'ext_mem']
  end

  # @param lines [Array<String>] The lines of the command output.
  # @return [Array<String>] Always four values, in the same order as #names.
  def extract_from_lines(lines)
    # A missing (or nil) first line counts as an empty one.
    words = lines.first.to_s.split.take(2)
    # Pad with empty strings so a first line with fewer than two words does
    # not shift the remaining values to the wrong columns.
    words << '' while words.size < 2
    words << Extractor.get_field(lines, 'ext_time')
    words << Extractor.get_field(lines, 'ext_mem')
  end
end
|
21
|
+
|
22
|
+
# Extractor that reads the 'Seconds', 'ext_time', 'ext_mem' and 'opt' fields
# ('<field>: <value>' lines) from a command output.
class UKP5Extractor
  include Extractor

  # @return [Array<String>] The column names, one for each extracted value.
  def names
    ['internal time', 'external time', 'external memory', 'opt']
  end

  # @param lines [Array<String>] The lines of the command output.
  # @return [Array<String>] The value of each field, in the order of #names.
  def extract_from_lines(lines)
    labels = %w[Seconds ext_time ext_mem opt]
    labels.map { |label| Extractor.get_field(lines, label) }
  end
end
|
34
|
+
|
35
|
+
# Extractor that reads the 'Total Time ', 'ext_time' and 'ext_mem' fields
# ('<field>: <value>' lines) plus the line that follows the
# '#The optimal value for the given capacity' header.
class PyaExtractor
  include Extractor

  # @return [Array<String>] The column names, one for each extracted value.
  def names
    ['internal time', 'external time', 'external memory', 'opt']
  end

  # @param lines [Array<String>] The lines of the command output.
  # @return [Array<String>] The extracted values, in the order of #names.
  def extract_from_lines(lines)
    inline_values = ['Total Time ', 'ext_time', 'ext_mem'].map do |label|
      Extractor.get_field(lines, label)
    end
    # The optimal value is not on the header line, but on the next one.
    opt = Extractor.get_hfield(lines, '#The optimal value for the given capacity')
    inline_values + [opt]
  end
end
|
49
|
+
|
@@ -0,0 +1,345 @@
|
|
1
|
+
require 'childprocess'
|
2
|
+
require 'pathname'
|
3
|
+
|
4
|
+
module BatchExperiment
|
5
|
+
# Default callable object used by batch to turn a command into a (partial)
# filename.
class FilenameSanitizer
  # @param command [String] A shell command.
  # @return [String] A new string where every run of non-alphanumeric
  #   characters of the stripped command became a single '_', without a
  #   leading or trailing '_'. The argument is not changed.
  def call(command)
    squeezed = command.strip.gsub(/[^[:alnum:]]+/, '_')
    squeezed.sub(/\A_/, '').sub(/_\z/, '')
  end
end
|
17
|
+
|
18
|
+
# Internal use only. DO NOT DEPEND.
# Remove any finished commands from comms_running, insert the cpus
# freed by the commands termination to the free_cpus, insert the
# terminated commands on comms_executed. The lock file of every finished
# command is deleted.
#
# @param free_cpus [Array] Receives the cpu of each finished job.
# @param comms_running [Array<Hash>] The running jobs, each with the keys
#   :proc (responds to exited?), :cpu, :lockfname and :command. The finished
#   ones are removed from it.
# @param comms_executed [Array<String>] Receives the command of each finished
#   job.
def self.update_finished(free_cpus, comms_running, comms_executed)
  comms_running.delete_if do |job|
    # Ask the process only once: if it was asked a second time, a process
    # that ended between the two questions would be dropped from
    # comms_running without releasing its cpu or its lock file.
    finished = job[:proc].exited?
    if finished
      free_cpus.push(job[:cpu])
      File.delete(job[:lockfname])
      comms_executed << job[:command]
    end
    finished # bool returned to delete_if
  end
end
|
32
|
+
|
33
|
+
# Takes a list of commands, executes them only on the designated cores/cpus,
# and kills them if the timeout expires; never lets a core/cpu rest for more
# than conf[:busy_loop_sleep] seconds between a command and another. The
# conf[:fname_sanitizer] is called over the commands to generate partial
# filenames. Appending '.out' to one of the partial filenames will give the
# filename where the command stdout was redirected. The analogue is valid for
# '.err' and stderr. The first partial filename corresponds to the first
# command in commands, and so on. Right before a command begins to run, a
# "partial_filename#{conf[:unfinished_ext]}" file is created. After the
# command ends its execution this file is removed. If the command ends its
# execution by means of a timeout the file is also removed. The file only
# remains if the batch procedure is interrupted (not a specific command).
#
# @param commands [Array<String>] The shell commands.
# @param conf [Hash] The configurations, as follows:
#   :cpus_available [Array<Fixnum>] Cpu cores that can be used to run the
#   commands. Required parameter. The cpu numbers begin at 0, despite what
#   htop tells you;
#   :timeout [Number] Number of seconds before killing a command. Required
#   parameter. Is the same for all the commands;
#   :time_fmt [String] A string in the time (external command) format. See
#   http://linux.die.net/man/1/time. Default: 'ext_time: %e\next_mem: %M\n'.
#   :busy_loop_sleep [Number] How many seconds to wait before checking if a
#   command ended execution. This is the max time a cpu will be vacant between
#   two commands. Default: 0.1;
#   :post_timeout [Number] A command isn't guaranteed to end after receiving
#   a TERM signal. If the command hasn't stopped, waits post_timeout seconds
#   before sending a KILL signal (give it a chance to end gracefully).
#   Default: 5;
#   :fname_sanitizer [Callable Object] The call method of this object
#   should take a String and convert it (possibly losing information), to a
#   valid filename. Used over the commands to define the output files of
#   commands.
#   Default: BatchExperiment::FilenameSanitizer.new;
#   :skip_done_comms [FalseClass,TrueClass] Skip any command for which a
#   corresponding '.out' file exists, except if both a '.out' and a
#   '.unfinished' file exist, in the last case the command is executed.
#   Default: true;
#   :unfinished_ext [String] Extension to be used in place of '.unfinished'.
#   Default: '.unfinished';
#   :out_ext [String] Extension to be used in place of '.out'.
#   Default: '.out';
#   :err_ext [String] Extension to be used in place of '.err'.
#   Default: '.err';
# @return [Array<String>] Which commands were executed, in the order they
#   were found to be finished. Can be different from the 'commands' argument
#   if commands are skipped (see :skip_done_comms).
# @raise [RuntimeError] If a required configuration is missing, or if a
#   command has to be executed and conf[:cpus_available] is empty.
#
# @note This procedure was not designed to support equal commands (the last
#   equal command executed will overwrite the '.out', '.err' and '.unfinished'
#   files used by any previous equal command). But the parameter
#   conf[:fname_sanitizer] can be used to circumvent the restriction over
#   equal commands (if the object has state it can return a different
#   filename for every time it's called with the same argument).
# @note This procedure makes use of the following linux commands: time (not
#   the bash internal one, but the package one, i.e.
#   https://www.archlinux.org/packages/extra/x86_64/time/); timeout (from
#   coreutils); taskset (from util-linux,
#   https://www.archlinux.org/packages/core/x86_64/util-linux/); sh (the
#   shell).
# @note The command is executed inside a call to "sh -c command", so it has
#   to be a valid sh command.
# @note The output of the command "time -f #{conf[:time_fmt]}" will be
#   appended to the '.out' file of every command. If you set conf[:time_fmt]
#   to an empty string only a newline will be appended.
def self.batch(commands, conf)
  # Throw exceptions if required configurations aren't provided.
  fail "conf[:cpus_available] not set" unless conf[:cpus_available]
  fail "conf[:timeout] not set" unless conf[:timeout]

  # Initialize optional configurations with default values if they weren't
  # provided. Don't change the conf argument, only our version of conf.
  conf = conf.clone
  conf[:time_fmt] ||= 'ext_time: %e\\next_mem: %M\\n'
  conf[:unfinished_ext] ||= '.unfinished'
  conf[:out_ext] ||= '.out'
  conf[:err_ext] ||= '.err'
  conf[:busy_loop_sleep] ||= 0.1
  conf[:post_timeout] ||= 5
  conf[:fname_sanitizer] ||= BatchExperiment::FilenameSanitizer.new
  # Not '||=' because an explicit false has to be respected.
  conf[:skip_done_comms] = true if conf[:skip_done_comms].nil?

  # Initialize main variables
  free_cpus = conf[:cpus_available].clone
  comms_running = []
  comms_executed = []

  commands.each do |command|
    commfname = conf[:fname_sanitizer].call(command)
    out_fname = commfname + conf[:out_ext]
    err_fname = commfname + conf[:err_ext]
    lockfname = commfname + conf[:unfinished_ext]

    if conf[:skip_done_comms] && File.exist?(out_fname)
      if File.exist?(lockfname)
        puts "found file #{out_fname}, but a #{lockfname} also exists"
        puts "will execute command '#{command}' anyway"
      else
        puts "found file #{out_fname}, skipping command: #{command}"
        STDOUT.flush
        next
      end
    end

    puts "waiting to execute command: #{command}"
    STDOUT.flush

    while free_cpus.empty?
      # No free cpu and nothing running means that no cpu will ever be
      # freed: fail instead of looping forever.
      fail "conf[:cpus_available] is empty" if comms_running.empty?
      sleep conf[:busy_loop_sleep]
      update_finished(free_cpus, comms_running, comms_executed)
    end

    cpu = free_cpus.pop

    # taskset pins the command to the cpu, time appends the measurements to
    # the '.out' file, timeout sends TERM after conf[:timeout] seconds and
    # KILL conf[:post_timeout] seconds later.
    cproc = ChildProcess.build(
      'taskset', '-c', cpu.to_s,
      'time', '-f', conf[:time_fmt], '--append', '-o', out_fname,
      'timeout', '--preserve-status', '-k', "#{conf[:post_timeout]}s",
      "#{conf[:timeout]}s",
      'sh', '-c', command
    )

    File.open(lockfname, 'w') {} # empty on purpose

    # The child process keeps its own descriptors after being started, so
    # our handles are closed as soon as the blocks end. Leaving them open
    # would leak two descriptors per command.
    File.open(out_fname, 'w') do |out|
      File.open(err_fname, 'w') do |err|
        cproc.io.stdout = out
        cproc.io.stderr = err
        cproc.start
      end
    end

    comms_running << {
      proc: cproc,
      cpu: cpu,
      lockfname: lockfname,
      command: command
    }

    puts "command assigned to cpu#{cpu}"
    STDOUT.flush
  end

  # Wait for the commands that are still running.
  until comms_running.empty?
    sleep conf[:busy_loop_sleep]
    update_finished(free_cpus, comms_running, comms_executed)
  end

  comms_executed
end
|
182
|
+
|
183
|
+
# gencommff: GENerate COMMands For Files
#
# @param comm [String] A string with 'patt' as a substring.
# @param patt [String] A string contained in 'comm'.
# @param files [Enumerable<String>] A list of strings to substitute patt at
#   comm. They are inserted literally.
# @return [Array<String>] One command for each element of files, in the
#   same order, with every occurrence of patt replaced. Example:
#   gencommff('echo STR', 'STR', ['a', 'b', 'c']) returns
#   ['echo a', 'echo b', 'echo c'].
def self.gencommff(comm, patt, files)
  # The block form of gsub is used because a replacement passed as argument
  # has its backslash sequences (\0, \1, \\) interpreted, what would corrupt
  # filenames that contain backslashes.
  files.map { |f| comm.gsub(patt) { f } }
end
|
196
|
+
|
197
|
+
# Merge several arrays (possibly of different sizes) into a single one, in
# round-robin order: first the first element of every array, then the second
# element of every array that has one, and so on.
#
# @param [Array<Array<Object>>] xss An array of arrays. It isn't changed.
# @return [Array<Object>] An array whose size is the sum of the sizes of the
#   inner arrays. The values are the same objects (not copies) that are in
#   the inner arrays. Example: intercalate([[1, 4, 6, 7], [], [2, 5], [3]])
#   returns [1, 2, 3, 4, 5, 6, 7].
def self.intercalate(xss)
  rounds = xss.map(&:size).max || 0
  (0...rounds).flat_map do |round|
    # Only the arrays that still have an element at this position
    # contribute to the round.
    xss.select { |xs| round < xs.size }.map { |xs| xs[round] }
  end
end
|
217
|
+
|
218
|
+
# Takes N shell commands and M files/parameters, executes each command of the
# N commands over the M files, saves the output of each command/file
# combination, uses objects provided with the command to extract relevant
# information from the output file, and groups that information in a CSV
# file. Easier to understand seeing the sample_batch.rb example in action.
#
# @param comms_info [Array<Hash>] An array of hashes, each with the config
#   needed to know how to deal with the command. Four required fields
#   (all keys are symbols):
#   command [String] A string with a sh shell command.
#   pattern [String] A substring of command, will be replaced by the strings
#   in the parameter 'files'.
#   extractor [Extractor] An object that implements the Extractor interface.
#   prefix [String] A string that will be used to prefix the extractor.names
#   when they are used as column names. Improves Extractor reusability.
# @param batch_conf [Hash] Configuration used to call batch. See the
#   explanation for parameter 'conf' on the documentation of the batch
#   method. There are required fields for this hash parameter.
# @param conf [Hash] Lots of parameters. Here's a list:
#   csvfname [String] The filename/filepath for the file that will contain
#   the CSV data. Required field.
#   separator [String] The separator used at the CSV file. Default: ';'.
#   ic_columns [TrueClass, FalseClass] Intercalate the data returned by the
#   extractors. In other words, the csv line for some file will not present
#   all fields of the first command, then all fields of the second command,
#   etc, but instead will present the first field of all commands, the second
#   field of all commands, and so on. Default: true.
#   ic_comms [TrueClass, FalseClass] Intercalate the commands execution.
#   Instead of executing the first command over all files first, execute all
#   the commands over the first file first. This was made to avoid
#   confounding (statistical concept). If something disrupts the processing
#   power for some period of time, the effect will probably be distributed
#   between commands. The risk that some algorithm seems better or worse than
#   it really is will be reduced. For example: you are making tests at a
#   notebook, the notebook becomes unplugged for a short time. The cores will
#   probably enter in energy saving mode and affect the observed performance.
#   If this happens when all tested commands are the same, then it will seem
#   that a command had a worse performance. If this happens when the
#   commands are intercalated, then maybe some instances will seem harder
#   than others (what is less problematic). Default: true.
#   skip_commands [TrueClass, FalseClass] If true, will not execute the
#   commands and assume that the outputs are already saved. Will only execute
#   the extractors over the already saved outputs, and create the CSV file
#   from them. Default: false.
#
# @param files [Array<Strings>] The strings that will replace the :pattern
#   on :command, for every element in comms_info.
#
# @return [NilClass,Array<String>] The return of the internal #batch
#   call. Returns nil if conf[:skip_commands] was set to true.
# @raise [RuntimeError] If conf[:csvfname] isn't provided.
#
# @see BatchExperiment.batch
def self.experiment(comms_info, batch_conf, conf, files)
  # Throw exceptions if required configurations aren't provided.
  fail 'conf[:csvfname] is not defined' unless conf[:csvfname]

  # Initialize optional configurations with default values if they weren't
  # provided. Don't change the conf argument, only our version of conf.
  conf = conf.clone
  conf[:separator] ||= ';'
  conf[:ic_columns] = true if conf[:ic_columns].nil?
  conf[:ic_comms] = true if conf[:ic_comms].nil?
  # conf[:skip_commands] defaults to false/nil

  # Get some of the batch config that we use inside here too.
  out_ext = batch_conf[:out_ext] || '.out'
  fname_sanitizer = batch_conf[:fname_sanitizer]
  fname_sanitizer ||= BatchExperiment::FilenameSanitizer.new

  # Create the commands from the templates and the file list. Each set has
  # the commands of one template, in the same order as 'files'.
  comms_sets = comms_info.map do |comm_info|
    gencommff(comm_info[:command], comm_info[:pattern], files)
  end

  # The key is :ic_comms (the documented and defaulted one); reading
  # :ic_comm instead would always give nil and never intercalate.
  comm_list = conf[:ic_comms] ? intercalate(comms_sets) : comms_sets.flatten

  # Execute the commands (or not).
  ret = batch(comm_list, batch_conf) unless conf[:skip_commands]

  # Build header (first csv line, column names).
  header = comms_info.map do |comm_info|
    comm_info[:extractor].names.map do |name|
      (comm_info[:prefix] + ' ') << name
    end
  end
  header = intercalate(header) if conf[:ic_columns]
  # Array#join also joins the nested arrays left when the columns weren't
  # intercalated.
  header = ['Filename'].concat(header).join(conf[:separator])

  # Build body (inspect all output files and make csv lines).
  rows = [header]
  files.each_with_index do |inst_fname, j|
    line = comms_info.each_with_index.map do |comm_info, i|
      # Position of the command (template i, file j) inside comm_list, which
      # depends on the order used to build the list.
      command =
        if conf[:ic_comms]
          comm_list[(j * comms_info.size) + i]
        else
          comm_list[(i * files.size) + j]
        end

      out_fname = fname_sanitizer.call(command) + out_ext
      if File.exist?(out_fname)
        comm_info[:extractor].extract(File.read(out_fname))
      else
        # if the file wasn't created insert an empty column set
        # of the same size the true column set would be
        comm_info[:extractor].names.map { '' }
      end
    end
    line = intercalate(line) if conf[:ic_columns]
    rows << [inst_fname].concat(line).join(conf[:separator])
  end
  # Every line ends with a separator.
  body = rows.map { |row| row + conf[:separator] }.join("\n")

  # Write CSV data into a CSV file.
  File.open(conf[:csvfname], 'w') { |f| f.write(body) }

  ret
end
|
344
|
+
end
|
345
|
+
|
metadata
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: batch_experiment
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Henrique Becker
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-03-18 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: childprocess
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0.5'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0.5'
|
27
|
+
description: ''
|
28
|
+
email: henriquebecker91@gmail.com
|
29
|
+
executables: []
|
30
|
+
extensions: []
|
31
|
+
extra_rdoc_files: []
|
32
|
+
files:
|
33
|
+
- examples/sample_batch.rb
|
34
|
+
- examples/ukp_batch.rb
|
35
|
+
- lib/batch_experiment.rb
|
36
|
+
- lib/batch_experiment/extractor.rb
|
37
|
+
- lib/batch_experiment/sample_extractors.rb
|
38
|
+
homepage: https://rubygems.org/gems/batch_experiment
|
39
|
+
licenses:
|
40
|
+
- Public Domain
|
41
|
+
- Unlicense
|
42
|
+
metadata: {}
|
43
|
+
post_install_message:
|
44
|
+
rdoc_options: []
|
45
|
+
require_paths:
|
46
|
+
- lib
|
47
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
48
|
+
requirements:
|
49
|
+
- - ">="
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: '0'
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0'
|
57
|
+
requirements: []
|
58
|
+
rubyforge_project:
|
59
|
+
rubygems_version: 2.5.1
|
60
|
+
signing_key:
|
61
|
+
specification_version: 4
|
62
|
+
summary: A ruby script that distributes system commands between cpu cores, and save
|
63
|
+
their output.
|
64
|
+
test_files: []
|