batch_experiment 2.2.0 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/batch_experiment.rb +64 -62
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4c482d1a8d72df171607b55322421975768dd006
|
4
|
+
data.tar.gz: 0f55e0c4a9afe8bdbfbdd18408ecd2b9ec09d71b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4663bc105b70b4ad249ab1d59e0aa49c151fb2c157907bf968576c934ecfc2adced1224aae1bd7a6bfe7a493c803a9bb931f963b5c368d24ee9fc4bed63017f6
|
7
|
+
data.tar.gz: 75b06a9b3a780b9c4db02d56531592463e3747809c38c09cfec623e9eab717832ac260a388b7850d69d17d0c73c071e89eda8e3c390ea392965f51417257c9b2
|
data/lib/batch_experiment.rb
CHANGED
@@ -5,6 +5,16 @@ require 'socket'
|
|
5
5
|
# The main module, the two main utility methods offered are ::batch and
|
6
6
|
# ::experiment.
|
7
7
|
module BatchExperiment
|
8
|
+
# Exception class raised when multiple extractor objects passed to
|
9
|
+
# ::experiment (by the comms_info parameter) disagree on the content of the
|
10
|
+
# columns. Ex.: If we call ::experiment with different extractor objects, all
|
11
|
+
# arrays returned by the #names method of those extractors should be equal or
|
12
|
+
# a prefix of the biggest array. Ex.: ['a', 'b'], ['a', 'b'], ['a'] and
|
13
|
+
# ['a', 'b', 'c'] works, but adding ['a', 'c'] will end the program with
|
14
|
+
# this exception. This is made to avoid making the mistake of generating a
|
15
|
+
# csv where the same column has a different meaning for each row.
|
16
|
+
class ColumnSpecError < ArgumentError; end
|
17
|
+
|
8
18
|
# The default callable object used by Comm2FnameConverter to convert
|
9
19
|
# a command into a filename. Comm2FnameConverter doesn't create a sanitized
|
10
20
|
# filename from the command string (it uses its first argument to do this,
|
@@ -90,19 +100,23 @@ module BatchExperiment
|
|
90
100
|
# terminated commands on comms_executed.
|
91
101
|
def self.update_finished(free_cpus, comms_running, comms_executed) #:nodoc
|
92
102
|
comms_running.delete_if do | job |
|
93
|
-
# Don't call '#exited?' twice, store value
|
103
|
+
# Don't call '#exited?' twice, store its value in a variable. If you call
|
94
104
|
# it twice it's possible to remove it from the list of running commands
|
95
|
-
# without freeing a cpu, what will
|
105
|
+
# without freeing a cpu, which will mark the cpu as busy forever.
|
96
106
|
exited = job[:proc].exited?
|
97
107
|
if exited
|
98
108
|
free_cpus.push(job[:cpu])
|
99
|
-
|
109
|
+
job[:out_file].close
|
110
|
+
job[:err_file].close
|
111
|
+
File.open(job[:run_fname], 'a') do | f |
|
112
|
+
f.write(
|
113
|
+
"command: #{job[:command]}\n" +
|
114
|
+
"date_before: #{job[:date_before].utc.to_s}\n" +
|
115
|
+
"date_after: #{Time.now.utc.to_s}\n" +
|
116
|
+
"hostname: #{Socket.gethostname}\n"
|
117
|
+
)
|
118
|
+
end
|
100
119
|
comms_executed << job[:command]
|
101
|
-
out = job[:out_file]
|
102
|
-
out.write("\ncommand: " + job[:command])
|
103
|
-
out.write("\ndate_before: " + job[:date_before].utc.to_s)
|
104
|
-
out.write("\ndate_after: " + Time.now.utc.to_s)
|
105
|
-
out.write("\nhostname: " + Socket.gethostname)
|
106
120
|
end
|
107
121
|
exited # bool returned to delete_if
|
108
122
|
end
|
@@ -114,13 +128,15 @@ module BatchExperiment
|
|
114
128
|
#
|
115
129
|
# The output filenames are derived from the commands. The ones with '.out'
|
116
130
|
# are the ones with the command standard output. The analogue is valid for
|
117
|
-
# '.err' and standard error.
|
118
|
-
#
|
119
|
-
#
|
120
|
-
#
|
121
|
-
#
|
122
|
-
#
|
123
|
-
#
|
131
|
+
# '.err' and standard error. The filenames ending in '.run' are created only
|
132
|
+
# after the process has ended (naturally or by timeout) and contain: the
|
133
|
+
# sh command, the date before starting the job (up to the second), the date
|
134
|
+
# after the process has ended (up to the second), and the hostname of the
|
135
|
+
# computer where the command was executed. The '.run' files have a second
|
136
|
+
# utility that is to mark which commands were already executed. If a power
|
137
|
+
# outage turns off the computer, or you decide to kill the script, the '.run'
|
138
|
+
# files will store which executions already happened, and if you execute the
|
139
|
+
# script again it will (by default) skip the already executed commands.
|
124
140
|
#
|
125
141
|
# @param commands [Array<String>] The shell commands.
|
126
142
|
# @param conf [Hash] The configurations, as follows:
|
@@ -144,13 +160,13 @@ module BatchExperiment
|
|
144
160
|
# and convert it (possibly losing information), to a valid filename. Used
|
145
161
|
# over the commands to define the output files of commands. Default:
|
146
162
|
# BatchExperiment::Comm2FnameConverter.new.
|
147
|
-
# * skip_done_comms [FalseClass,TrueClass]
|
148
|
-
# corresponding '.
|
149
|
-
#
|
150
|
-
#
|
151
|
-
#
|
152
|
-
# *
|
153
|
-
#
|
163
|
+
# * skip_done_comms [FalseClass,TrueClass] If true then, for each command,
|
164
|
+
# verify if a corresponding '.run' file exists, if it exists, skip the
|
165
|
+
# command, if it does not exist then execute the command. If false then it
|
166
|
+
# removes the corresponding out/err/run files before executing each
|
167
|
+
# command. Default: true.
|
168
|
+
# * run_ext [String] Extension to be used in place of '.run'.
|
169
|
+
# Default: '.run'.
|
154
170
|
# * out_ext [String] Extension to be used in place of '.out'.
|
155
171
|
# Default: '.out'.
|
156
172
|
# * err_ext [String] Extension to be used in place of '.err'.
|
@@ -193,9 +209,9 @@ module BatchExperiment
|
|
193
209
|
# provided. Don't change the conf argument, only our version of conf.
|
194
210
|
conf = conf.clone
|
195
211
|
conf[:time_fmt] ||= 'ext_time: %e\\next_mem: %M\\n'
|
196
|
-
conf[:unfinished_ext] ||= '.unfinished'
|
197
212
|
conf[:out_ext] ||= '.out'
|
198
213
|
conf[:err_ext] ||= '.err'
|
214
|
+
conf[:run_ext] ||= '.run'
|
199
215
|
conf[:busy_loop_sleep] ||= 0.1
|
200
216
|
conf[:post_timeout] ||= 5
|
201
217
|
conf[:converter] ||= BatchExperiment::Comm2FnameConverter.new
|
@@ -213,20 +229,19 @@ module BatchExperiment
|
|
213
229
|
commfname = conf[:converter].call(command)
|
214
230
|
out_fname = conf[:output_dir] + commfname + conf[:out_ext]
|
215
231
|
err_fname = conf[:output_dir] + commfname + conf[:err_ext]
|
216
|
-
|
217
|
-
|
218
|
-
if conf[:skip_done_comms] && File.exists?(
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
end
|
232
|
+
run_fname = conf[:output_dir] + commfname + conf[:run_ext]
|
233
|
+
|
234
|
+
if conf[:skip_done_comms] && File.exists?(run_fname)
|
235
|
+
puts "Found file: #{commfname} -- skipping command: #{command}"
|
236
|
+
STDOUT.flush
|
237
|
+
next
|
238
|
+
else
|
239
|
+
if File.exists? out_fname then File.delete out_fname end
|
240
|
+
if File.exists? err_fname then File.delete err_fname end
|
241
|
+
if File.exists? run_fname then File.delete run_fname end
|
227
242
|
end
|
228
243
|
|
229
|
-
puts "
|
244
|
+
puts "Next command in the queue: #{command}"
|
230
245
|
STDOUT.flush
|
231
246
|
|
232
247
|
while free_cpus.empty? do
|
@@ -238,7 +253,7 @@ module BatchExperiment
|
|
238
253
|
|
239
254
|
cproc = ChildProcess.build(
|
240
255
|
'taskset', '-c', cpu.to_s,
|
241
|
-
'time', '-f', conf[:time_fmt], '--append', '-o',
|
256
|
+
'time', '-f', conf[:time_fmt], '--append', '-o', run_fname,
|
242
257
|
'timeout', '--preserve-status', '-k', "#{conf[:post_timeout]}s",
|
243
258
|
"#{conf[:timeout]}s",
|
244
259
|
'sh', '-c', command
|
@@ -246,7 +261,6 @@ module BatchExperiment
|
|
246
261
|
|
247
262
|
cproc.cwd = conf[:cwd]
|
248
263
|
|
249
|
-
File.open(lockfname, 'w') {} # empty on purpose
|
250
264
|
out = File.open(out_fname, 'w')
|
251
265
|
err = File.open(err_fname, 'w')
|
252
266
|
cproc.io.stdout = out
|
@@ -258,16 +272,14 @@ module BatchExperiment
|
|
258
272
|
comms_running << {
|
259
273
|
proc: cproc,
|
260
274
|
cpu: cpu,
|
261
|
-
lockfname: lockfname,
|
262
275
|
command: command,
|
263
276
|
date_before: date_before,
|
264
277
|
out_file: out,
|
278
|
+
err_file: err,
|
279
|
+
run_fname: run_fname,
|
265
280
|
}
|
266
281
|
|
267
|
-
|
268
|
-
File.open(lockfname, 'w') { | f | f.write cproc.pid }
|
269
|
-
|
270
|
-
puts "command assigned to cpu#{cpu}"
|
282
|
+
puts "The command was assigned to cpu#{cpu}."
|
271
283
|
STDOUT.flush
|
272
284
|
end
|
273
285
|
|
@@ -323,16 +335,6 @@ module BatchExperiment
|
|
323
335
|
ret
|
324
336
|
end
|
325
337
|
|
326
|
-
# Exception class raised when multiple extractor objects passed to
|
327
|
-
# ::experiment (by the comms_info parameter) disagree on the content of the
|
328
|
-
# columns. Ex.: If we call ::experiment with different extractor objects, all
|
329
|
-
# arrays returned by the #names method of those extractors should be equal or
|
330
|
-
# a prefix of the biggest array. Ex.: ['a', 'b'], ['a', 'b'], ['a'] and
|
331
|
-
# ['a', 'b', 'c'] works, but adding ['a', 'c'] will end the program with
|
332
|
-
# this exception. This is made to avoid making the mistake of generating a
|
333
|
-
# csv where the same column has a different meaning for each row.
|
334
|
-
class ColumnSpecError < ArgumentError; end
|
335
|
-
|
336
338
|
# @!visibility private
|
337
339
|
# Check if the headers can be combined, if they can return a shallow copy of
|
338
340
|
# the biggest header, otherwise throw an exception.
|
@@ -438,8 +440,8 @@ module BatchExperiment
|
|
438
440
|
#conf[:skip_commands] defaults to false/nil
|
439
441
|
|
440
442
|
# Get some of the batch config that we use inside here too.
|
443
|
+
run_ext = batch_conf[:run_ext] || '.run'
|
441
444
|
out_ext = batch_conf[:out_ext] || '.out'
|
442
|
-
unfinished_ext = batch_conf[:unfinished_ext] || '.unfinished'
|
443
445
|
output_dir = batch_conf[:output_dir] || './'
|
444
446
|
converter = batch_conf[:converter].clone unless batch_conf[:converter].nil?
|
445
447
|
converter ||= BatchExperiment::Comm2FnameConverter.new
|
@@ -521,18 +523,18 @@ module BatchExperiment
|
|
521
523
|
curr_line = [algorithm, filename, run_number]
|
522
524
|
|
523
525
|
partial_fname = converter.call(exp_comm)
|
526
|
+
run_fname = output_dir + partial_fname + run_ext
|
524
527
|
out_fname = output_dir + partial_fname + out_ext
|
525
|
-
lockfname = output_dir + partial_fname + unfinished_ext
|
526
528
|
extractor = run_info[:comm_info][:extractor]
|
527
529
|
|
528
|
-
if File.exists?(
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
530
|
+
if File.exists?(run_fname)
|
531
|
+
run_info = File.open(run_fname, 'r') { | f | f.read }
|
532
|
+
output = File.open(out_fname, 'r') { | f | f.read }
|
533
|
+
# TODO: in the future change the extractors to receive
|
534
|
+
# three inputs (out/err/run). If the runs create arbitrary files
|
535
|
+
# with relevant info, the extractor will need to find, and open
|
536
|
+
# them itself (i.e. it's not our job).
|
537
|
+
curr_line << extractor.extract(output + "\n" + run_info)
|
536
538
|
end
|
537
539
|
|
538
540
|
body << curr_line.join(conf[:separator])
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: batch_experiment
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Henrique Becker
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-01-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: childprocess
|