batch_experiment 2.2.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/batch_experiment.rb +64 -62
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4c482d1a8d72df171607b55322421975768dd006
|
4
|
+
data.tar.gz: 0f55e0c4a9afe8bdbfbdd18408ecd2b9ec09d71b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4663bc105b70b4ad249ab1d59e0aa49c151fb2c157907bf968576c934ecfc2adced1224aae1bd7a6bfe7a493c803a9bb931f963b5c368d24ee9fc4bed63017f6
|
7
|
+
data.tar.gz: 75b06a9b3a780b9c4db02d56531592463e3747809c38c09cfec623e9eab717832ac260a388b7850d69d17d0c73c071e89eda8e3c390ea392965f51417257c9b2
|
data/lib/batch_experiment.rb
CHANGED
@@ -5,6 +5,16 @@ require 'socket'
|
|
5
5
|
# The main module, the two main utility methods offered are ::batch and
|
6
6
|
# ::experiment.
|
7
7
|
module BatchExperiment
|
8
|
+
# Exception class raised when multiple extractor objects passed to
|
9
|
+
# ::experiment (by the comms_info parameter) disagree on the content of the
|
10
|
+
# columns. Ex.: If we call ::experiment with different extractor objects, all
|
11
|
+
# arrays returned by the #names method of those extractors should be equal or
|
12
|
+
# a prefix of the biggest array. Ex.: ['a', 'b'], ['a', 'b'], ['a'] and
|
13
|
+
# ['a', 'b', 'c'] works, but adding ['a', 'c'] will end the program with
|
14
|
+
# this exception. This is made to avoid making the mistake of generating a
|
15
|
+
# csv where the same column has a different meaning for each row.
|
16
|
+
class ColumnSpecError < ArgumentError; end
|
17
|
+
|
8
18
|
# The default callable object used by Comm2FnameConverter to convert
|
9
19
|
# a command into a filename. Comm2FnameConverter doesn't create a sanitized
|
10
20
|
# filename from the command string (it uses its first argument to do this,
|
@@ -90,19 +100,23 @@ module BatchExperiment
|
|
90
100
|
# terminated commands on comms_executed.
|
91
101
|
def self.update_finished(free_cpus, comms_running, comms_executed) #:nodoc
|
92
102
|
comms_running.delete_if do | job |
|
93
|
-
# Don't call '#exited?' twice, store value
|
103
|
+
# Don't call '#exited?' twice, store its value in a variable. If you call
|
94
104
|
# it twice it's possible to remove it from the list of running commands
|
95
|
-
# without freeing a cpu, what will
|
105
|
+
# without freeing a cpu, which will mark the cpu as busy forever.
|
96
106
|
exited = job[:proc].exited?
|
97
107
|
if exited
|
98
108
|
free_cpus.push(job[:cpu])
|
99
|
-
|
109
|
+
job[:out_file].close
|
110
|
+
job[:err_file].close
|
111
|
+
File.open(job[:run_fname], 'a') do | f |
|
112
|
+
f.write(
|
113
|
+
"command: #{job[:command]}\n" +
|
114
|
+
"date_before: #{job[:date_before].utc.to_s}\n" +
|
115
|
+
"date_after: #{Time.now.utc.to_s}\n" +
|
116
|
+
"hostname: #{Socket.gethostname}\n"
|
117
|
+
)
|
118
|
+
end
|
100
119
|
comms_executed << job[:command]
|
101
|
-
out = job[:out_file]
|
102
|
-
out.write("\ncommand: " + job[:command])
|
103
|
-
out.write("\ndate_before: " + job[:date_before].utc.to_s)
|
104
|
-
out.write("\ndate_after: " + Time.now.utc.to_s)
|
105
|
-
out.write("\nhostname: " + Socket.gethostname)
|
106
120
|
end
|
107
121
|
exited # bool returned to delete_if
|
108
122
|
end
|
@@ -114,13 +128,15 @@ module BatchExperiment
|
|
114
128
|
#
|
115
129
|
# The output filenames are derived from the commands. The ones with '.out'
|
116
130
|
# are the ones with the command standard output. The analogue is valid for
|
117
|
-
# '.err' and standard error.
|
118
|
-
#
|
119
|
-
#
|
120
|
-
#
|
121
|
-
#
|
122
|
-
#
|
123
|
-
#
|
131
|
+
# '.err' and standard error. The filenames ending in '.run' are created only
|
132
|
+
# after the process has ended (naturally or by timeout) and contain: the
|
133
|
+
# sh command, the date before starting the job (up to the second), the date
|
134
|
+
# after the process has ended (up to the second), and the hostname of the
|
135
|
+
# computer where the command was executed. The '.run' files have a second
|
136
|
+
# utility that is to mark which commands were already executed. If a power
|
137
|
+
# outage turns off the computer, or you decide to kill the script, the '.run'
|
138
|
+
# files will store which executions already happened, and if you execute the
|
139
|
+
# script again it will (by default) skip the already executed commands.
|
124
140
|
#
|
125
141
|
# @param commands [Array<String>] The shell commands.
|
126
142
|
# @param conf [Hash] The configurations, as follows:
|
@@ -144,13 +160,13 @@ module BatchExperiment
|
|
144
160
|
# and convert it (possibly losing information), to a valid filename. Used
|
145
161
|
# over the commands to define the output files of commands. Default:
|
146
162
|
# BatchExperiment::Comm2FnameConverter.new.
|
147
|
-
# * skip_done_comms [FalseClass,TrueClass]
|
148
|
-
# corresponding '.
|
149
|
-
#
|
150
|
-
#
|
151
|
-
#
|
152
|
-
# *
|
153
|
-
#
|
163
|
+
# * skip_done_comms [FalseClass,TrueClass] If true then, for each command,
|
164
|
+
# verify if a corresponding '.run' file exists, if it exists, skip the
|
165
|
+
# command, if it does not exist then execute the command. If false then it
|
166
|
+
# removes the corresponding out/err/run files before executing each
|
167
|
+
# command. Default: true.
|
168
|
+
# * run_ext [String] Extension to be used in place of '.run'.
|
169
|
+
# Default: '.run'.
|
154
170
|
# * out_ext [String] Extension to be used in place of '.out'.
|
155
171
|
# Default: '.out'.
|
156
172
|
# * err_ext [String] Extension to be used in place of '.err'.
|
@@ -193,9 +209,9 @@ module BatchExperiment
|
|
193
209
|
# provided. Don't change the conf argument, only our version of conf.
|
194
210
|
conf = conf.clone
|
195
211
|
conf[:time_fmt] ||= 'ext_time: %e\\next_mem: %M\\n'
|
196
|
-
conf[:unfinished_ext] ||= '.unfinished'
|
197
212
|
conf[:out_ext] ||= '.out'
|
198
213
|
conf[:err_ext] ||= '.err'
|
214
|
+
conf[:run_ext] ||= '.run'
|
199
215
|
conf[:busy_loop_sleep] ||= 0.1
|
200
216
|
conf[:post_timeout] ||= 5
|
201
217
|
conf[:converter] ||= BatchExperiment::Comm2FnameConverter.new
|
@@ -213,20 +229,19 @@ module BatchExperiment
|
|
213
229
|
commfname = conf[:converter].call(command)
|
214
230
|
out_fname = conf[:output_dir] + commfname + conf[:out_ext]
|
215
231
|
err_fname = conf[:output_dir] + commfname + conf[:err_ext]
|
216
|
-
|
217
|
-
|
218
|
-
if conf[:skip_done_comms] && File.exists?(
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
end
|
232
|
+
run_fname = conf[:output_dir] + commfname + conf[:run_ext]
|
233
|
+
|
234
|
+
if conf[:skip_done_comms] && File.exists?(run_fname)
|
235
|
+
puts "Found file: #{commfname} -- skipping command: #{command}"
|
236
|
+
STDOUT.flush
|
237
|
+
next
|
238
|
+
else
|
239
|
+
if File.exists? out_fname then File.delete out_fname end
|
240
|
+
if File.exists? err_fname then File.delete err_fname end
|
241
|
+
if File.exists? run_fname then File.delete run_fname end
|
227
242
|
end
|
228
243
|
|
229
|
-
puts "
|
244
|
+
puts "Next command in the queue: #{command}"
|
230
245
|
STDOUT.flush
|
231
246
|
|
232
247
|
while free_cpus.empty? do
|
@@ -238,7 +253,7 @@ module BatchExperiment
|
|
238
253
|
|
239
254
|
cproc = ChildProcess.build(
|
240
255
|
'taskset', '-c', cpu.to_s,
|
241
|
-
'time', '-f', conf[:time_fmt], '--append', '-o',
|
256
|
+
'time', '-f', conf[:time_fmt], '--append', '-o', run_fname,
|
242
257
|
'timeout', '--preserve-status', '-k', "#{conf[:post_timeout]}s",
|
243
258
|
"#{conf[:timeout]}s",
|
244
259
|
'sh', '-c', command
|
@@ -246,7 +261,6 @@ module BatchExperiment
|
|
246
261
|
|
247
262
|
cproc.cwd = conf[:cwd]
|
248
263
|
|
249
|
-
File.open(lockfname, 'w') {} # empty on purpose
|
250
264
|
out = File.open(out_fname, 'w')
|
251
265
|
err = File.open(err_fname, 'w')
|
252
266
|
cproc.io.stdout = out
|
@@ -258,16 +272,14 @@ module BatchExperiment
|
|
258
272
|
comms_running << {
|
259
273
|
proc: cproc,
|
260
274
|
cpu: cpu,
|
261
|
-
lockfname: lockfname,
|
262
275
|
command: command,
|
263
276
|
date_before: date_before,
|
264
277
|
out_file: out,
|
278
|
+
err_file: err,
|
279
|
+
run_fname: run_fname,
|
265
280
|
}
|
266
281
|
|
267
|
-
|
268
|
-
File.open(lockfname, 'w') { | f | f.write cproc.pid }
|
269
|
-
|
270
|
-
puts "command assigned to cpu#{cpu}"
|
282
|
+
puts "The command was assigned to cpu#{cpu}."
|
271
283
|
STDOUT.flush
|
272
284
|
end
|
273
285
|
|
@@ -323,16 +335,6 @@ module BatchExperiment
|
|
323
335
|
ret
|
324
336
|
end
|
325
337
|
|
326
|
-
# Exception class raised when multiple extractor objects passed to
|
327
|
-
# ::experiment (by the comms_info parameter) disagree on the content of the
|
328
|
-
# columns. Ex.: If we call ::experiment with different extractor objects, all
|
329
|
-
# arrays returned by the #names method of those extractors should be equal or
|
330
|
-
# a prefix of the biggest array. Ex.: ['a', 'b'], ['a', 'b'], ['a'] and
|
331
|
-
# ['a', 'b', 'c'] works, but adding ['a', 'c'] will end the program with
|
332
|
-
# this exception. This is made to avoid making the mistake of generating a
|
333
|
-
# csv where the same column has a different meaning for each row.
|
334
|
-
class ColumnSpecError < ArgumentError; end
|
335
|
-
|
336
338
|
# @!visibility private
|
337
339
|
# Check if the headers can be combined, if they can return a shallow copy of
|
338
340
|
# the biggest header, otherwise throw an exception.
|
@@ -438,8 +440,8 @@ module BatchExperiment
|
|
438
440
|
#conf[:skip_commands] defaults to false/nil
|
439
441
|
|
440
442
|
# Get some of the batch config that we use inside here too.
|
443
|
+
run_ext = batch_conf[:run_ext] || '.run'
|
441
444
|
out_ext = batch_conf[:out_ext] || '.out'
|
442
|
-
unfinished_ext = batch_conf[:unfinished_ext] || '.unfinished'
|
443
445
|
output_dir = batch_conf[:output_dir] || './'
|
444
446
|
converter = batch_conf[:converter].clone unless batch_conf[:converter].nil?
|
445
447
|
converter ||= BatchExperiment::Comm2FnameConverter.new
|
@@ -521,18 +523,18 @@ module BatchExperiment
|
|
521
523
|
curr_line = [algorithm, filename, run_number]
|
522
524
|
|
523
525
|
partial_fname = converter.call(exp_comm)
|
526
|
+
run_fname = output_dir + partial_fname + run_ext
|
524
527
|
out_fname = output_dir + partial_fname + out_ext
|
525
|
-
lockfname = output_dir + partial_fname + unfinished_ext
|
526
528
|
extractor = run_info[:comm_info][:extractor]
|
527
529
|
|
528
|
-
if File.exists?(
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
530
|
+
if File.exists?(run_fname)
|
531
|
+
run_info = File.open(run_fname, 'r') { | f | f.read }
|
532
|
+
output = File.open(out_fname, 'r') { | f | f.read }
|
533
|
+
# TODO: in the future change the extractors to receive
|
534
|
+
# three inputs (out/err/run). If the runs create arbitrary files
|
535
|
+
# with relevant info, the extractor will need to find, and open
|
536
|
+
# them itself (i.e. it's not our job).
|
537
|
+
curr_line << extractor.extract(output + "\n" + run_info)
|
536
538
|
end
|
537
539
|
|
538
540
|
body << curr_line.join(conf[:separator])
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: batch_experiment
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Henrique Becker
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-01-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: childprocess
|