batch_experiment 2.2.0 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/batch_experiment.rb +64 -62
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 193dd9c4b80ffc484d36387cf6f5a890fb94fc9f
4
- data.tar.gz: 964b09bb90d7561781aea92aedceea75d51fa39d
3
+ metadata.gz: 4c482d1a8d72df171607b55322421975768dd006
4
+ data.tar.gz: 0f55e0c4a9afe8bdbfbdd18408ecd2b9ec09d71b
5
5
  SHA512:
6
- metadata.gz: 7958d2968951167115da3b6a44ed3760b381a55e994a72420d6ea3039a9c8281644515a38436f766f3bc7dd20f8f467ee95a9ff906eede69842bdeb22c846c77
7
- data.tar.gz: bd359497c5eb6878ad90178b1909533917fc6c67dd0be709ded77baccb4705ebf4976873d17cc866c058d13d390bc6528233bceacf0054ba5517a92e4ee483dc
6
+ metadata.gz: 4663bc105b70b4ad249ab1d59e0aa49c151fb2c157907bf968576c934ecfc2adced1224aae1bd7a6bfe7a493c803a9bb931f963b5c368d24ee9fc4bed63017f6
7
+ data.tar.gz: 75b06a9b3a780b9c4db02d56531592463e3747809c38c09cfec623e9eab717832ac260a388b7850d69d17d0c73c071e89eda8e3c390ea392965f51417257c9b2
@@ -5,6 +5,16 @@ require 'socket'
5
5
  # The main module, the two main utility methods offered are ::batch and
6
6
  # ::experiment.
7
7
  module BatchExperiment
8
+ # Exception class raised when multiple extractor objects passed to
9
+ # ::experiment (by the comms_info parameter) disagree on the content of the
10
+ # columns. Ex.: If we call ::experiment with different extractor objects, all
11
+ # arrays returned by the #names method of those extractors should be equal or
12
+ # a prefix of the biggest array. Ex.: ['a', 'b'], ['a', 'b'], ['a'] and
13
+ # ['a', 'b', 'c'] works, but adding ['a', 'c'] will end the program with
14
+ # this exception. This is made to avoid making the mistake of generating a
15
+ # csv where the same column has a different meaning for each row.
16
+ class ColumnSpecError < ArgumentError; end
17
+
8
18
  # The default callable object used by Comm2FnameConverter to convert
9
19
  # a command into a filename. Comm2FnameConverter doesn't create a sanitized
10
20
  # filename from the command string (it uses its first argument to do this,
@@ -90,19 +100,23 @@ module BatchExperiment
90
100
  # terminated commands on comms_executed.
91
101
  def self.update_finished(free_cpus, comms_running, comms_executed) #:nodoc
92
102
  comms_running.delete_if do | job |
93
- # Don't call '#exited?' twice, store value at variable. If you call
103
+ # Don't call '#exited?' twice, store its value in a variable. If you call
94
104
  # it twice it's possible to remove it from the list of running commands
95
- # without freeing a cpu, what will end locking all cpus forever.
105
+ # without freeing a cpu, which will mark the cpu as busy forever.
96
106
  exited = job[:proc].exited?
97
107
  if exited
98
108
  free_cpus.push(job[:cpu])
99
- File.delete(job[:lockfname])
109
+ job[:out_file].close
110
+ job[:err_file].close
111
+ File.open(job[:run_fname], 'a') do | f |
112
+ f.write(
113
+ "command: #{job[:command]}\n" +
114
+ "date_before: #{job[:date_before].utc.to_s}\n" +
115
+ "date_after: #{Time.now.utc.to_s}\n" +
116
+ "hostname: #{Socket.gethostname}\n"
117
+ )
118
+ end
100
119
  comms_executed << job[:command]
101
- out = job[:out_file]
102
- out.write("\ncommand: " + job[:command])
103
- out.write("\ndate_before: " + job[:date_before].utc.to_s)
104
- out.write("\ndate_after: " + Time.now.utc.to_s)
105
- out.write("\nhostname: " + Socket.gethostname)
106
120
  end
107
121
  exited # bool returned to delete_if
108
122
  end
@@ -114,13 +128,15 @@ module BatchExperiment
114
128
  #
115
129
  # The output filenames are derived from the commands. The ones with '.out'
116
130
  # are the ones with the command standard output. The analogue is valid for
117
- # '.err' and standard error. Right before starting a command, a '.unfinished'
118
- # file is created. After the command ends its execution this file is
119
- # removed. If the command ends its execution by means of a timeout the file
120
- # is also removed. The file only remains if the batch procedure is
121
- # interrupted (script was killed, or system crashed). This '.unfinished' file
122
- # will contain the process pid, if the corresponding process started with
123
- # success.
131
+ # '.err' and standard error. The filenames ending in '.run' are created only
132
+ # after the process has ended (naturally or by timeout) and contain: the
133
+ # sh command, the date before starting the job (up to the second), the date
134
+ # after the process has ended (up to the second), and the hostname of the
135
+ # computer where the command was executed. The '.run' files have a second
136
+ # utility that is to mark which commands were already executed. If a power
137
+ # outage turns off the computer, or you decide to kill the script, the '.run'
138
+ # files will store which executions already happened, and if you execute the
139
+ # script again it will (by default) skip the already executed commands.
124
140
  #
125
141
  # @param commands [Array<String>] The shell commands.
126
142
  # @param conf [Hash] The configurations, as follows:
@@ -144,13 +160,13 @@ module BatchExperiment
144
160
  # and convert it (possibly losing information), to a valid filename. Used
145
161
  # over the commands to define the output files of commands. Default:
146
162
  # BatchExperiment::Comm2FnameConverter.new.
147
- # * skip_done_comms [FalseClass,TrueClass] Skip any command for what a
148
- # corresponding '.out' file exists, except if both a '.out' and a
149
- # '.unfinished' file exists, in the last case the command is always
150
- # be executed. If false, execute all commands and overwrite any previous
151
- # outputs. Default: true.
152
- # * unfinished_ext [String] Extension to be used in place of
153
- # '.unfinished'. Default: '.unfinished'.
163
+ # * skip_done_comms [FalseClass,TrueClass] If true then, for each command,
164
+ # verify if a corresponding '.run' file exists, if it exists, skip the
165
+ # command, if it does not exist then execute the command. If false then it
166
+ # removes the corresponding out/err/run files before executing each
167
+ # command. Default: true.
168
+ # * run_ext [String] Extension to be used in place of '.run'.
169
+ # Default: '.run'.
154
170
  # * out_ext [String] Extension to be used in place of '.out'.
155
171
  # Default: '.out'.
156
172
  # * err_ext [String] Extension to be used in place of '.err'.
@@ -193,9 +209,9 @@ module BatchExperiment
193
209
  # provided. Don't change the conf argument, only our version of conf.
194
210
  conf = conf.clone
195
211
  conf[:time_fmt] ||= 'ext_time: %e\\next_mem: %M\\n'
196
- conf[:unfinished_ext] ||= '.unfinished'
197
212
  conf[:out_ext] ||= '.out'
198
213
  conf[:err_ext] ||= '.err'
214
+ conf[:run_ext] ||= '.run'
199
215
  conf[:busy_loop_sleep] ||= 0.1
200
216
  conf[:post_timeout] ||= 5
201
217
  conf[:converter] ||= BatchExperiment::Comm2FnameConverter.new
@@ -213,20 +229,19 @@ module BatchExperiment
213
229
  commfname = conf[:converter].call(command)
214
230
  out_fname = conf[:output_dir] + commfname + conf[:out_ext]
215
231
  err_fname = conf[:output_dir] + commfname + conf[:err_ext]
216
- lockfname = conf[:output_dir] + commfname + conf[:unfinished_ext]
217
-
218
- if conf[:skip_done_comms] && File.exists?(out_fname)
219
- if File.exists?(lockfname)
220
- puts "Found file #{out_fname}, but a #{lockfname} also exists:"
221
- puts "Will execute command '#{command}' anyway."
222
- else
223
- puts "Found file #{commfname}, skipping command: #{command}"
224
- STDOUT.flush
225
- next
226
- end
232
+ run_fname = conf[:output_dir] + commfname + conf[:run_ext]
233
+
234
+ if conf[:skip_done_comms] && File.exists?(run_fname)
235
+ puts "Found file: #{commfname} -- skipping command: #{command}"
236
+ STDOUT.flush
237
+ next
238
+ else
239
+ if File.exists? out_fname then File.delete out_fname end
240
+ if File.exists? err_fname then File.delete err_fname end
241
+ if File.exists? run_fname then File.delete run_fname end
227
242
  end
228
243
 
229
- puts "Waiting to execute command: #{command}"
244
+ puts "Next command in the queue: #{command}"
230
245
  STDOUT.flush
231
246
 
232
247
  while free_cpus.empty? do
@@ -238,7 +253,7 @@ module BatchExperiment
238
253
 
239
254
  cproc = ChildProcess.build(
240
255
  'taskset', '-c', cpu.to_s,
241
- 'time', '-f', conf[:time_fmt], '--append', '-o', out_fname,
256
+ 'time', '-f', conf[:time_fmt], '--append', '-o', run_fname,
242
257
  'timeout', '--preserve-status', '-k', "#{conf[:post_timeout]}s",
243
258
  "#{conf[:timeout]}s",
244
259
  'sh', '-c', command
@@ -246,7 +261,6 @@ module BatchExperiment
246
261
 
247
262
  cproc.cwd = conf[:cwd]
248
263
 
249
- File.open(lockfname, 'w') {} # empty on purpose
250
264
  out = File.open(out_fname, 'w')
251
265
  err = File.open(err_fname, 'w')
252
266
  cproc.io.stdout = out
@@ -258,16 +272,14 @@ module BatchExperiment
258
272
  comms_running << {
259
273
  proc: cproc,
260
274
  cpu: cpu,
261
- lockfname: lockfname,
262
275
  command: command,
263
276
  date_before: date_before,
264
277
  out_file: out,
278
+ err_file: err,
279
+ run_fname: run_fname,
265
280
  }
266
281
 
267
- # The lock file now stores the process pid for debug reasons.
268
- File.open(lockfname, 'w') { | f | f.write cproc.pid }
269
-
270
- puts "command assigned to cpu#{cpu}"
282
+ puts "The command was assigned to cpu#{cpu}."
271
283
  STDOUT.flush
272
284
  end
273
285
 
@@ -323,16 +335,6 @@ module BatchExperiment
323
335
  ret
324
336
  end
325
337
 
326
- # Exception class raised when multiple extractor objects passed to
327
- # ::experiment (by the comms_info parameter) disagree on the content of the
328
- # columns. Ex.: If we call ::experiment with different extractor objects, all
329
- # arrays returned by the #names method of those extractors should be equal or
330
- # a prefix of the biggest array. Ex.: ['a', 'b'], ['a', 'b'], ['a'] and
331
- # ['a', 'b', 'c'] works, but adding ['a', 'c'] will end the program with
332
- # this exception. This is made to avoid making the mistake of generating a
333
- # csv where the same column has a different meaning for each row.
334
- class ColumnSpecError < ArgumentError; end
335
-
336
338
  # @!visibility private
337
339
  # Check if the headers can be combined, if they can return a shallow copy of
338
340
  # the biggest header, otherwise throw an exception.
@@ -438,8 +440,8 @@ module BatchExperiment
438
440
  #conf[:skip_commands] defaults to false/nil
439
441
 
440
442
  # Get some of the batch config that we use inside here too.
443
+ run_ext = batch_conf[:run_ext] || '.run'
441
444
  out_ext = batch_conf[:out_ext] || '.out'
442
- unfinished_ext = batch_conf[:unfinished_ext] || '.unfinished'
443
445
  output_dir = batch_conf[:output_dir] || './'
444
446
  converter = batch_conf[:converter].clone unless batch_conf[:converter].nil?
445
447
  converter ||= BatchExperiment::Comm2FnameConverter.new
@@ -521,18 +523,18 @@ module BatchExperiment
521
523
  curr_line = [algorithm, filename, run_number]
522
524
 
523
525
  partial_fname = converter.call(exp_comm)
526
+ run_fname = output_dir + partial_fname + run_ext
524
527
  out_fname = output_dir + partial_fname + out_ext
525
- lockfname = output_dir + partial_fname + unfinished_ext
526
528
  extractor = run_info[:comm_info][:extractor]
527
529
 
528
- if File.exists?(out_fname)
529
- if File.exists?(lockfname)
530
- puts "Ignored file '#{out_fname}' because there was a"
531
- + " '#{lockfname}' file too."
532
- else
533
- f_content = File.open(out_fname, 'r') { | f | f.read }
534
- curr_line << extractor.extract(f_content)
535
- end
530
+ if File.exists?(run_fname)
531
+ run_info = File.open(run_fname, 'r') { | f | f.read }
532
+ output = File.open(out_fname, 'r') { | f | f.read }
533
+ # TODO: in the future change the extractors to receive
534
+ # three inputs (out/err/run). If the runs create arbitrary files
535
+ # with relevant info, the extractor will need to find, and open
536
+ # them itself (i.e. it's not our job).
537
+ curr_line << extractor.extract(output + "\n" + run_info)
536
538
  end
537
539
 
538
540
  body << curr_line.join(conf[:separator])
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: batch_experiment
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.0
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Henrique Becker
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-07-22 00:00:00.000000000 Z
11
+ date: 2017-01-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: childprocess