batch_experiment 2.2.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/batch_experiment.rb +64 -62
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 193dd9c4b80ffc484d36387cf6f5a890fb94fc9f
4
- data.tar.gz: 964b09bb90d7561781aea92aedceea75d51fa39d
3
+ metadata.gz: 4c482d1a8d72df171607b55322421975768dd006
4
+ data.tar.gz: 0f55e0c4a9afe8bdbfbdd18408ecd2b9ec09d71b
5
5
  SHA512:
6
- metadata.gz: 7958d2968951167115da3b6a44ed3760b381a55e994a72420d6ea3039a9c8281644515a38436f766f3bc7dd20f8f467ee95a9ff906eede69842bdeb22c846c77
7
- data.tar.gz: bd359497c5eb6878ad90178b1909533917fc6c67dd0be709ded77baccb4705ebf4976873d17cc866c058d13d390bc6528233bceacf0054ba5517a92e4ee483dc
6
+ metadata.gz: 4663bc105b70b4ad249ab1d59e0aa49c151fb2c157907bf968576c934ecfc2adced1224aae1bd7a6bfe7a493c803a9bb931f963b5c368d24ee9fc4bed63017f6
7
+ data.tar.gz: 75b06a9b3a780b9c4db02d56531592463e3747809c38c09cfec623e9eab717832ac260a388b7850d69d17d0c73c071e89eda8e3c390ea392965f51417257c9b2
@@ -5,6 +5,16 @@ require 'socket'
5
5
  # The main module, the two main utility methods offered are ::batch and
6
6
  # ::experiment.
7
7
  module BatchExperiment
8
+ # Exception class raised when multiple extractor objects passed to
9
+ # ::experiment (by the comms_info parameter) disagree on the content of the
10
+ # columns. Ex.: If we call ::experiment with different extractor objects, all
11
+ # arrays returned by the #names method of those extractors should be equal or
12
+ # a prefix of the biggest array. Ex.: ['a', 'b'], ['a', 'b'], ['a'] and
13
+ # ['a', 'b', 'c'] work, but adding ['a', 'c'] will end the program with
14
+ # this exception. This is made to avoid making the mistake of generating a
15
+ # csv where the same column has a different meaning for each row.
16
+ class ColumnSpecError < ArgumentError; end
17
+
8
18
  # The default callable object used by Comm2FnameConverter to convert
9
19
  # a command into a filename. Comm2FnameConverter doesn't create a sanitized
10
20
  # filename from the command string (it uses its first argument to do this,
@@ -90,19 +100,23 @@ module BatchExperiment
90
100
  # terminated commands on comms_executed.
91
101
  def self.update_finished(free_cpus, comms_running, comms_executed) #:nodoc
92
102
  comms_running.delete_if do | job |
93
- # Don't call '#exited?' twice, store value at variable. If you call
103
+ # Don't call '#exited?' twice, store its value in a variable. If you call
94
104
  # it twice it's possible to remove it from the list of running commands
95
- # without freeing a cpu, what will end locking all cpus forever.
105
+ # without freeing a cpu, which will mark the cpu as busy forever.
96
106
  exited = job[:proc].exited?
97
107
  if exited
98
108
  free_cpus.push(job[:cpu])
99
- File.delete(job[:lockfname])
109
+ job[:out_file].close
110
+ job[:err_file].close
111
+ File.open(job[:run_fname], 'a') do | f |
112
+ f.write(
113
+ "command: #{job[:command]}\n" +
114
+ "date_before: #{job[:date_before].utc.to_s}\n" +
115
+ "date_after: #{Time.now.utc.to_s}\n" +
116
+ "hostname: #{Socket.gethostname}\n"
117
+ )
118
+ end
100
119
  comms_executed << job[:command]
101
- out = job[:out_file]
102
- out.write("\ncommand: " + job[:command])
103
- out.write("\ndate_before: " + job[:date_before].utc.to_s)
104
- out.write("\ndate_after: " + Time.now.utc.to_s)
105
- out.write("\nhostname: " + Socket.gethostname)
106
120
  end
107
121
  exited # bool returned to delete_if
108
122
  end
@@ -114,13 +128,15 @@ module BatchExperiment
114
128
  #
115
129
  # The output filenames are derived from the commands. The ones with '.out'
116
130
  # are the ones with the command standard output. The analogue is valid for
117
- # '.err' and standard error. Right before starting a command, a '.unfinished'
118
- # file is created. After the command ends its execution this file is
119
- # removed. If the command ends its execution by means of a timeout the file
120
- # is also removed. The file only remains if the batch procedure is
121
- # interrupted (script was killed, or system crashed). This '.unfinished' file
122
- # will contain the process pid, if the corresponding process started with
123
- # success.
131
+ # '.err' and standard error. The filenames ending in '.run' are created only
132
+ # after the process has ended (naturally or by timeout) and contain: the
133
+ # sh command, the date before starting the job (up to the second), the date
134
+ # after the process has ended (up to the second), and the hostname of the
135
+ # computer where the command was executed. The '.run' files have a second
136
+ # utility that is to mark which commands were already executed. If a power
137
+ # outage turns off the computer, or you decide to kill the script, the '.run'
138
+ # files will store which executions already happened, and if you execute the
139
+ # script again it will (by default) skip the already executed commands.
124
140
  #
125
141
  # @param commands [Array<String>] The shell commands.
126
142
  # @param conf [Hash] The configurations, as follows:
@@ -144,13 +160,13 @@ module BatchExperiment
144
160
  # and convert it (possibly losing information), to a valid filename. Used
145
161
  # over the commands to define the output files of commands. Default:
146
162
  # BatchExperiment::Comm2FnameConverter.new.
147
- # * skip_done_comms [FalseClass,TrueClass] Skip any command for what a
148
- # corresponding '.out' file exists, except if both a '.out' and a
149
- # '.unfinished' file exists, in the last case the command is always
150
- # be executed. If false, execute all commands and overwrite any previous
151
- # outputs. Default: true.
152
- # * unfinished_ext [String] Extension to be used in place of
153
- # '.unfinished'. Default: '.unfinished'.
163
+ # * skip_done_comms [FalseClass,TrueClass] If true then, for each command,
164
+ # verify if a corresponding '.run' file exists, if it exists, skip the
165
+ # command, if it does not exist then execute the command. If false then it
166
+ # removes the corresponding out/err/run files before executing each
167
+ # command. Default: true.
168
+ # * run_ext [String] Extension to be used in place of '.run'.
169
+ # Default: '.run'.
154
170
  # * out_ext [String] Extension to be used in place of '.out'.
155
171
  # Default: '.out'.
156
172
  # * err_ext [String] Extension to be used in place of '.err'.
@@ -193,9 +209,9 @@ module BatchExperiment
193
209
  # provided. Don't change the conf argument, only our version of conf.
194
210
  conf = conf.clone
195
211
  conf[:time_fmt] ||= 'ext_time: %e\\next_mem: %M\\n'
196
- conf[:unfinished_ext] ||= '.unfinished'
197
212
  conf[:out_ext] ||= '.out'
198
213
  conf[:err_ext] ||= '.err'
214
+ conf[:run_ext] ||= '.run'
199
215
  conf[:busy_loop_sleep] ||= 0.1
200
216
  conf[:post_timeout] ||= 5
201
217
  conf[:converter] ||= BatchExperiment::Comm2FnameConverter.new
@@ -213,20 +229,19 @@ module BatchExperiment
213
229
  commfname = conf[:converter].call(command)
214
230
  out_fname = conf[:output_dir] + commfname + conf[:out_ext]
215
231
  err_fname = conf[:output_dir] + commfname + conf[:err_ext]
216
- lockfname = conf[:output_dir] + commfname + conf[:unfinished_ext]
217
-
218
- if conf[:skip_done_comms] && File.exists?(out_fname)
219
- if File.exists?(lockfname)
220
- puts "Found file #{out_fname}, but a #{lockfname} also exists:"
221
- puts "Will execute command '#{command}' anyway."
222
- else
223
- puts "Found file #{commfname}, skipping command: #{command}"
224
- STDOUT.flush
225
- next
226
- end
232
+ run_fname = conf[:output_dir] + commfname + conf[:run_ext]
233
+
234
+ if conf[:skip_done_comms] && File.exists?(run_fname)
235
+ puts "Found file: #{commfname} -- skipping command: #{command}"
236
+ STDOUT.flush
237
+ next
238
+ else
239
+ if File.exists? out_fname then File.delete out_fname end
240
+ if File.exists? err_fname then File.delete err_fname end
241
+ if File.exists? run_fname then File.delete run_fname end
227
242
  end
228
243
 
229
- puts "Waiting to execute command: #{command}"
244
+ puts "Next command in the queue: #{command}"
230
245
  STDOUT.flush
231
246
 
232
247
  while free_cpus.empty? do
@@ -238,7 +253,7 @@ module BatchExperiment
238
253
 
239
254
  cproc = ChildProcess.build(
240
255
  'taskset', '-c', cpu.to_s,
241
- 'time', '-f', conf[:time_fmt], '--append', '-o', out_fname,
256
+ 'time', '-f', conf[:time_fmt], '--append', '-o', run_fname,
242
257
  'timeout', '--preserve-status', '-k', "#{conf[:post_timeout]}s",
243
258
  "#{conf[:timeout]}s",
244
259
  'sh', '-c', command
@@ -246,7 +261,6 @@ module BatchExperiment
246
261
 
247
262
  cproc.cwd = conf[:cwd]
248
263
 
249
- File.open(lockfname, 'w') {} # empty on purpose
250
264
  out = File.open(out_fname, 'w')
251
265
  err = File.open(err_fname, 'w')
252
266
  cproc.io.stdout = out
@@ -258,16 +272,14 @@ module BatchExperiment
258
272
  comms_running << {
259
273
  proc: cproc,
260
274
  cpu: cpu,
261
- lockfname: lockfname,
262
275
  command: command,
263
276
  date_before: date_before,
264
277
  out_file: out,
278
+ err_file: err,
279
+ run_fname: run_fname,
265
280
  }
266
281
 
267
- # The lock file now stores the process pid for debug reasons.
268
- File.open(lockfname, 'w') { | f | f.write cproc.pid }
269
-
270
- puts "command assigned to cpu#{cpu}"
282
+ puts "The command was assigned to cpu#{cpu}."
271
283
  STDOUT.flush
272
284
  end
273
285
 
@@ -323,16 +335,6 @@ module BatchExperiment
323
335
  ret
324
336
  end
325
337
 
326
- # Exception class raised when multiple extractor objects passed to
327
- # ::experiment (by the comms_info parameter) disagree on the content of the
328
- # columns. Ex.: If we call ::experiment with different extractor objects, all
329
- # arrays returned by the #names method of those extractors should be equal or
330
- # a prefix of the biggest array. Ex.: ['a', 'b'], ['a', 'b'], ['a'] and
331
- # ['a', 'b', 'c'] works, but adding ['a', 'c'] will end the program with
332
- # this exception. This is made to avoid making the mistake of generating a
333
- # csv where the same column has a different meaning for each row.
334
- class ColumnSpecError < ArgumentError; end
335
-
336
338
  # @!visibility private
337
339
  # Check if the headers can be combined, if they can return a shallow copy of
338
340
  # the biggest header, otherwise throw an exception.
@@ -438,8 +440,8 @@ module BatchExperiment
438
440
  #conf[:skip_commands] defaults to false/nil
439
441
 
440
442
  # Get some of the batch config that we use inside here too.
443
+ run_ext = batch_conf[:run_ext] || '.run'
441
444
  out_ext = batch_conf[:out_ext] || '.out'
442
- unfinished_ext = batch_conf[:unfinished_ext] || '.unfinished'
443
445
  output_dir = batch_conf[:output_dir] || './'
444
446
  converter = batch_conf[:converter].clone unless batch_conf[:converter].nil?
445
447
  converter ||= BatchExperiment::Comm2FnameConverter.new
@@ -521,18 +523,18 @@ module BatchExperiment
521
523
  curr_line = [algorithm, filename, run_number]
522
524
 
523
525
  partial_fname = converter.call(exp_comm)
526
+ run_fname = output_dir + partial_fname + run_ext
524
527
  out_fname = output_dir + partial_fname + out_ext
525
- lockfname = output_dir + partial_fname + unfinished_ext
526
528
  extractor = run_info[:comm_info][:extractor]
527
529
 
528
- if File.exists?(out_fname)
529
- if File.exists?(lockfname)
530
- puts "Ignored file '#{out_fname}' because there was a"
531
- + " '#{lockfname}' file too."
532
- else
533
- f_content = File.open(out_fname, 'r') { | f | f.read }
534
- curr_line << extractor.extract(f_content)
535
- end
530
+ if File.exists?(run_fname)
531
+ run_info = File.open(run_fname, 'r') { | f | f.read }
532
+ output = File.open(out_fname, 'r') { | f | f.read }
533
+ # TODO: in the future change the extractors to receive
534
+ # three inputs (out/err/run). If the runs create arbitrary files
535
+ # with relevant info, the extractor will need to find, and open
536
+ # them itself (i.e. it's not our job).
537
+ curr_line << extractor.extract(output + "\n" + run_info)
536
538
  end
537
539
 
538
540
  body << curr_line.join(conf[:separator])
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: batch_experiment
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.0
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Henrique Becker
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-07-22 00:00:00.000000000 Z
11
+ date: 2017-01-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: childprocess