batch_experiment 2.1.0 → 2.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/batch_experiment/extractor.rb +1 -1
- data/lib/batch_experiment/sample_extractors.rb +11 -0
- data/lib/batch_experiment.rb +127 -105
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e1ffe7ede1b2dec3d9e0a7b6bdff96bf19a93e53
|
4
|
+
data.tar.gz: eb3c74ba400a0e67b71130853353465054877d3e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 21a826a0f697e447331c5d0f806d8688411f1bc43967d85f42ba60de3b36123e85f2f9191850bf6539b58dfeb7aa6004deb6383ba26d52f0ca7a1b6b8390fbef
|
7
|
+
data.tar.gz: a0ec49f73d2601aaefa16fbc580d723e46de2bbf500ecea22fad76a6b8691cc1ee9ea017ae4de7946ca5cd6c17d50fa585b836784cf1e58b2f6bbce7db6cab88
|
@@ -8,7 +8,7 @@ module BatchExperiment
|
|
8
8
|
#
|
9
9
|
# @param lines [Array<String>] Program output, broken in lines.
|
10
10
|
# @param field [String] String to be found at the lines in the following
|
11
|
-
#
|
11
|
+
# pattern: 'field: value'.
|
12
12
|
#
|
13
13
|
# @return [String] The 'value' as a string or, if 'field' isn't found, an
|
14
14
|
# empty string.
|
@@ -1,12 +1,17 @@
|
|
1
1
|
require 'batch_experiment/extractor'
|
2
2
|
|
3
3
|
module BatchExperiment
|
4
|
+
# A very simple sample extractor that extracts the first line of the
|
5
|
+
# command output, the run time and the memory usage (the last two are
|
6
|
+
# automatically added to any command output by ::batch and ::experiment).
|
4
7
|
module FirstLineExtractor
|
5
8
|
extend Extractor
|
9
|
+
# (see BatchExperiment::Extractor#names)
|
6
10
|
def self.names
|
7
11
|
['first line', 'ext_time', 'ext_mem']
|
8
12
|
end
|
9
13
|
|
14
|
+
# (see BatchExperiment::Extractor#extract_from_lines)
|
10
15
|
def self.extract_from_lines(lines)
|
11
16
|
[ (lines[0] or ''),
|
12
17
|
Extractor.get_field(lines, 'ext_time'),
|
@@ -19,16 +24,19 @@ module BatchExperiment
|
|
19
24
|
# where this code had its beginning. This file contains the code used to
|
20
25
|
# extract info from the different outputs generated by UKP solving programs.
|
21
26
|
|
27
|
+
# @private
|
22
28
|
# Extractor for the output of the run_ukp5.out program available at
|
23
29
|
# https://github.com/henriquebecker91/masters. Not of interest for the
|
24
30
|
# majority of the users of this gem. Kept as example, and for this gem author's
|
25
31
|
# personal use.
|
26
32
|
module UKP5Extractor
|
27
33
|
extend Extractor
|
34
|
+
# (see BatchExperiment::Extractor#names)
|
28
35
|
def self.names
|
29
36
|
['internal time', 'external time', 'external memory', 'opt']
|
30
37
|
end
|
31
38
|
|
39
|
+
# (see BatchExperiment::Extractor#extract_from_lines)
|
32
40
|
def self.extract_from_lines(lines)
|
33
41
|
['Seconds', 'ext_time', 'ext_mem', 'opt'].map do | label |
|
34
42
|
Extractor.get_field(lines, label)
|
@@ -36,16 +44,19 @@ module BatchExperiment
|
|
36
44
|
end
|
37
45
|
end
|
38
46
|
|
47
|
+
# @private
|
39
48
|
# Extractor for the output of the pyasukp program available at
|
40
49
|
# https://github.com/henriquebecker91/masters. Not of interest for the
|
41
50
|
# majority of the users of this gem. Kept as example, and for this gem author's
|
42
51
|
# personal use.
|
43
52
|
class PyaExtractor
|
44
53
|
extend Extractor
|
54
|
+
# (see BatchExperiment::Extractor#names)
|
45
55
|
def self.names
|
46
56
|
['internal time', 'external time', 'external memory', 'opt']
|
47
57
|
end
|
48
58
|
|
59
|
+
# (see BatchExperiment::Extractor#extract_from_lines)
|
49
60
|
def self.extract_from_lines(lines)
|
50
61
|
values = ['Total Time ', 'ext_time', 'ext_mem'].map do | label |
|
51
62
|
Extractor.get_field(lines, label)
|
data/lib/batch_experiment.rb
CHANGED
@@ -13,6 +13,12 @@ module BatchExperiment
|
|
13
13
|
# multiple instances of the same command different names (by suffixing with
|
14
14
|
# numbers).
|
15
15
|
module FnameSanitizer
|
16
|
+
# Returns a copy of the argument where each sequence of non-alphanumeric
|
17
|
+
# characters were changed to one single underscore ('_'), remove
|
18
|
+
# trailing underscores at the beginning and the end of the string.
|
19
|
+
#
|
20
|
+
# @param command [String] A command to be sanitized.
|
21
|
+
# @return [String] The sanitized command.
|
16
22
|
def self.call(command)
|
17
23
|
fname = command.strip
|
18
24
|
fname.gsub!(/[^[:alnum:]]/, '_')
|
@@ -64,20 +70,24 @@ module BatchExperiment
|
|
64
70
|
fname.clone
|
65
71
|
end
|
66
72
|
|
73
|
+
# Used to guarantee that a clone of Comm2FnameConverter will not
|
74
|
+
# share relevant state with the original. So calls to #call
|
75
|
+
# on a clone don't affect the state of original (and vice versa).
|
67
76
|
def initialize_clone(old)
|
68
77
|
@num_times_seen = old.num_times_seen.clone
|
69
78
|
end
|
70
79
|
|
71
|
-
#
|
80
|
+
# Needed by the initialize_clone implementation.
|
72
81
|
protected
|
73
82
|
attr_reader :num_times_seen
|
74
83
|
end
|
75
84
|
|
85
|
+
# @!visibility private
|
76
86
|
# INTERNAL USE ONLY.
|
77
87
|
# Remove any finished commands from comms_running, insert the cpus
|
78
88
|
# freed by the commands termination to the free_cpus, insert the
|
79
89
|
# terminated commands on comms_executed.
|
80
|
-
def self.update_finished(free_cpus, comms_running, comms_executed)
|
90
|
+
def self.update_finished(free_cpus, comms_running, comms_executed) #:nodoc
|
81
91
|
comms_running.delete_if do | job |
|
82
92
|
# Don't call '#exited?' twice, store value at variable. If you call
|
83
93
|
# it twice it's possible to remove it from the list of running commands
|
@@ -92,64 +102,68 @@ module BatchExperiment
|
|
92
102
|
end
|
93
103
|
end
|
94
104
|
|
95
|
-
#
|
96
|
-
#
|
97
|
-
#
|
98
|
-
#
|
99
|
-
#
|
100
|
-
#
|
101
|
-
#
|
102
|
-
# After the command ends its execution this file is
|
103
|
-
# ends its execution by means of a timeout the file
|
104
|
-
# only remains if the batch procedure is
|
105
|
-
# or system crashed). This '.unfinished' file
|
106
|
-
# if the corresponding process started with
|
105
|
+
# Execute a list of sh commands, one per specified core, kill them if the
|
106
|
+
# timeout expires, when a command ends (naturally or by timeout) put the
|
107
|
+
# next on the freed core, save all commands output to files.
|
108
|
+
#
|
109
|
+
# The output filenames are derived from the commands. The ones with '.out'
|
110
|
+
# are the ones with the command standard output. The analogue is valid for
|
111
|
+
# '.err' and standard error. Right before starting a command, a '.unfinished'
|
112
|
+
# file is created. After the command ends its execution this file is
|
113
|
+
# removed. If the command ends its execution by means of a timeout the file
|
114
|
+
# is also removed. The file only remains if the batch procedure is
|
115
|
+
# interrupted (script was killed, or system crashed). This '.unfinished' file
|
116
|
+
# will contain the process pid, if the corresponding process started with
|
117
|
+
# success.
|
107
118
|
#
|
108
119
|
# @param commands [Array<String>] The shell commands.
|
109
120
|
# @param conf [Hash] The configurations, as follows:
|
110
|
-
#
|
111
|
-
#
|
112
|
-
#
|
113
|
-
#
|
114
|
-
#
|
115
|
-
#
|
116
|
-
#
|
117
|
-
#
|
118
|
-
#
|
119
|
-
#
|
120
|
-
#
|
121
|
-
#
|
122
|
-
# post_timeout
|
123
|
-
#
|
124
|
-
#
|
125
|
-
#
|
126
|
-
#
|
127
|
-
#
|
128
|
-
#
|
129
|
-
#
|
130
|
-
#
|
131
|
-
#
|
132
|
-
#
|
133
|
-
#
|
134
|
-
#
|
135
|
-
#
|
136
|
-
#
|
137
|
-
#
|
138
|
-
#
|
139
|
-
#
|
140
|
-
#
|
141
|
-
#
|
142
|
-
#
|
143
|
-
#
|
144
|
-
#
|
121
|
+
# * cpus_available [Array<Fixnum>] CPU cores that can be used to run the
|
122
|
+
# commands. Required parameter. The cpu numbers begin at 0, despite what
|
123
|
+
# htop tells you. Maybe you will want to disable hyperthreading.
|
124
|
+
# * timeout [Number] Number of seconds before killing a command. Required
|
125
|
+
# parameter. Is the same for all the commands.
|
126
|
+
# * time_fmt [String] A string in the time (external command) format. See
|
127
|
+
# http://linux.die.net/man/1/time. Default: 'ext_time: %e\\next_mem:
|
128
|
+
# %M\\n'.
|
129
|
+
# * busy_loop_sleep [Number] How many seconds to wait before checking if
|
130
|
+
# a command ended execution. This time will be very close to the max time
|
131
|
+
# a cpu will remain vacant between two commands. Default: 0.1 (1/10
|
132
|
+
# second).
|
133
|
+
# * post_timeout [Number] A command isn't guaranteed to end after
|
134
|
+
# receiving a TERM signal. If the command hasn't stopped, waits
|
135
|
+
# post_timeout seconds before sending a KILL signal (give it a chance to
|
136
|
+
# end gracefully). Default: 5.
|
137
|
+
# * converter [#call] The call method of this object should take a String
|
138
|
+
# and convert it (possibly losing information), to a valid filename. Used
|
139
|
+
# over the commands to define the output files of commands. Default:
|
140
|
+
# BatchExperiment::Comm2FnameConverter.new.
|
141
|
+
# * skip_done_comms [FalseClass,TrueClass] Skip any command for which a
|
142
|
+
# corresponding '.out' file exists, except if both a '.out' and a
|
143
|
+
# '.unfinished' file exists, in the last case the command will always
|
144
|
+
# be executed. If false, execute all commands and overwrite any previous
|
145
|
+
# outputs. Default: true.
|
146
|
+
# * unfinished_ext [String] Extension to be used in place of
|
147
|
+
# '.unfinished'. Default: '.unfinished'.
|
148
|
+
# * out_ext [String] Extension to be used in place of '.out'.
|
149
|
+
# Default: '.out'.
|
150
|
+
# * err_ext [String] Extension to be used in place of '.err'.
|
151
|
+
# Default: '.err'.
|
152
|
+
# * cwd [String] Command Working Directory. The path from where the
|
153
|
+
# commands will be executed. Default: './' (i.e. the same directory from
|
154
|
+
# where the ruby script was run).
|
155
|
+
# * output_dir [String] The folder used to save the output files.
|
156
|
+
# Default: './' (i.e. the same directory from where the ruby script
|
157
|
+
# was run).
|
145
158
|
#
|
146
|
-
# @return [String] Which commands were executed. Can be different from
|
159
|
+
# @return [Array<String>] Which commands were executed. Can be different from
|
147
160
|
# the 'commands' argument if commands are skipped (see :skip_done_comms).
|
161
|
+
# The order of this array will match the order of the argument one.
|
148
162
|
#
|
149
163
|
# @note If the same command is executed over the same file more than one
|
150
164
|
# time, then any run besides the first will have a numeric suffix.
|
151
|
-
# Example: "sleep 1" -> "sleep_1", "sleep 1" -> "sleep_1.2".
|
152
|
-
# For more info see the parameter conf
|
165
|
+
# Example: "sleep 1" -> "sleep_1.out", "sleep 1" -> "sleep_1.2.out".
|
166
|
+
# For more info see the parameter conf's :converter, and its
|
153
167
|
# default value BatchExperiment::Comm2FnameConverter.new.
|
154
168
|
# @note This procedure makes use of the following linux commands: time (not
|
155
169
|
# the bash internal one, but the package one, i.e.
|
@@ -159,9 +173,9 @@ module BatchExperiment
|
|
159
173
|
# shell).
|
160
174
|
# @note The command is executed inside a call to "sh -c command", so it has
|
161
175
|
# to be a valid sh command.
|
162
|
-
# @note The output of the command "time -f conf
|
176
|
+
# @note The output of the command "time -f conf's :time_fmt" will be
|
163
177
|
# appended to the '.out' file of every command. If you set
|
164
|
-
# conf
|
178
|
+
# conf's :time_fmt to an empty string only a newline will be appended.
|
165
179
|
def self.batch(commands, conf)
|
166
180
|
# Throw exceptions if required configurations aren't provided.
|
167
181
|
if !conf[:cpus_available] then
|
@@ -256,7 +270,8 @@ module BatchExperiment
|
|
256
270
|
comms_executed
|
257
271
|
end
|
258
272
|
|
259
|
-
#
|
273
|
+
# @!visibility private
|
274
|
+
# gencommff: GENerate COMMands For Files.
|
260
275
|
# Creates a hash with the generated commands as keys, and store (as the
|
261
276
|
# respective value) the comm_info hash and the file (using a { comm_info: X,
|
262
277
|
# filename: Y } structure).
|
@@ -267,7 +282,7 @@ module BatchExperiment
|
|
267
282
|
# comm_info[:pattern] at a copy of comm_info[:command].
|
268
283
|
# @return [Hash<String, Hash>] A hash on the following format
|
269
284
|
# { expanded_command => { comm_info: comm_info, filename: f }, ...}
|
270
|
-
def self.gencommff(comm_info, files)
|
285
|
+
def self.gencommff(comm_info, files) #:nodoc
|
271
286
|
ret = {}
|
272
287
|
comm = comm_info[:command]
|
273
288
|
patt = comm_info[:pattern]
|
@@ -277,15 +292,15 @@ module BatchExperiment
|
|
277
292
|
ret
|
278
293
|
end
|
279
294
|
|
280
|
-
#
|
281
|
-
# in one array.
|
295
|
+
# @!visibility private
|
296
|
+
# Intercalate a variable number of variable sized arrays in one array.
|
282
297
|
#
|
283
298
|
# @param [Array<Array<Object>>] xss An array of arrays.
|
284
299
|
# @return [Array<Object>] An array of the same size as the sum of the size
|
285
300
|
# of all inner arrays. The values are the same (not copies) as the values
|
286
301
|
# of the array. Example: intercalate([[1, 4, 6, 7], [], [2, 5], [3]])
|
287
302
|
# returns [1, 2, 3, 4, 5, 6, 7].
|
288
|
-
def self.intercalate(xss)
|
303
|
+
def self.intercalate(xss) #:nodoc
|
289
304
|
ret = []
|
290
305
|
xss = xss.map { | xs | xs.reverse }
|
291
306
|
until xss.empty? do
|
@@ -299,17 +314,26 @@ module BatchExperiment
|
|
299
314
|
ret
|
300
315
|
end
|
301
316
|
|
317
|
+
# Exception class raised when multiple extractor objects passed to
|
318
|
+
# ::experiment (by the comms_info parameter) disagree on the content of the
|
319
|
+
# columns. Ex.: If we call ::experiment with different extractor objects, all
|
320
|
+
# arrays returned by the #names method of those extractors should be equal or
|
321
|
+
# a prefix of the biggest array. Ex.: ['a', 'b'], ['a', 'b'], ['a'] and
|
322
|
+
# ['a', 'b', 'c'] works, but adding ['a', 'c'] will end the program with
|
323
|
+
# this exception. This is made to avoid making the mistake of generating a
|
324
|
+
# csv where the same column has a different meaning for each row.
|
302
325
|
class ColumnSpecError < ArgumentError; end
|
303
326
|
|
304
|
-
#
|
305
|
-
#
|
327
|
+
# @!visibility private
|
328
|
+
# Check if the headers can be combined, if they can return a shallow copy of
|
329
|
+
# the biggest header, otherwise throw an exception.
|
306
330
|
#
|
307
|
-
# @param headers [Array<Array<
|
331
|
+
# @param headers [Array<Array<Object>>] An array of arrays of strings
|
308
332
|
# (or any object that implements '!=').
|
309
333
|
# @return A shallow copy of the biggest inner array in headers. Only returns
|
310
334
|
# if each position on the biggest inner array has the same value as
|
311
335
|
# that position on all the other arrays with at least that size.
|
312
|
-
def self.merge_headers(headers)
|
336
|
+
def self.merge_headers(headers) #:nodoc
|
313
337
|
mer_size = headers.map { | h | h.size }.max
|
314
338
|
merged_h = Array.new(mer_size)
|
315
339
|
mer_size.times do | i |
|
@@ -331,57 +355,56 @@ module BatchExperiment
|
|
331
355
|
merged_h
|
332
356
|
end
|
333
357
|
|
334
|
-
#
|
335
|
-
#
|
336
|
-
#
|
337
|
-
#
|
338
|
-
# file. Easier to understand seeing the sample_batch.rb example in action.
|
358
|
+
# Uses ::batch to execute N commands over M files Q times for each
|
359
|
+
# command/file, save their output, inspect their output using provided
|
360
|
+
# extractors, save the extracted data in a CSV file; easier to understand
|
361
|
+
# seeing the sample_batch.rb example in action.
|
339
362
|
#
|
340
363
|
# @param comms_info [Array<Hash>] An array of hashes, each with the config
|
341
364
|
# needed to know how to deal with the command. Four required fields
|
342
365
|
# (all keys are symbols):
|
343
|
-
# command [String] A string with a sh shell command.
|
344
|
-
# pattern [String] A substring of command, will be replaced by the strings
|
345
|
-
#
|
346
|
-
# extractor [#extract,#names] Object implementing the Extractor interface.
|
347
|
-
# prefix [String] A string that will be used on the 'algorithm' column
|
348
|
-
#
|
366
|
+
# * command [String] A string with a sh shell command.
|
367
|
+
# * pattern [String] A substring of command, will be replaced by the strings
|
368
|
+
# in the parameter 'files'.
|
369
|
+
# * extractor [#extract,#names] Object implementing the Extractor interface.
|
370
|
+
# * prefix [String] A string that will be used on the 'algorithm' column
|
371
|
+
# to identify the used command.
|
349
372
|
# @param batch_conf [Hash] Configuration used to call batch. See the
|
350
373
|
# explanation for parameter 'conf' on the documentation of the batch
|
351
374
|
# method. There are required fields for this hash parameter. Also, note
|
352
|
-
# that
|
375
|
+
# that batch_conf's :converter should allow cloning without sharing
|
353
376
|
# mutable state. A converter clone is used by #experiment internally, it
|
354
377
|
# has to obtain the same results as the original copy (that is passed to
|
355
378
|
# BatchExperiment::batch).
|
356
379
|
# @param conf [Hash] Lots of parameters. Here's a list:
|
357
|
-
#
|
358
|
-
#
|
359
|
-
# separator [String] The separator used at the CSV file. Default: ';'.
|
360
|
-
#
|
361
|
-
#
|
362
|
-
#
|
363
|
-
#
|
364
|
-
#
|
365
|
-
#
|
366
|
-
#
|
367
|
-
#
|
368
|
-
#
|
369
|
-
#
|
370
|
-
#
|
371
|
-
#
|
372
|
-
#
|
373
|
-
#
|
374
|
-
#
|
375
|
-
#
|
376
|
-
#
|
377
|
-
#
|
378
|
-
#
|
379
|
-
#
|
380
|
-
#
|
381
|
-
# skip_commands [TrueClass, FalseClass] If true, will not execute the
|
382
|
-
#
|
383
|
-
#
|
384
|
-
#
|
380
|
+
# * csvfname [String] The filename/filepath for the file that will contain
|
381
|
+
# the CSV data. Required field.
|
382
|
+
# * separator [String] The separator used at the CSV file. Default: ';'.
|
383
|
+
# * qt_runs [NilClass,Integer] If nil or one then each command is
|
384
|
+
# executed once. If is a number bigger than one, the command is executed
|
385
|
+
# that number of times. The batch_conf's :converter will define the name
|
386
|
+
# that will be given to each run. Every file will appear qt_runs times on
|
387
|
+
# the filename column and, for the same file, the values on the
|
388
|
+
# run_number column will be the integer numbers between 1 and qt_runs
|
389
|
+
# (both inclusive). Default: nil.
|
390
|
+
# * comms_order [:by_comm,:by_file,:random] The order the
|
391
|
+
# commands will be executed. Case by_comm: will execute the first command
|
392
|
+
# over all the files (using the files order), then will execute the
|
393
|
+
# second command over all files, and so on. Case by_file: will execute
|
394
|
+
# all the commands (using the comms_info order) over the first file, then
|
395
|
+
# will execute all the commands over the second file, and so on. Case
|
396
|
+
# random: will expand all the command/file combinations (replicating the
|
397
|
+
# same command qt_runs times) and then will apply shuffle to this array,
|
398
|
+
# using the object passed to the rng parameter. This last option is the
|
399
|
+
# most adequate for statistical testing.
|
400
|
+
# * rng [Nil,#rand] An object that implements the #rand method (behaves
|
401
|
+
# like an instance of the core Random class). If comms_order is random
|
402
|
+
# and rng is nil, will issue a warning remembering the default that was
|
403
|
+
# used. Default: Random.new(42).
|
404
|
+
# * skip_commands [TrueClass, FalseClass] If true, will not execute the
|
405
|
+
# commands and assume that the outputs are already saved (on ".out"
|
406
|
+
# files). Will only execute the extractors over the already saved
|
407
|
+
# outputs, and create the CSV file from them. Default: false.
|
385
408
|
#
|
386
409
|
# @param files [Array<String>] The strings that will replace the :pattern
|
387
410
|
# on :command, for every element in comms_info. Can be a filename, or
|
@@ -389,10 +412,9 @@ module BatchExperiment
|
|
389
412
|
# refer to them as files for simplicity and uniformity.
|
390
413
|
#
|
391
414
|
# @return [NilClass,Array<String>] The return of the internal #batch
|
392
|
-
# call. Returns nil if conf
|
415
|
+
# call. Returns nil if conf's :skip_commands was set to true.
|
393
416
|
#
|
394
417
|
# @see BatchExperiment::batch
|
395
|
-
# @note This command call ::batch internally.
|
396
418
|
def self.experiment(comms_info, batch_conf, conf, files)
|
397
419
|
# Throw exceptions if required configurations aren't provided.
|
398
420
|
fail 'conf[:csvfname] is not defined' unless conf[:csvfname]
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: batch_experiment
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.
|
4
|
+
version: 2.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Henrique Becker
|
@@ -45,7 +45,8 @@ licenses:
|
|
45
45
|
- Unlicense
|
46
46
|
metadata: {}
|
47
47
|
post_install_message:
|
48
|
-
rdoc_options:
|
48
|
+
rdoc_options:
|
49
|
+
- "--no-private"
|
49
50
|
require_paths:
|
50
51
|
- lib
|
51
52
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -66,3 +67,4 @@ specification_version: 4
|
|
66
67
|
summary: A ruby script that distributes system commands between cpu cores, and save
|
67
68
|
their output.
|
68
69
|
test_files: []
|
70
|
+
has_rdoc:
|