batch_experiment 1.0.2 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -2
- data/examples/debug_batch.rb +1 -1
- data/examples/{example_batch.rb → experiment_example.rb} +11 -11
- data/examples/{ukp_batch.rb → ukp_experiment.rb} +10 -6
- data/lib/batch_experiment.rb +260 -107
- data/lib/batch_experiment/extractor.rb +6 -4
- data/lib/batch_experiment/sample_extractors.rb +0 -31
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4bfaf4a6fa10230872eb47786f5ed25caef9866c
|
4
|
+
data.tar.gz: 5c2d57dbc5487a37d409bd6c9454177f16d44cae
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e53e5c263fca88c5389268e2d8e2aafd8491bf100d3ad3888d84024ba7f515e4797ef7856fd0f03274274f563e3263a5bb5d55a1e336210e3e770409ca52e377
|
7
|
+
data.tar.gz: f4d4741012d48a9383ac7cd73013ab40ab86d0dcbd98d7b55cee2b0ae6c3ffe790c1662d60ab2093249a4d41a720c2705a3615198ce4aae4239a5ecc1da52c2e
|
data/README.md
CHANGED
@@ -15,7 +15,7 @@ What conditions you need to use this tool:
|
|
15
15
|
|
16
16
|
What is not needed:
|
17
17
|
|
18
|
-
* To know how to program in ruby. Only taking less than 5 minutes to learn some basic syntax will suffice to run commands on multiple cores and save the results to files. However,
|
18
|
+
* To know how to program in ruby. Only taking less than 5 minutes to learn some basic syntax will suffice to run commands on multiple cores and save the results to files (using BatchExperiment::batch). However, if you want not only to execute the commands but also to extract and group some information from their output into a CSV (using BatchExperiment::experiment), you will need to tell ruby how to do the extracting part.
|
19
19
|
|
20
20
|
## How to use it
|
21
21
|
|
@@ -23,7 +23,7 @@ You will need to create a ruby script (copy, past and adapt one of the provided
|
|
23
23
|
|
24
24
|
## Examples
|
25
25
|
|
26
|
-
After installing the gem, you will have a examples folder (/home/YOUR_USER/.gem/ruby/RUBY_VERSION/gems/batch_experiment-GEM_VERSION/examples). The sample_batch.rb gives you a good ideia of how to use
|
26
|
+
After installing the gem, you will have an examples folder (/home/YOUR_USER/.gem/ruby/RUBY_VERSION/gems/batch_experiment-GEM_VERSION/examples). The sample_batch.rb gives you a good idea of how to use ::batch (no csv creation).
|
27
27
|
|
28
28
|
```ruby
|
29
29
|
#!/bin/ruby
|
@@ -50,5 +50,7 @@ conf = {
|
|
50
50
|
BatchExperiment::batch(commands, conf)
|
51
51
|
```
|
52
52
|
|
53
|
+
The experiment_example.rb (and the lib/batch_experiment/sample_extractors.rb) gives a good idea of how to use #experiment with multiple commands and how to create an extractor (used to create a csv).
|
54
|
+
|
53
55
|
This code was born in [this repository](https://github.com/henriquebecker91/masters/tree/master/codes/rb/batch_experiment).
|
54
56
|
|
data/examples/debug_batch.rb
CHANGED
@@ -13,30 +13,30 @@ comms_info = [{
|
|
13
13
|
pattern: 'y',
|
14
14
|
extractor: BatchExperiment::FirstLineExtractor,
|
15
15
|
prefix: 'echo',
|
16
|
-
}, {
|
17
|
-
command: 'wc FILE',
|
18
|
-
pattern: 'FILE',
|
19
|
-
extractor: BatchExperiment::WcExtractor,
|
20
|
-
prefix: 'wc',
|
21
16
|
}]
|
22
17
|
|
23
|
-
|
18
|
+
batch_conf = {
|
24
19
|
# IDs of the CPU cores that can be used for executing tests.
|
25
20
|
cpus_available: [1, 2, 3],
|
26
21
|
# Maximum number of seconds that a command can run. After this a kill command
|
27
22
|
# (TERM signal) will be issued.
|
28
23
|
timeout: 5,
|
24
|
+
# Object that gives the filename for storing the output of each run.
|
25
|
+
converter: BatchExperiment::Comm2FnameConverter.new,
|
29
26
|
}
|
30
27
|
|
31
|
-
|
28
|
+
experiment_conf = {
|
32
29
|
# The name of the file where will be written the CSV data.
|
33
30
|
csvfname: 'example.csv',
|
34
|
-
#
|
35
|
-
|
36
|
-
|
31
|
+
# Number of times the same command will be executed over the same file.
|
32
|
+
qt_runs: 5,
|
33
|
+
# Order of the commands execution
|
34
|
+
comms_order: :random, #:by_comm, #:by_file,
|
35
|
+
# Random seed (only used if comms_order is :random)
|
36
|
+
rng: Random.new(0),
|
37
37
|
}
|
38
38
|
|
39
39
|
files = ['bible.txt', 'taoteching.txt']
|
40
40
|
|
41
|
-
BatchExperiment::experiment(comms_info,
|
41
|
+
BatchExperiment::experiment(comms_info, batch_conf, experiment_conf, files)
|
42
42
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/ruby
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
require 'batch_experiment'
|
4
|
+
require 'batch_experiment/sample_extractors'
|
5
5
|
|
6
6
|
# I run the three lines below in the console to disable hyperthreading cores on
|
7
7
|
# my computer before examining the cores with the top command.
|
@@ -24,16 +24,20 @@ comms_info = [{
|
|
24
24
|
execution_info = {
|
25
25
|
cpus_available: [1, 2, 3],
|
26
26
|
timeout: 10,
|
27
|
-
post_timeout:
|
27
|
+
post_timeout: 2,
|
28
28
|
}
|
29
29
|
|
30
|
-
conf = {
|
30
|
+
conf = {
|
31
|
+
csvfname: 'pya_site8.csv',
|
32
|
+
comms_order: :random,
|
33
|
+
qt_runs: 10,
|
34
|
+
}
|
31
35
|
|
32
36
|
files = ['corepb.ukp', 'exnsd18.ukp', 'exnsd26.ukp', 'exnsdbis18.ukp', 'exnsd16.ukp', 'exnsd20.ukp', 'exnsdbis10.ukp', 'exnsds12.ukp']
|
33
37
|
# If you don't execute the script from the ukp files folder you need to put the
|
34
38
|
# folder relative or absolute path here (with trailing slash).
|
35
|
-
path = ''
|
39
|
+
path = '~/Aulas/mestrado/masters/data/ukp/'
|
36
40
|
files.map! { | f | path + f }
|
37
41
|
|
38
|
-
experiment(comms_info, execution_info, conf, files)
|
42
|
+
BatchExperiment::experiment(comms_info, execution_info, conf, files)
|
39
43
|
|
data/lib/batch_experiment.rb
CHANGED
@@ -4,9 +4,15 @@ require 'pathname'
|
|
4
4
|
# The main module, the two main utility methods offered are ::batch and
|
5
5
|
# ::experiment.
|
6
6
|
module BatchExperiment
|
7
|
-
# The default callable object used by
|
8
|
-
# filename.
|
9
|
-
|
7
|
+
# The default callable object used by Comm2FnameConverter to convert
|
8
|
+
# a command into a filename. Comm2FnameConverter doesn't create a sanitized
|
9
|
+
# filename from the command string (it uses its first argument to do this,
|
10
|
+
# whose default is FnameSanitizer).
|
11
|
+
# Note that this is a pure function, so if the same command appears more than
|
12
|
+
# one time, it will get the same name, it's Comm2FnameConverter that gives
|
13
|
+
# multiple instances of the same command different names (by suffixing with
|
14
|
+
# numbers).
|
15
|
+
module FnameSanitizer
|
10
16
|
def self.call(command)
|
11
17
|
fname = command.strip
|
12
18
|
fname.gsub!(/[^[:alnum:]]/, '_')
|
@@ -17,7 +23,57 @@ module BatchExperiment
|
|
17
23
|
end
|
18
24
|
end
|
19
25
|
|
20
|
-
#
|
26
|
+
# Converts a command to a filename using a given sanitizer, gives different
|
27
|
+
# names to different calls with the same arguments. Example: if a call with
|
28
|
+
# "sleep 1" yields "sleep_1", the second call with the same argument yields
|
29
|
+
# "sleep_1.2", and so on. Note that this is done by remembering previous
|
30
|
+
# calls, the object doesn't inspect the filesystem to check if that name
|
31
|
+
# was or wasn't used.
|
32
|
+
class Comm2FnameConverter
|
33
|
+
# Creates a new Comm2FnameConverter, with no memory of any previous calls.
|
34
|
+
#
|
35
|
+
# @param sanitizer [#call] Callable object used to create a filename from
|
36
|
+
# the arguments passed to Comm2FnameConverter.call. This class expects
|
37
|
+
# that sanitizer has no internal state, so when an instance of this class
|
38
|
+
# is cloned, there's no problem with sharing the sanitizer between the
|
39
|
+
# clones. Default: BatchExperiment::FnameSanitizer.
|
40
|
+
def initialize(sanitizer = FnameSanitizer)
|
41
|
+
@num_times_seen = {}
|
42
|
+
@sanitizer = sanitizer
|
43
|
+
end
|
44
|
+
|
45
|
+
# Takes a command, creates a fname for it, if this fname was already seen
|
46
|
+
# before, returns the fname + ".N", where N is the number of times fname
|
47
|
+
# was already seen.
|
48
|
+
#
|
49
|
+
# @param comm [String] A system command.
|
50
|
+
# @return [String] The sanitized filename created from that command.
|
51
|
+
# @note Note that different arguments can be reduced to the same
|
52
|
+
# sanitized filename and, if this happens, they will NOT overwrite
|
53
|
+
# each other. Example: 'echo "abc"' -> 'echo_abc'; 'echo abc' ->
|
54
|
+
# 'echo_abc.2'.
|
55
|
+
def call(comm)
|
56
|
+
fname = @sanitizer.call(comm)
|
57
|
+
if @num_times_seen.include? fname
|
58
|
+
@num_times_seen[fname] += 1
|
59
|
+
fname << ".#{@num_times_seen[fname]}"
|
60
|
+
else
|
61
|
+
@num_times_seen[fname] = 1
|
62
|
+
end
|
63
|
+
|
64
|
+
fname.clone
|
65
|
+
end
|
66
|
+
|
67
|
+
def initialize_clone(old)
|
68
|
+
@num_times_seen = old.num_times_seen.clone
|
69
|
+
end
|
70
|
+
|
71
|
+
# To allow the initialize_clone implementation.
|
72
|
+
protected
|
73
|
+
attr_reader :num_times_seen
|
74
|
+
end
|
75
|
+
|
76
|
+
# INTERNAL USE ONLY.
|
21
77
|
# Remove any finished commands from comms_running, insert the cpus
|
22
78
|
# freed by the commands termination to the free_cpus, insert the
|
23
79
|
# terminated commands on comms_executed.
|
@@ -42,7 +98,7 @@ module BatchExperiment
|
|
42
98
|
# filenames are derived from the commands. Appending '.out' to one of the
|
43
99
|
# partial filenames will give the filename where the command stdout was
|
44
100
|
# redirected. The analogue is valid for '.err' and stderr. Right before a
|
45
|
-
# command begans to run, a
|
101
|
+
# command begins to run, a 'partial_filename.unfinished' file is created.
|
46
102
|
# After the command ends its execution this file is removed. If the command
|
47
103
|
# ends its execution by means of a timeout the file is also removed. The file
|
48
104
|
# only remains if the batch procedure is interrupted (script was killed,
|
@@ -51,7 +107,7 @@ module BatchExperiment
|
|
51
107
|
#
|
52
108
|
# @param commands [Array<String>] The shell commands.
|
53
109
|
# @param conf [Hash] The configurations, as follows:
|
54
|
-
# -- cpus_available [Array<Fixnum>]
|
110
|
+
# -- cpus_available [Array<Fixnum>] CPU cores that can be used to run the
|
55
111
|
# commands. Required parameter. The cpu numbers begin at 0, despite what
|
56
112
|
# htop tells you.
|
57
113
|
# -- timeout [Number] Number of seconds before killing a command. Required
|
@@ -59,35 +115,36 @@ module BatchExperiment
|
|
59
115
|
# -- time_fmt [String] A string in the time (external command) format. See
|
60
116
|
# http://linux.die.net/man/1/time. Default: 'ext_time: %e\next_mem: %M\n'.
|
61
117
|
# -- busy_loop_sleep [Number] How many seconds to wait before checking if
|
62
|
-
# a command ended execution. This
|
63
|
-
# two commands. Default: 0.1.
|
118
|
+
# a command ended execution. This time will be very close to the max time a
|
119
|
+
# cpu will remain vacant between two commands. Default: 0.1 (1/10 second).
|
64
120
|
# -- post_timeout [Number] A command isn't guaranteed to end after
|
65
121
|
# receiving a TERM signal. If the command hasn't stopped, waits
|
66
122
|
# post_timeout seconds before sending a KILL signal (give it a chance to
|
67
123
|
# end gracefully). Default: 5.
|
68
|
-
# --
|
69
|
-
#
|
70
|
-
#
|
71
|
-
#
|
124
|
+
# -- converter [#call] The call method of this object should take a String
|
125
|
+
# and convert it (possibly losing information), to a valid filename. Used
|
126
|
+
# over the commands to define the output files of commands.
|
127
|
+
# Default: BatchExperiment::Comm2FnameConverter.new.
|
72
128
|
# -- skip_done_comms [FalseClass,TrueClass] Skip any command for what a
|
73
129
|
# corresponding '.out' file exists, except if both a '.out' and a
|
74
|
-
# '.unfinished' file exist, in the last case the command is
|
130
|
+
# '.unfinished' file exist, in the last case the command is always
|
131
|
+
# executed. If false, execute all commands and overwrite all ".out".
|
75
132
|
# Default: true.
|
76
133
|
# -- unfinished_ext [String] Extension to be used in place of
|
77
|
-
# '.unfinished'.
|
134
|
+
# '.unfinished'. Default: '.unfinished'.
|
78
135
|
# -- out_ext [String] Extension to be used in place of '.out'.
|
79
136
|
# Default: '.out'.
|
80
137
|
# -- err_ext [String] Extension to be used in place of '.err'.
|
81
138
|
# Default: '.err'.
|
139
|
+
#
|
82
140
|
# @return [String] Which commands were executed. Can be different from
|
83
141
|
# the 'commands' argument if commands are skipped (see :skip_done_comms).
|
84
142
|
#
|
85
143
|
# @note If the same command is executed over the same file more than one
|
86
|
-
# time, then
|
87
|
-
#
|
88
|
-
# conf\[:fname_sanitizer\]
|
89
|
-
#
|
90
|
-
# filename for every time it's called with the same argument).
|
144
|
+
# time, then any run besides the first will have a numeric suffix.
|
145
|
+
# Example: "sleep 1" -> "sleep_1", "sleep 1" -> "sleep_1.2".
|
146
|
+
# For more info see the parameter conf\[:converter\], and its
|
147
|
+
# default value BatchExperiment::Comm2FnameConverter.new.
|
91
148
|
# @note This procedure makes use of the following linux commands: time (not
|
92
149
|
# the bash internal one, but the package one, i.e.
|
93
150
|
# https://www.archlinux.org/packages/extra/x86_64/time/); timeout (from
|
@@ -101,8 +158,10 @@ module BatchExperiment
|
|
101
158
|
# conf\[:time_fmt\] to a empty string only a newline will be appended.
|
102
159
|
def self.batch(commands, conf)
|
103
160
|
# Throw exceptions if required configurations aren't provided.
|
104
|
-
|
105
|
-
|
161
|
+
if !conf[:cpus_available] then
|
162
|
+
fail ArgumentError, 'conf[:cpus_available] not set'
|
163
|
+
end
|
164
|
+
fail ArgumentError, 'conf[:timeout] not set' unless conf[:timeout]
|
106
165
|
|
107
166
|
# Initialize optional configurations with default values if they weren't
|
108
167
|
# provided. Don't change the conf argument, only our version of conf.
|
@@ -113,7 +172,7 @@ module BatchExperiment
|
|
113
172
|
conf[:err_ext] ||= '.err'
|
114
173
|
conf[:busy_loop_sleep] ||= 0.1
|
115
174
|
conf[:post_timeout] ||= 5
|
116
|
-
conf[:
|
175
|
+
conf[:converter] ||= BatchExperiment::Comm2FnameConverter.new
|
117
176
|
conf[:skip_done_comms] = true if conf[:skip_done_comms].nil?
|
118
177
|
|
119
178
|
# Initialize main variables
|
@@ -123,23 +182,23 @@ module BatchExperiment
|
|
123
182
|
comms_executed = []
|
124
183
|
|
125
184
|
commands.each do | command |
|
126
|
-
commfname = conf[:
|
185
|
+
commfname = conf[:converter].call(command)
|
127
186
|
out_fname = commfname + conf[:out_ext]
|
128
187
|
err_fname = commfname + conf[:err_ext]
|
129
188
|
lockfname = commfname + conf[:unfinished_ext]
|
130
189
|
|
131
190
|
if conf[:skip_done_comms] && File.exists?(out_fname)
|
132
191
|
if File.exists?(lockfname)
|
133
|
-
puts "
|
134
|
-
puts "
|
192
|
+
puts "Found file #{out_fname}, but a #{lockfname} also exists:"
|
193
|
+
puts "Will execute command '#{command}' anyway."
|
135
194
|
else
|
136
|
-
puts "
|
195
|
+
puts "Found file #{commfname}, skipping command: #{command}"
|
137
196
|
STDOUT.flush
|
138
197
|
next
|
139
198
|
end
|
140
199
|
end
|
141
200
|
|
142
|
-
puts "
|
201
|
+
puts "Waiting to execute command: #{command}"
|
143
202
|
STDOUT.flush
|
144
203
|
|
145
204
|
while free_cpus.empty? do
|
@@ -188,21 +247,29 @@ module BatchExperiment
|
|
188
247
|
comms_executed
|
189
248
|
end
|
190
249
|
|
191
|
-
# gencommff: GENerate COMMands For Files
|
250
|
+
# INTERNAL USE ONLY. gencommff: GENerate COMMands For Files.
|
251
|
+
# Creates a hash with the generated commands as keys, and store (as the
|
252
|
+
# respective value) the comm_info hash and the file (using a { comm_info: X,
|
253
|
+
# filename: Y } structure).
|
192
254
|
#
|
193
|
-
# @param
|
194
|
-
#
|
195
|
-
# @param files [Enumerable<String>] A list of strings
|
196
|
-
#
|
197
|
-
# @return [
|
198
|
-
#
|
199
|
-
def self.gencommff(
|
200
|
-
ret =
|
201
|
-
|
255
|
+
# @param comm_info [Hash] A hash structure following the same format
|
256
|
+
# that the elements of the comms_info array parameter of #experiment.
|
257
|
+
# @param files [Enumerable<String>] A list of strings that will replace
|
258
|
+
# comm_info[:pattern] at a copy of comm_info[:command].
|
259
|
+
# @return [Hash<String, Hash>] A hash on the following format
|
260
|
+
# { expanded_command => { comm_info: comm_info, filename: f }, ...}
|
261
|
+
def self.gencommff(comm_info, files)
|
262
|
+
ret = {}
|
263
|
+
comm = comm_info[:command]
|
264
|
+
patt = comm_info[:pattern]
|
265
|
+
files.each do | f |
|
266
|
+
ret[comm.gsub(patt, f)] = { comm_info: comm_info, filename: f }
|
267
|
+
end
|
202
268
|
ret
|
203
269
|
end
|
204
270
|
|
205
|
-
# Intercalate a variable number of variable sized arrays
|
271
|
+
# INTERNAL USE ONLY. Intercalate a variable number of variable sized arrays
|
272
|
+
# in one array.
|
206
273
|
#
|
207
274
|
# @param [Array<Array<Object>>] xss An array of arrays.
|
208
275
|
# @return [Array<Object>] An array of the same size as the sum of the size
|
@@ -223,53 +290,94 @@ module BatchExperiment
|
|
223
290
|
ret
|
224
291
|
end
|
225
292
|
|
293
|
+
class ColumnSpecError < ArgumentError; end
|
294
|
+
|
295
|
+
# INTERNAL USE ONLY. Check if the headers can be combined, if they can
|
296
|
+
# return a shallow copy of the biggest header, otherwise throw an exception.
|
297
|
+
#
|
298
|
+
# @param headers [Array<Array<Comparable>>] An array of arrays of strings
|
299
|
+
# (or any object that implements '!=').
|
300
|
+
# @return A shallow copy of the biggest inner array in headers. Only returns
|
301
|
+
# if for each position on the biggest inner array has the same value as
|
302
|
+
# that position on all the other arrays with at least that size.
|
303
|
+
def self.merge_headers(headers)
|
304
|
+
mer_size = headers.map { | h | h.size }.max
|
305
|
+
merged_h = Array.new(mer_size)
|
306
|
+
mer_size.times do | i |
|
307
|
+
headers.each do | h |
|
308
|
+
next if h.size < i
|
309
|
+
if merged_h[i].nil?
|
310
|
+
merged_h[i] = h[i]
|
311
|
+
elsif merged_h[i] != h[i]
|
312
|
+
raise ColumnSpecError, "Error: When using BatchExperiment::experiment"
|
313
|
+
+ " all the extractors have to agree on the columns they share."
|
314
|
+
+ " In the specific case: the column nº #{i} was labeled as"
|
315
|
+
+ " '#{merged_h[i]}' on one extractor, and '#{h[i]}' on another,"
|
316
|
+
+ " this can be only a difference on notation ('time' vs 'Time'),"
|
317
|
+
+ " or can mean that in the same column two different kinds of data"
|
318
|
+
+ " are being presented. The program will be aborted. Check that."
|
319
|
+
end
|
320
|
+
end
|
321
|
+
end
|
322
|
+
merged_h
|
323
|
+
end
|
324
|
+
|
226
325
|
# Takes N shell commands and M files/parameters, execute each command of the
|
227
326
|
# N commands over the M files, save the output of each command/file
|
228
327
|
# combination, use objects provided with the command to extract relevant
|
229
|
-
# information from the output file, and group those information in a
|
328
|
+
# information from the output file, and group those information in a CSV
|
230
329
|
# file. Easier to understand seeing the sample_batch.rb example in action.
|
231
330
|
#
|
232
331
|
# @param comms_info [Array<Hash>] An array of hashes, each with the config
|
233
332
|
# needed to know how to deal with the command. Four required fields
|
234
333
|
# (all keys are symbols):
|
235
334
|
# command [String] A string with a sh shell command.
|
236
|
-
# pattern [String] A substring of command, will be
|
335
|
+
# pattern [String] A substring of command, will be replaced by the strings
|
237
336
|
# in the parameter 'files'.
|
238
337
|
# extractor [#extract,#names] Object implementing the Extractor interface.
|
239
|
-
# prefix [String] A string that will be used
|
240
|
-
#
|
338
|
+
# prefix [String] A string that will be used on the 'algorithm' column
|
339
|
+
# to identify the used command.
|
241
340
|
# @param batch_conf [Hash] Configuration used to call batch. See the
|
242
341
|
# explanation for parameter 'conf' on the documentation of the batch
|
243
|
-
# method. There are required fields for this hash parameter.
|
342
|
+
# method. There are required fields for this hash parameter. Also, note
|
343
|
+
# that the batch_conf\[:converter\] should allow cloning without sharing
|
344
|
+
# mutable state. A converter clone is used by #experiment internally, it
|
345
|
+
# has to obtain the same results as the original copy (that is passed to
|
346
|
+
# BatchExperiment::batch).
|
244
347
|
# @param conf [Hash] Lots of parameters. Here's a list:
|
245
|
-
# csvfname [String] The filename/filepath for the file that will contain
|
348
|
+
# -- csvfname [String] The filename/filepath for the file that will contain
|
246
349
|
# the CSV data. Required field.
|
247
350
|
# separator [String] The separator used at the CSV file. Default: ';'.
|
248
|
-
#
|
249
|
-
#
|
250
|
-
#
|
251
|
-
#
|
252
|
-
#
|
253
|
-
#
|
254
|
-
#
|
255
|
-
#
|
256
|
-
#
|
257
|
-
#
|
258
|
-
#
|
259
|
-
#
|
260
|
-
#
|
261
|
-
#
|
262
|
-
#
|
263
|
-
#
|
264
|
-
#
|
265
|
-
#
|
351
|
+
# -- qt_runs [NilClass,Integer] If nil or one then each command is
|
352
|
+
# executed once. If is a number bigger than one, the command is executed
|
353
|
+
# that number of times. The batch_conf[:converter] will define the name
|
354
|
+
# that will be given to each run. Every file will appear qt_runs times on
|
355
|
+
# the filename column and, for the same file, the values on the run_number
|
356
|
+
# column will be the integer numbers between 1 and qt_runs (both
|
357
|
+
# inclusive). Default: nil.
|
358
|
+
# -- comms_order [:by_comm,:by_file,:random] The order the
|
359
|
+
# commands will be executed. Case by_comm: will execute the first command
|
360
|
+
# over all the files (using the files order), then will execute the
|
361
|
+
# second command over all files, and so on. Case by_file: will execute
|
362
|
+
# all the commands (using the comms_info order) over the first file,
|
363
|
+
# then will execute all the commands over the second file, and so on.
|
364
|
+
# Case random: will expand all the command/file combinations (replicating
|
365
|
+
# the same command qt_runs times) and then will apply shuffle to this array,
|
366
|
+
# using the object passed to the rng parameter. This last option is the
|
367
|
+
# most adequate for statistical testing.
|
368
|
+
# -- rng [Nil,#rand] An object that implements the #rand method (behaves
|
369
|
+
# like an instance of the core Random class). If comms_order is random and
|
370
|
+
# rng is nil, will issue a warning remembering the default that was used.
|
371
|
+
# Default: Random.new(42).
|
266
372
|
# skip_commands [TrueClass, FalseClass] If true, will not execute the
|
267
|
-
# commands and assume that the outputs are already saved
|
268
|
-
# the extractors over the already saved outputs, and
|
269
|
-
# from them. Default: false.
|
373
|
+
# commands and assume that the outputs are already saved (on ".out" files).
|
374
|
+
# Will only execute the extractors over the already saved outputs, and
|
375
|
+
# create the CSV file from them. Default: false.
|
270
376
|
#
|
271
377
|
# @param files [Array<Strings>] The strings that will replace the :pattern
|
272
|
-
# on :command, for every element in comms_info.
|
378
|
+
# on :command, for every element in comms_info. Can be a filename, or
|
379
|
+
# can be anything else (a numeric parameter, sh code, etc..), but we
|
380
|
+
# refer to them as files for simplicity and uniformity.
|
273
381
|
#
|
274
382
|
# @return [NilClass,Array<String>] The return of the internal #batch
|
275
383
|
# call. Returns nil if conf[:skip_commands] was set to true.
|
@@ -283,67 +391,112 @@ module BatchExperiment
|
|
283
391
|
# Initialize optional configurations with default values if they weren't
|
284
392
|
# provided. Don't change the conf argument, only our version of conf.
|
285
393
|
conf = conf.clone
|
286
|
-
conf[:separator]
|
287
|
-
conf[:
|
288
|
-
conf[:
|
394
|
+
conf[:separator] ||= ';'
|
395
|
+
conf[:qt_runs] ||= 1
|
396
|
+
conf[:comms_order] ||= :by_comm
|
397
|
+
conf[:rng] ||= Random.new(42)
|
289
398
|
#conf[:skip_commands] defaults to false/nil
|
290
399
|
|
291
400
|
# Get some of the batch config that we use inside here too.
|
292
|
-
out_ext
|
293
|
-
unfinished_ext
|
294
|
-
|
295
|
-
|
401
|
+
out_ext = batch_conf[:out_ext] || '.out'
|
402
|
+
unfinished_ext = batch_conf[:unfinished_ext] || '.unfinished'
|
403
|
+
converter = batch_conf[:converter].clone unless batch_conf[:converter].nil?
|
404
|
+
converter ||= BatchExperiment::Comm2FnameConverter.new
|
296
405
|
|
297
|
-
#
|
406
|
+
# Expand all commands, combining command templates and files.
|
298
407
|
comms_sets = []
|
299
408
|
comms_info.each do | comm_info |
|
300
|
-
comms_sets << gencommff(comm_info
|
409
|
+
comms_sets << gencommff(comm_info, files)
|
301
410
|
end
|
302
411
|
|
303
|
-
|
412
|
+
expanded_comms = comms_sets.map { | h | h.keys }
|
413
|
+
# If each command should be run more than once...
|
414
|
+
if conf[:qt_runs] > 1
|
415
|
+
# ... we replace each single command by an array of qt_runs copies,
|
416
|
+
# and then flatten the parent array.
|
417
|
+
expanded_comms.map! do | a |
|
418
|
+
a.map! { | c | Array.new(conf[:qt_runs], c) }.flatten!
|
419
|
+
end
|
420
|
+
end
|
421
|
+
|
422
|
+
# At this moment the expanded_comms is an array of arrays, each internal
|
423
|
+
# array has all the expanded commands of the one single command template
|
424
|
+
# over all the files.
|
425
|
+
# After the code block below, the expanded_comms will be an one-level array
|
426
|
+
# of the expanded commands, in the order they will be executed.
|
427
|
+
expanded_comms = case conf[:comms_order]
|
428
|
+
when :by_comm # all runs of the first command template first
|
429
|
+
expanded_comms.flatten!
|
430
|
+
when :by_file # all runs over the first file first
|
431
|
+
intercalate(expanded_comms)
|
432
|
+
when :random # a random order
|
433
|
+
expanded_comms.flatten!.shuffle!(random: conf[:rng])
|
434
|
+
end
|
304
435
|
|
305
436
|
# Execute the commands (or not).
|
306
|
-
ret = batch(
|
437
|
+
ret = batch(expanded_comms, batch_conf) unless conf[:skip_commands]
|
307
438
|
|
308
439
|
# Build header (first csv line, column names).
|
309
|
-
header = []
|
310
|
-
comms_info.
|
311
|
-
|
312
|
-
|
440
|
+
header = ['algorithm', 'filename', 'run_number']
|
441
|
+
header << merge_headers(comms_info.map { | c | c[:extractor].names })
|
442
|
+
header = header.join(conf[:separator])
|
443
|
+
|
444
|
+
# We need to merge the union of all comms_sets to query it.
|
445
|
+
comm2origin = {}
|
446
|
+
comms_sets.each do | h |
|
447
|
+
comm2origin.merge!(h) do | k, v, v2 |
|
448
|
+
puts "WARNING: The command expansion '#{k}' was generated more than once. The first time was by the template '#{v[:comm]}' and the file '#{v[:file]}', and this time by template '#{v2[:comm]}' and the file '#{v2[:file]}'. Will report on CSV as this command was generated by the template '#{v[:comm]}' and the file '#{v[:file]}'."
|
449
|
+
v
|
313
450
|
end
|
314
|
-
header << prefixed_names
|
315
451
|
end
|
316
|
-
header = intercalate(header) if conf[:ic_columns]
|
317
|
-
header = ['Filename'].concat(header).join(conf[:separator])
|
318
452
|
|
319
|
-
# Build body (inspect all output files
|
453
|
+
# Build body (inspect all output files and make csv lines).
|
454
|
+
#
|
455
|
+
# Body format: algorithm;filename;run_number;first extracted column; ...
|
456
|
+
#
|
457
|
+
# This means that the extractors have to agree on what is each column, two
|
458
|
+
# different extractors have to extract the same kind of data at each column
|
459
|
+
# (the first field returned by all extractors has to be, for example, cpu
|
460
|
+
# time, the same applies for the remaining fields).
|
461
|
+
# If one extractor extract more fields than the others this is not a
|
462
|
+
# problem, if the second biggest extractor (in number of fields extract)
|
463
|
+
# will extract, for example, 4 fields, and the biggest extract 6 fields,
|
464
|
+
# the first 4 fields extracted by the biggest extractor have to be the same
|
465
|
+
# as the ones on the second-biggest extractor. This way, all the lines will
|
466
|
+
# have the same kind of data on the first four columns (not counting the
|
467
|
+
# algorithm, filename and run_number ones), and only lines provenient from
|
468
|
+
# the biggest extractor will have data on the fifth and sixth columns.
|
320
469
|
body = [header]
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
470
|
+
times_found = {}
|
471
|
+
expanded_comms.each do | exp_comm |
|
472
|
+
run_info = comm2origin[exp_comm]
|
473
|
+
algorithm = run_info[:comm_info][:prefix]
|
474
|
+
filename = run_info[:filename]
|
475
|
+
|
476
|
+
times_found[exp_comm] ||= 0
|
477
|
+
times_found[exp_comm] += 1
|
478
|
+
run_number = times_found[exp_comm]
|
479
|
+
|
480
|
+
curr_line = [algorithm, filename, run_number]
|
481
|
+
|
482
|
+
partial_fname = converter.call(exp_comm)
|
483
|
+
out_fname = partial_fname + out_ext
|
484
|
+
lockfname = partial_fname + unfinished_ext
|
485
|
+
extractor = run_info[:comm_info][:extractor]
|
486
|
+
|
487
|
+
if File.exists?(out_fname)
|
488
|
+
if File.exists?(lockfname)
|
489
|
+
puts "Ignored file '#{out_fname}' because there was a"
|
490
|
+
+ " '#{lockfname}' file in the same folder."
|
337
491
|
else
|
338
|
-
|
339
|
-
|
340
|
-
line << comm_info[:extractor].names.map { | _ | '' }
|
492
|
+
f_content = File.open(out_fname, 'r') { | f | f.read }
|
493
|
+
curr_line << extractor.extract(f_content)
|
341
494
|
end
|
342
495
|
end
|
343
|
-
|
344
|
-
body <<
|
496
|
+
|
497
|
+
body << curr_line.join(conf[:separator])
|
345
498
|
end
|
346
|
-
body = body.
|
499
|
+
body = body.join(conf[:separator] + "\n")
|
347
500
|
|
348
501
|
# Write CSV data into a CSV file.
|
349
502
|
File.open(conf[:csvfname], 'w') { | f | f.write(body) }
|
@@ -2,7 +2,7 @@ module BatchExperiment
|
|
2
2
|
# Module that defines the interface used for extracting info from other
|
3
3
|
# programs output. You don't need to include it in your object, will suffice
|
4
4
|
# that the object (that you will use to extract info from the output) has the
|
5
|
-
#
|
5
|
+
# #names and #extract methods defined.
|
6
6
|
module Extractor
|
7
7
|
# Find a line in the following format: "field: value", return value.
|
8
8
|
#
|
@@ -13,7 +13,9 @@ module BatchExperiment
|
|
13
13
|
# @return [String] The 'value' as a string or, if 'field' isn't found, an
|
14
14
|
# empty string.
|
15
15
|
def self.get_field(lines, field)
|
16
|
-
lines.grep(/^#{field}: .*/).each
|
16
|
+
lines.grep(/^#{field}: .*/).each do | l |
|
17
|
+
return l.match(/:[\t ]+(.*)/)[1]
|
18
|
+
end
|
17
19
|
''
|
18
20
|
end
|
19
21
|
|
@@ -42,7 +44,7 @@ module BatchExperiment
|
|
42
44
|
# @return [Array<String>] The strings that will be used to make the column
|
43
45
|
# names at the BatchExperiment.experiment method.
|
44
46
|
def names
|
45
|
-
|
47
|
+
raise 'This method should have been overwritten by a subclass.'
|
46
48
|
end
|
47
49
|
|
48
50
|
# Extract N values of some program output, where N is equal to #names.size.
|
@@ -62,7 +64,7 @@ module BatchExperiment
|
|
62
64
|
# and the line string elements don't end in linebreak.
|
63
65
|
# @return [Array<String>] The N extracted values, as strings.
|
64
66
|
def extract_from_lines(lines)
|
65
|
-
|
67
|
+
raise 'This method should have been overwritten by a subclass.'
|
66
68
|
end
|
67
69
|
end
|
68
70
|
end
|
@@ -15,37 +15,6 @@ module BatchExperiment
|
|
15
15
|
end
|
16
16
|
end
|
17
17
|
|
18
|
-
module WcExtractor
|
19
|
-
extend Extractor
|
20
|
-
def self.names
|
21
|
-
['lines', 'words', 'bytes', 'ext_time', 'ext_mem']
|
22
|
-
end
|
23
|
-
|
24
|
-
def self.extract(content)
|
25
|
-
arr = content.split(' ')
|
26
|
-
qt_lines, words, bytes = arr[0], arr[1], arr[2]
|
27
|
-
lines = content.lines.map! { | l | l.chomp! }
|
28
|
-
[ qt_lines, words, bytes,
|
29
|
-
Extractor.get_field(lines, 'ext_time'),
|
30
|
-
Extractor.get_field(lines, 'ext_mem')
|
31
|
-
]
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
module TwoWordsExtractor
|
36
|
-
extend Extractor
|
37
|
-
def self.names
|
38
|
-
['first word', 'second word', 'ext_time', 'ext_mem']
|
39
|
-
end
|
40
|
-
|
41
|
-
def self.extract_from_lines(lines)
|
42
|
-
words = lines.empty? || lines[0].nil? ? ['',''] : lines[0].split().take(2)
|
43
|
-
words << Extractor.get_field(lines, 'ext_time')
|
44
|
-
words << Extractor.get_field(lines, 'ext_mem')
|
45
|
-
words
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
18
|
# Sample extractors used at https://github.com/henriquebecker91/masters,
|
50
19
|
# where this code had its beginning. This file contains the code used to
|
51
20
|
# extract info from the different outputs generated by UKP solving programs.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: batch_experiment
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Henrique Becker
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-06-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: childprocess
|
@@ -33,10 +33,10 @@ files:
|
|
33
33
|
- README.md
|
34
34
|
- examples/bible.txt
|
35
35
|
- examples/debug_batch.rb
|
36
|
-
- examples/
|
36
|
+
- examples/experiment_example.rb
|
37
37
|
- examples/sample_batch.rb
|
38
38
|
- examples/taoteching.txt
|
39
|
-
- examples/
|
39
|
+
- examples/ukp_experiment.rb
|
40
40
|
- lib/batch_experiment.rb
|
41
41
|
- lib/batch_experiment/extractor.rb
|
42
42
|
- lib/batch_experiment/sample_extractors.rb
|