rbbt-util 3.0.2 → 3.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rbbt/util/log.rb +8 -2
- data/lib/rbbt/util/misc.rb +31 -3
- data/lib/rbbt/util/open.rb +16 -3
- data/lib/rbbt/util/persistence.rb +52 -37
- data/lib/rbbt/util/resource.rb +8 -0
- data/lib/rbbt/util/task.rb +41 -281
- data/lib/rbbt/util/task/job.rb +276 -0
- data/lib/rbbt/util/tsv/attach.rb +86 -19
- data/lib/rbbt/util/workflow.rb +18 -24
- data/test/rbbt/util/test_misc.rb +42 -10
- data/test/rbbt/util/test_task.rb +7 -6
- data/test/rbbt/util/test_tc_hash.rb +20 -2
- data/test/rbbt/util/test_workflow.rb +38 -8
- data/test/rbbt/util/tsv/test_attach.rb +17 -0
- metadata +19 -4
@@ -0,0 +1,276 @@
|
|
1
|
+
require 'rbbt/util/misc'
|
2
|
+
|
3
|
+
class Task
  # One execution of a Task: it knows the options it was invoked with, the
  # jobs and files it depends on, and where its result (+path+) and its
  # bookkeeping YAML file (+info_file+) live on disk.
  class Job
    # NOTE: :previsous_jobs is a misspelling of :previous_jobs. It is kept only
    # so the accessor methods it has always generated do not disappear.
    attr_accessor :task, :id, :name, :options, :previsous_jobs, :required_files, :pid, :path

    # The readers for +input+ and +previous_jobs+, and the +previous_jobs=+
    # writer, have custom behaviour and are defined explicitly below.
    attr_writer :input

    # Separator between the components of a job id.
    IDSEP = "_"

    # Splits a job id on IDSEP and returns the resulting array of parts.
    def self.id2name(job_id)
      job_id.split(IDSEP)
    end

    # Rebuilds a job of +task+ from its +id+. The first component of the id is
    # used as the job name, and the dependencies are restored from the job's
    # info file.
    def self.load(task, id)
      name, _rest = id2name(id)
      job = new(task, id, name, nil, nil)
      job.load_dependencies
      job
    end

    # Deserializes YAML that this class wrote itself (info files and :yaml
    # results). Since Psych 4, YAML.load is a safe load that refuses Time and
    # arbitrary objects, both of which are stored here, so the unrestricted
    # loader is used when available. Must only be fed trusted, locally
    # produced content.
    def self.load_yaml(source)
      YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(source) : YAML.load(source)
    end

    # task::           the task this job executes
    # id::             job identifier, also the last component of +path+
    # name::           job name
    # options::        option values (defaults to an empty hash)
    # previous_jobs::  jobs this one depends on (defaults to an empty array)
    # required_files:: files this one depends on (defaults to an empty array)
    # input::          object whose +load+ provides the default input
    #
    # The result path is <jobdir>/<task name>/<id>, where jobdir comes from the
    # task's workflow when it has one and from Task.basedir otherwise.
    def initialize(task, id, name, options = nil, previous_jobs = nil, required_files = nil, input = nil)
      @task = task
      @id = id
      @name = name
      @options = options || {}
      @previous_jobs = previous_jobs || []
      @required_files = required_files || []
      @input = input

      basedir = task.workflow.jobdir unless task.workflow.nil?
      @path = File.join(basedir || Task.basedir, task.name, id)
    end

    # Direct dependencies followed by all their transitive dependencies.
    def previous_jobs_rec
      direct = previous_jobs
      return [] if direct.nil?
      direct + direct.map { |job| job.previous_jobs_rec }.flatten
    end

    # Replaces the list of direct dependencies.
    def previous_jobs=(previous_jobs)
      @previous_jobs = previous_jobs
      @all_inputs = nil
    end

    # Hash of every (transitive) dependency keyed by its task name. It is
    # rebuilt on each call, so it always reflects the current dependencies.
    # When several dependencies share a task name the last one wins.
    def all_inputs
      @all_inputs = {}
      previous_jobs_rec.each { |job| @all_inputs[job.task.name] = job }
      @all_inputs.extend IndiferentHash
      @all_inputs
    end

    # Without arguments returns the loaded default input (nil when there is
    # none). With a task +name+ returns the dependency job for that task.
    def input(name = nil)
      return all_inputs[name] unless name.nil?
      @input.nil? ? nil : @input.load
    end

    # Direct dependencies passed through NamedArray.name together with their
    # task names, or nil when there are none recorded.
    def previous_jobs
      return nil if @previous_jobs.nil?
      NamedArray.name @previous_jobs, @previous_jobs.map { |job| job.task.name }
    end

    # Path of the YAML file holding this job's bookkeeping information.
    def info_file
      path + '.info'
    end

    # Contents of the info file, or an empty hash when it does not exist yet.
    def info
      return {} unless File.exist?(info_file)
      # Block form so the file handle is closed as soon as parsing finishes.
      info = File.open(info_file) { |file| Job.load_yaml(file) }
      info.extend IndiferentHash
    end

    # Stores +value+ under +key+ in the info file. The read-modify-write cycle
    # runs inside Misc.lock on the info file.
    def set_info(key, value)
      Misc.lock(info_file, key, value) do |locked_file, k, v|
        new_info = self.info.merge(k => v)
        Open.write(locked_file, new_info.to_yaml)
      end
    end

    # Without arguments returns the current step. With a +name+ records it as
    # the current step and logs it; a +message+, when given, is logged too and
    # appended to the stored list of messages.
    def step(name = nil, message = nil)
      return info[:step] if name.nil?

      set_info(:step, name)
      if message.nil?
        Log.info "[#{task.name}] Step '#{name}'"
      else
        Log.info "[#{task.name}] Step '#{name}': #{message.chomp}"
        # Parenthesized on purpose: `a || [] << m` parses as `a || ([] << m)`
        # and would silently drop every message after the first one.
        set_info(:messages, (info[:messages] || []) << message)
      end
    end

    # Messages recorded through +step+ so far.
    def messages
      info[:messages] || []
    end

    # True once the job reached a final step (:done, :error or :aborted).
    def done?
      [:done, :error, :aborted].include? info[:step]
    end

    # True when the job finished with :error or :aborted.
    def error?
      [:error, :aborted].include? step
    end

    # Option values in the order in which the task declares its options.
    def arguments
      options.values_at(*task.options)
    end

    # The task's block, which holds the code to execute.
    def block
      task.block
    end

    # Produces missing required files and starts dependency jobs whose result
    # file is missing. Every dependency is marked :done afterwards.
    def run_dependencies
      unless required_files.nil?
        required_files.each do |file|
          # Entries restored from the info file are plain paths without a
          # +produce+ method; those can only be used as they are.
          file.produce if file.respond_to?(:produce) && !File.exist?(file)
        end
      end

      unless previous_jobs.nil?
        previous_jobs.each do |job|
          job.start unless File.exist?(job.path)
          job.set_info(:step, :done)
        end
      end
    end

    # Records the dependencies in the info file so that +load_dependencies+
    # can restore them: jobs as "JOB:<task name>/<id>" strings, files as the
    # result of their +find+ method when they have one and as-is otherwise.
    def save_dependencies
      unless @previous_jobs.nil?
        set_info :previous_jobs, @previous_jobs.map { |job| "JOB:#{job.task.name}/#{job.id}" }
      end

      unless @required_files.nil?
        set_info :required_files, @required_files.map { |file| file.respond_to?(:find) ? file.find : file }
      end
    end

    # Restores the dependencies recorded by +save_dependencies+. Jobs are
    # reloaded through the workflow of this job's task.
    def load_dependencies
      saved = info # read and parse the info file only once

      if saved[:previous_jobs]
        @previous_jobs = saved[:previous_jobs].map do |job_string|
          job_string =~ /JOB:(.*)\/(.*)/
          task.workflow.load_job($1, $2)
        end
      end

      @required_files = saved[:required_files] if saved[:required_files]
    end

    # Runs the dependencies and then the task block (evaluated in the context
    # of this job, with +arguments+ as parameters). A non-nil result is written
    # to +path+ in the format selected by the task's persistence. Any failure
    # is recorded as an :error step and re-raised.
    def start
      run_dependencies

      Log.medium("[#{task.name}] Starting Job '#{ name }'. Path: '#{ path }'")
      set_info(:start_time, Time.now)
      save_options(options)
      save_dependencies

      extend task.scope unless task.scope.nil? or Object == task.scope.class

      result = instance_exec(*arguments, &block)

      unless result.nil?
        case task.persistence
        when nil, :string, :tsv, :integer, :float
          # :float is included because +load+ reads it back with to_f.
          Open.write(path, result.to_s)
        when :marshal
          Open.write(path, Marshal.dump(result))
        when :yaml
          Open.write(path, YAML.dump(result))
        end
      end

      set_info(:end_time, Time.now)
      Log.medium("[#{task.name}] Finished Job '#{ name }'. Path: '#{ path }'")
    rescue Exception => e
      # Deliberately broad: whatever stops the job is recorded, then re-raised.
      step(:error, "#{e.class}: #{e.message}")
      raise e
    end

    # Stores the options in the info file. TSV values are stored as strings.
    def save_options(options)
      new_options = {}
      options.each do |key, value|
        new_options[key] = TSV === value ? value.to_s : value
      end
      set_info(:options, new_options)
    end

    # True when this job and all of its (transitive) dependencies are done.
    # A job without recorded dependencies only depends on itself.
    def recursive_done?
      (previous_jobs || []).all? { |job| job.recursive_done? } && done?
    end

    # Runs the job in the current process unless it is already done. Errors
    # are logged and recorded as an :error step instead of being raised.
    # Returns self.
    def run
      return self if recursive_done?

      begin
        step(:started)
        start
        step(:done)
      rescue Exception => e
        Log.debug e.message
        Log.debug e.backtrace * "\n"
        step(:error, "#{e.class}: #{e.message}")
      end

      self
    end

    # Like +run+, but executes the job in a forked child process whose pid is
    # kept in +pid+. Returns self immediately; use +join+ to wait.
    def fork
      return self if recursive_done?

      @pid = Process.fork do
        begin
          step(:started)
          start
          step(:done)
        rescue Exception => e
          Log.debug e.message
          Log.debug e.backtrace * "\n"
          step(:error, "#{e.class}: #{e.message}")
        end
        exit
      end

      self
    end

    # Waits for the job to finish: for the forked child when there is one,
    # otherwise by polling the info file every 5 seconds. Returns self.
    def join
      if @pid.nil?
        until done?
          Log.debug "Waiting: #{info[:step]}"
          sleep 5
        end
      else
        Process.waitpid @pid
      end

      self
    end

    # Opens the result file for reading. The caller must close it.
    def open
      File.open(path)
    end

    # Raw contents of the result file.
    def read
      File.read(path)
    end

    # Result of the job, decoded according to the task's persistence.
    # Returns nil for any persistence not listed here.
    def load
      case task.persistence
      when :float
        Open.read(path).to_f
      when :integer
        Open.read(path).to_i
      when :string
        Open.read(path)
      when :tsv
        TSV.new(path)
      when :marshal
        Marshal.load(Open.read(path))
      when :yaml
        Job.load_yaml(Open.read(path))
      end
    end

    # Removes the result file and the info file, if present.
    def clean
      FileUtils.rm path if File.exist? path
      FileUtils.rm info_file if File.exist? info_file
    end

    # Cleans every dependency recursively, then this job.
    def recursive_clean
      previous_jobs.each { |job| job.recursive_clean } unless previous_jobs.nil?
      clean
    end
  end # END Job
end
|
275
|
+
|
276
|
+
|
data/lib/rbbt/util/tsv/attach.rb
CHANGED
@@ -1,4 +1,46 @@
|
|
1
1
|
class TSV
|
2
|
+
# Collapses consecutive rows of +input+ that share the same key (first field)
# into a single row written to +output+. Values found in the same column of
# merged rows are joined with "|".
#
# input::  a file name, a String or StringIO with the content, or any object
#          offering +eof?+ and +gets+. File names and in-memory content are
#          piped through `sort` (and `grep -v` to drop lines with an empty key)
#          first; any other stream is used as given, so it has to arrive
#          already grouped by key.
# output:: handed to Open.write, whose block receives the stream to write to.
# sep::    field separator.
#
# NOTE(review): +input+ and +sep+ are interpolated into the shell command
# unescaped; confirm callers never pass untrusted values.
def self.merge_rows(input, output, sep = "\t")
  is = case
       when (String === input and not input.index("\n") and input.length < 250 and File.exist?(input))
         CMD.cmd("sort -k1,1 -t'#{sep}' #{ input } | grep -v '^#{sep}' ", :pipe => true)
       when (String === input or StringIO === input)
         CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => input, :pipe => true)
       else
         input
       end

  current_key = nil
  current_parts = []

  Open.write(output) do |os|
    until is.eof?
      key, *parts = is.gets.sub("\n", '').split(sep, -1)
      next if key.nil? # empty line: nothing to merge

      current_key ||= key

      if current_key == key
        # Same key as the row being accumulated: merge column by column.
        parts.each_with_index do |part, i|
          current_parts[i] = current_parts[i].nil? ? part : "#{current_parts[i]}|#{part}"
        end
      else
        # Key changed: emit the finished row and start accumulating the new one.
        os.puts [current_key, current_parts].flatten * sep
        current_key = key
        current_parts = parts
      end
    end

    # Rows are only emitted on a key change, so the last group is still
    # pending once the input is exhausted.
    os.puts [current_key, current_parts].flatten * sep unless current_key.nil?
  end
end
|
43
|
+
|
2
44
|
def self.paste_merge(file1, file2, output, sep = "\t")
|
3
45
|
case
|
4
46
|
when (String === file1 and not file1.index("\n") and file1.length < 250 and File.exists?(file1))
|
@@ -186,7 +228,7 @@ class TSV
|
|
186
228
|
if other.include? key
|
187
229
|
new_values = other[key].values_at *fields
|
188
230
|
new_values.collect!{|v| [v]} if type == :double and not other.type == :double
|
189
|
-
new_values.collect!{|v| v.first} if not type == :double and other.type == :double
|
231
|
+
new_values.collect!{|v| v.nil? ? nil : v.first} if not type == :double and other.type == :double
|
190
232
|
self[key] = self[key].concat new_values
|
191
233
|
else
|
192
234
|
if type == :double
|
@@ -223,8 +265,8 @@ class TSV
|
|
223
265
|
end
|
224
266
|
end
|
225
267
|
|
226
|
-
new_values.collect!{|v| [v]}
|
227
|
-
new_values.collect!{|v| v.first} if not type == :double and other.type == :double
|
268
|
+
new_values.collect!{|v| [v]} if type == :double and not other.type == :double
|
269
|
+
new_values.collect!{|v| v.nil? ? nil : v.first} if not type == :double and other.type == :double
|
228
270
|
all_new_values << new_values
|
229
271
|
end
|
230
272
|
end
|
@@ -274,7 +316,7 @@ class TSV
|
|
274
316
|
end
|
275
317
|
end
|
276
318
|
new_values.collect!{|v| [v]} if type == :double and not other.type == :double
|
277
|
-
new_values.collect!{|v| v.first} if not type == :double and other.type == :double
|
319
|
+
new_values.collect!{|v| v.nil? ? nil : v.first} if not type == :double and other.type == :double
|
278
320
|
all_new_values << new_values
|
279
321
|
end
|
280
322
|
end
|
@@ -330,8 +372,8 @@ class TSV
|
|
330
372
|
end
|
331
373
|
|
332
374
|
def self.build_traverse_index(files, options = {})
|
333
|
-
options
|
334
|
-
in_namespace
|
375
|
+
options = Misc.add_defaults options, :in_namespace => false, :persist_input => false
|
376
|
+
in_namespace = options[:in_namespace]
|
335
377
|
persist_input = options[:persist_input]
|
336
378
|
|
337
379
|
path = find_path(files, options)
|
@@ -339,26 +381,42 @@ class TSV
|
|
339
381
|
return nil if path.nil?
|
340
382
|
|
341
383
|
traversal_ids = path.collect{|p| p.first}
|
342
|
-
|
343
|
-
Log.medium "Found Traversal: #{traversal_ids * " => "}"
|
344
|
-
|
345
|
-
current_id, current_file = path.shift
|
346
|
-
current_key = current_file.all_fields.first
|
347
384
|
|
348
|
-
|
385
|
+
Log.medium "Found Traversal: #{traversal_ids * " => "}"
|
386
|
+
|
387
|
+
data_key, data_file = path.shift
|
388
|
+
if data_key == data_file.key_field
|
389
|
+
Log.debug "Data index not required '#{data_file.key_field}' => '#{data_key}'"
|
390
|
+
data_index = nil
|
391
|
+
else
|
392
|
+
Log.debug "Data index required"
|
393
|
+
data_index = data_file.index :target => data_key, :fields => data_file.key_field, :persistence => false
|
394
|
+
end
|
349
395
|
|
396
|
+
current_index = data_index
|
397
|
+
current_key = data_key
|
350
398
|
while not path.empty?
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
current_index.
|
399
|
+
next_key, next_file = path.shift
|
400
|
+
|
401
|
+
if current_index.nil?
|
402
|
+
current_index = next_file.index :target => next_key, :fields => current_key, :persistence => persist_input
|
403
|
+
else
|
404
|
+
next_index = next_file.index :target => next_key, :fields => current_key, :persistence => persist_input
|
405
|
+
current_index.process current_index.fields.first do |key, values, values|
|
406
|
+
if values.nil?
|
407
|
+
nil
|
408
|
+
else
|
409
|
+
next_index.values_at(*values).flatten.collect
|
410
|
+
end
|
411
|
+
end
|
412
|
+
current_index.fields = [next_key]
|
355
413
|
end
|
356
|
-
index.fields = current_index.fields
|
357
414
|
end
|
358
415
|
|
359
|
-
|
416
|
+
current_index
|
360
417
|
end
|
361
418
|
|
419
|
+
|
362
420
|
def self.find_traversal(tsv1, tsv2, options = {})
|
363
421
|
options = Misc.add_defaults options, :in_namespace => false
|
364
422
|
in_namespace = options[:in_namespace]
|
@@ -388,14 +446,23 @@ class TSV
|
|
388
446
|
in_namespace = options[:in_namespace]
|
389
447
|
|
390
448
|
fields = other.fields - [key_field].concat(self.fields) if fields == :all
|
391
|
-
|
449
|
+
if in_namespace
|
450
|
+
fields = other.fields_in_namespace - [key_field].concat(self.fields) if fields.nil?
|
451
|
+
else
|
452
|
+
fields = other.fields - [key_field].concat(self.fields) if fields.nil?
|
453
|
+
end
|
454
|
+
|
392
455
|
Log.high("Attaching fields:#{fields.inspect} from #{other.filename.inspect}.")
|
456
|
+
|
457
|
+
other = other.tsv(:persistence => options[:persist_input] == true) unless TSV === other
|
393
458
|
case
|
394
459
|
when key_field == other.key_field
|
395
460
|
attach_same_key other, fields
|
396
461
|
when (not in_namespace and self.fields.include?(other.key_field))
|
462
|
+
Log.medium "Found other's key field: #{other.key_field}"
|
397
463
|
attach_source_key other, other.key_field, fields
|
398
464
|
when (in_namespace and self.fields_in_namespace.include?(other.key_field))
|
465
|
+
Log.medium "Found other's key field in #{in_namespace}: #{other.key_field}"
|
399
466
|
attach_source_key other, other.key_field, fields
|
400
467
|
else
|
401
468
|
index = TSV.find_traversal(self, other, options)
|
data/lib/rbbt/util/workflow.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
require 'rbbt/util/resource'
|
2
2
|
require 'rbbt/util/task'
|
3
3
|
require 'rbbt/util/persistence'
|
4
|
+
require 'rbbt/util/misc'
|
5
|
+
|
4
6
|
module WorkFlow
|
5
7
|
def self.extended(base)
|
6
8
|
class << base
|
@@ -11,10 +13,16 @@ module WorkFlow
|
|
11
13
|
base.extend Resource
|
12
14
|
base.lib_dir = Resource.caller_base_dir if base.class == Object
|
13
15
|
base.tasks = {}
|
16
|
+
base.tasks.extend IndiferentHash
|
14
17
|
base.jobdir = (File.exists?(base.var.find(:lib)) ? base.var.find(:lib) : base.var.find)
|
15
18
|
base.clear_dangling
|
16
19
|
end
|
17
20
|
|
21
|
+
# Stores +tasks+ in @tasks after extending it with IndiferentHash. The object
# passed in is extended in place, not copied.
def tasks=(tasks)
|
22
|
+
tasks.extend IndiferentHash
|
23
|
+
@tasks = tasks
|
24
|
+
end
|
25
|
+
|
18
26
|
def local_persist(*args, &block)
|
19
27
|
argsv = *args
|
20
28
|
options = argsv.pop
|
@@ -34,6 +42,7 @@ module WorkFlow
|
|
34
42
|
@dangling_option_types = {}
|
35
43
|
@dangling_option_defaults = {}
|
36
44
|
@dangling_dependencies = nil
|
45
|
+
@dangling_description = nil
|
37
46
|
end
|
38
47
|
|
39
48
|
def task_option(*args)
|
@@ -49,13 +58,18 @@ module WorkFlow
|
|
49
58
|
@dangling_dependencies = dependencies
|
50
59
|
end
|
51
60
|
|
61
|
+
# Records +description+ in @dangling_description. process_dangling returns
# that value as the last element of its result, and clear_dangling resets it
# to nil.
def task_description(description)
|
62
|
+
@dangling_description = description
|
63
|
+
end
|
64
|
+
|
52
65
|
def process_dangling
|
53
66
|
res = [
|
54
67
|
@dangling_options,
|
55
68
|
Hash[*@dangling_options.zip(@dangling_option_descriptions.values_at(*@dangling_options)).flatten],
|
56
69
|
Hash[*@dangling_options.zip(@dangling_option_types.values_at(*@dangling_options)).flatten],
|
57
70
|
Hash[*@dangling_options.zip(@dangling_option_defaults.values_at(*@dangling_options)).flatten],
|
58
|
-
@dangling_dependencies || @last_task,
|
71
|
+
(@dangling_dependencies || [@last_task]).compact,
|
72
|
+
@dangling_description,
|
59
73
|
]
|
60
74
|
|
61
75
|
clear_dangling
|
@@ -70,37 +84,17 @@ module WorkFlow
|
|
70
84
|
persistence = :marshal
|
71
85
|
end
|
72
86
|
|
73
|
-
options, option_descriptions, option_types, option_defaults, dependencies = process_dangling
|
87
|
+
options, option_descriptions, option_types, option_defaults, dependencies, description = process_dangling
|
74
88
|
option_descriptions.delete_if do |k,v| v.nil? end
|
75
89
|
option_types.delete_if do |k,v| v.nil? end
|
76
90
|
option_defaults.delete_if do |k,v| v.nil? end
|
77
|
-
task = Task.new name, persistence, options, option_descriptions, option_types, option_defaults, self, dependencies, self, &block
|
91
|
+
task = Task.new name, persistence, options, option_descriptions, option_types, option_defaults, self, dependencies, self, description, &block
|
78
92
|
tasks[name] = task
|
79
93
|
@last_task = task
|
80
94
|
end
|
81
95
|
|
82
96
|
def job(task, jobname, *args)
|
83
|
-
|
84
|
-
raise "Task #{ task } not found" if task.nil?
|
85
|
-
|
86
|
-
all_options, option_descriptions, option_types, option_defaults = task.recursive_options
|
87
|
-
|
88
|
-
non_optional_arguments = all_options.reject{|option| option_defaults.include? option}
|
89
|
-
run_options = nil
|
90
|
-
|
91
|
-
case
|
92
|
-
when args.length == non_optional_arguments.length
|
93
|
-
run_options = Hash[*non_optional_arguments.zip(args).flatten].merge option_defaults
|
94
|
-
when args.length == non_optional_arguments.length + 1
|
95
|
-
optional_args = args.pop
|
96
|
-
run_options = option_defaults.
|
97
|
-
merge(optional_args).
|
98
|
-
merge(Hash[*non_optional_arguments.zip(args).flatten])
|
99
|
-
else
|
100
|
-
raise "Number of non optional arguments (#{non_optional_arguments * ', '}) does not match given (#{args.flatten * ", "})"
|
101
|
-
end
|
102
|
-
|
103
|
-
task.job(jobname, run_options)
|
97
|
+
tasks[task].job(jobname, *args)
|
104
98
|
end
|
105
99
|
|
106
100
|
def run(*args)
|