rbbt-util 3.0.2 → 3.0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,276 @@
1
+ require 'rbbt/util/misc'
2
+
3
class Task
  # A single execution of a task, identified by +id+.
  #
  # The job result is stored at +path+ and bookkeeping (current step,
  # messages, options, dependencies, timestamps) is kept as YAML in a
  # sibling file, +path+ + '.info'.
  class Job
    # NOTE: :previsous_jobs is a misspelling of :previous_jobs. Nothing in this
    # class reads it; it is kept only so the public accessor does not vanish.
    attr_accessor :task, :id, :name, :options, :previsous_jobs, :required_files, :pid, :path
    # The reader counterpart (#input) is defined below and takes an optional name.
    attr_writer :input

    # Separator used by .id2name to split a job id into its pieces.
    IDSEP = "_"

    # Splits +job_id+ on IDSEP and returns the resulting array of pieces.
    def self.id2name(job_id)
      job_id.split(IDSEP)
    end

    # Rebuilds a job for +task+ from its +id+: the name is the first piece of
    # the id, options are left empty, and dependencies are read back from the
    # info file.
    def self.load(task, id)
      name, = id2name(id)
      job = new(task, id, name, nil, nil)
      job.load_dependencies
      job
    end

    # task           - the task this job runs; must respond to #name and #workflow
    # id             - job identifier, also the last component of the result path
    # name           - job name
    # options        - option hash (defaults to {})
    # previous_jobs  - jobs this one depends on (defaults to [])
    # required_files - files this one depends on (defaults to [])
    # input          - object whose #load provides the default input, or nil
    #
    # The result path is <jobdir>/<task name>/<id>, where jobdir comes from the
    # task's workflow when there is one and from Task.basedir otherwise.
    def initialize(task, id, name, options = nil, previous_jobs = nil, required_files = nil, input = nil)
      @task           = task
      @id             = id
      @name           = name
      @options        = options || {}
      @previous_jobs  = previous_jobs || []
      @required_files = required_files || []
      @input          = input

      basedir = task.workflow.jobdir unless task.workflow.nil?
      @path = File.join(basedir || Task.basedir, task.name, id)
    end

    # Returns the direct previous jobs followed by all of their own previous
    # jobs, recursively. Returns [] when there are no previous jobs.
    def previous_jobs_rec
      jobs = previous_jobs
      return [] if jobs.nil?
      jobs + jobs.collect { |job| job.previous_jobs_rec }.flatten
    end

    # Replaces the list of previous jobs and drops the stored input map.
    def previous_jobs=(previous_jobs)
      @previous_jobs = previous_jobs
      @all_inputs = nil
    end

    # Returns a hash, extended with IndiferentHash, from task name to job for
    # every job in #previous_jobs_rec. When several jobs share a task name the
    # last one listed wins.
    #
    # The map is rebuilt on every call; the original code had its caching
    # branch disabled with a hard-coded `true`, and that behavior is preserved.
    def all_inputs
      inputs = {}
      previous_jobs_rec.each { |job| inputs[job.task.name] = job }
      @all_inputs = inputs.extend(IndiferentHash)
    end

    # Without +name+: returns @input.load, or nil when no input was given.
    # With +name+: returns the entry of #all_inputs stored under that name.
    def input(name = nil)
      return all_inputs[name] unless name.nil?
      @input.nil? ? nil : @input.load
    end

    # Returns the previous jobs wrapped with NamedArray so that they can be
    # addressed by task name, or nil when the list itself is nil.
    def previous_jobs
      return nil if @previous_jobs.nil?
      NamedArray.name @previous_jobs, @previous_jobs.collect { |job| job.task.name }
    end

    # Path of the YAML file holding this job's bookkeeping.
    def info_file
      path + '.info'
    end

    # Returns the bookkeeping hash read from #info_file, extended with
    # IndiferentHash. Returns a plain empty hash when the file does not exist.
    def info
      return {} unless File.exist?(info_file)
      # The block form closes the handle; the original left it open.
      loaded = File.open(info_file) { |file| YAML.load(file) }
      # An empty file parses to a false value; treat it as "no info yet".
      (loaded || {}).extend IndiferentHash
    end

    # Stores +value+ under +key+ in the info file. The read-merge-write cycle
    # runs inside Misc.lock on the info file.
    def set_info(key, value)
      Misc.lock(info_file, key, value) do |locked_file, info_key, info_value|
        updated = info.merge(info_key => info_value)
        Open.write(locked_file, updated.to_yaml)
      end
    end

    # Without arguments returns the current step recorded in the info file.
    #
    # With +name+ it records that step and logs it. When +message+ is given it
    # is logged as well and appended to the stored list of messages.
    def step(name = nil, message = nil)
      return info[:step] if name.nil?

      set_info(:step, name)
      if message.nil?
        Log.info "[#{task.name}] Step '#{name}'"
      else
        Log.info "[#{task.name}] Step '#{name}': #{message.chomp}"
        # Parentheses matter: `a || [] << m` parses as `a || ([] << m)` and
        # would silently drop every message after the first one.
        set_info(:messages, (info[:messages] || []) + [message])
      end
    end

    # Messages recorded through #step, oldest first ([] when there are none).
    def messages
      info[:messages] || []
    end

    # True when the recorded step is :done, :error or :aborted.
    def done?
      [:done, :error, :aborted].include? info[:step]
    end

    # True when the recorded step is :error or :aborted.
    def error?
      [:error, :aborted].include? step
    end

    # Option values in the order given by task.options.
    def arguments
      options.values_at(*task.options)
    end

    # The block of code of the task.
    def block
      task.block
    end

    # Makes sure dependencies are in place: calls #produce on every required
    # file that does not exist, and #start on every previous job whose result
    # path does not exist. Every previous job is then marked as :done.
    def run_dependencies
      unless required_files.nil?
        required_files.each { |file| file.produce unless File.exist? file }
      end

      jobs = previous_jobs
      return if jobs.nil?
      jobs.each do |job|
        job.start unless File.exist? job.path
        job.set_info(:step, :done)
      end
    end

    # Records the dependencies in the info file: previous jobs as
    # "JOB:<task name>/<id>" strings, and required files as the result of their
    # #find method when they have one, or as themselves otherwise.
    def save_dependencies
      unless @previous_jobs.nil?
        set_info :previous_jobs, @previous_jobs.collect { |job| "JOB:#{job.task.name}/#{job.id}" }
      end

      # The original guard was inverted (`if ... nil?`) and called the
      # non-existent `responds_to?`, so required files were never saved.
      unless @required_files.nil?
        set_info :required_files, @required_files.collect { |file| file.respond_to?(:find) ? file.find : file }
      end
    end

    # Restores the dependencies recorded by #save_dependencies. Previous jobs
    # are reloaded through the workflow of the task.
    def load_dependencies
      saved = info

      if saved[:previous_jobs]
        @previous_jobs = saved[:previous_jobs].collect do |job_string|
          job_string =~ /JOB:(.*)\/(.*)/
          task.workflow.load_job($1, $2)
        end
      end

      @required_files = saved[:required_files] if saved[:required_files]
    end

    # Runs the job in the current process: resolves dependencies, records
    # start time, options and dependencies, evaluates the task block in the
    # context of this job and writes a non-nil result to +path+ according to
    # the persistence of the task.
    #
    # Any exception is recorded as an :error step and then re-raised.
    def start
      run_dependencies

      Log.medium("[#{task.name}] Starting Job '#{ name }'. Path: '#{ path }'")
      set_info(:start_time, Time.now)
      save_options(options)
      save_dependencies

      extend task.scope unless task.scope.nil? || Object == task.scope.class

      result = instance_exec(*arguments, &block)

      unless result.nil?
        case task.persistence
        # :float is listed because #load knows how to read it back; the
        # original omitted it, so float results were never written.
        when nil, :string, :tsv, :integer, :float
          Open.write(path, result.to_s)
        when :marshal
          Open.write(path, Marshal.dump(result))
        when :yaml
          Open.write(path, YAML.dump(result))
        end
      end

      set_info(:end_time, Time.now)
      Log.medium("[#{task.name}] Finished Job '#{ name }'. Path: '#{ path }'")
    rescue Exception => e
      step(:error, "#{e.class}: #{e.message}")
      raise e
    end

    # Saves +options+ in the info file; TSV values are stored as their string
    # representation.
    def save_options(options)
      saved = {}
      options.each do |key, value|
        saved[key] = TSV === value ? value.to_s : value
      end
      set_info(:options, saved)
    end

    # True when this job and all its previous jobs, recursively, are done.
    # A nil list of previous jobs is treated as empty.
    def recursive_done?
      jobs = previous_jobs || []
      jobs.all? { |job| job.recursive_done? } && done?
    end

    # Runs the job in the current process unless it is already done
    # (see #recursive_done?). Errors are logged and recorded in the info file
    # but not raised. Returns self.
    def run
      return self if recursive_done?
      execute_recording_errors
      self
    end

    # Like #run but does the work in a forked child process, whose pid is kept
    # in @pid. Returns self immediately; use #join to wait.
    def fork
      return self if recursive_done?
      @pid = Process.fork do
        execute_recording_errors
        exit
      end

      self
    end

    # Waits for the job to finish. With a forked child it waits on the pid;
    # otherwise it polls the info file every 5 seconds until #done?.
    # Returns self.
    def join
      if @pid.nil?
        until done?
          Log.debug "Waiting: #{info[:step]}"
          sleep 5
        end
      else
        Process.waitpid @pid
      end

      self
    end

    # Opens the result file and returns the File object; the caller closes it.
    def open
      File.open(path)
    end

    # Returns the raw content of the result file.
    def read
      File.open(path) { |file| file.read }
    end

    # Reads the result file and converts it according to the persistence of
    # the task. Returns nil for any persistence not listed below.
    def load
      case task.persistence
      when :float
        Open.read(path).to_f
      when :integer
        Open.read(path).to_i
      when :string
        Open.read(path)
      when :tsv
        TSV.new(path)
      when :marshal
        Marshal.load(Open.read(path))
      when :yaml
        YAML.load(Open.read(path))
      end
    end

    # Removes the result file and the info file, when present.
    def clean
      FileUtils.rm path if File.exist? path
      FileUtils.rm info_file if File.exist? info_file
    end

    # Cleans all previous jobs, recursively, and then this job.
    def recursive_clean
      jobs = previous_jobs
      jobs.each { |job| job.recursive_clean } unless jobs.nil?
      clean
    end

    private

    # Shared body of #run and #fork: marks the job as started, runs it and
    # marks it as done. Any exception is logged and recorded as an :error
    # step, unless #start already recorded it, so the message is kept once.
    def execute_recording_errors
      step(:started)
      start
      step(:done)
    rescue Exception => e
      Log.debug e.message
      Log.debug e.backtrace * "\n"
      step(:error, "#{e.class}: #{e.message}") unless error?
    end
  end # END Job
end
275
+
276
+
@@ -1,4 +1,46 @@
1
1
  class TSV
2
# Collapses consecutive rows that share the same key (first column) into a
# single row, joining the values of each column with "|", and writes the
# result to +output+ through Open.write.
#
# input  - one of:
#          * a String naming an existing file: the file is sorted by its first
#            column and rows with an empty key are dropped;
#          * any other String or a StringIO: its content is sorted and
#            filtered in the same way;
#          * anything else is used as is and must respond to #eof? and #gets.
#            Rows with equal keys are only merged when they are adjacent.
# output - passed to Open.write
# sep    - column separator (default: tab)
#
# NOTE(review): the file name and +sep+ are interpolated into a shell command
# without escaping; only call this with trusted values.
def self.merge_rows(input, output, sep = "\t")
  is = case
       when (String === input and not input.index("\n") and input.length < 250 and File.exist?(input))
         CMD.cmd("sort -k1,1 -t'#{sep}' #{ input } | grep -v '^#{sep}' ", :pipe => true)
       when (String === input or StringIO === input)
         CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => input, :pipe => true)
       else
         input
       end

  current_key = nil
  current_parts = []

  Open.write(output) do |os|
    until is.eof?
      key, *parts = is.gets.sub("\n", '').split(sep, -1)
      current_key ||= key

      case
      when key.nil?
        # Empty line: nothing to merge.
      when current_key == key
        parts.each_with_index do |part, i|
          if current_parts[i].nil?
            current_parts[i] = part
          else
            current_parts[i] = current_parts[i] << "|" << part
          end
        end
      else
        # A new key starts: emit the finished group and begin the next one.
        os.puts [current_key, current_parts].flatten * sep
        current_key = key
        current_parts = parts
      end
    end

    # Emit the group still being accumulated when the input ran out; the
    # original never wrote it, so the last key was missing from the output.
    os.puts [current_key, current_parts].flatten * sep unless current_key.nil?
  end
end
43
+
2
44
  def self.paste_merge(file1, file2, output, sep = "\t")
3
45
  case
4
46
  when (String === file1 and not file1.index("\n") and file1.length < 250 and File.exists?(file1))
@@ -186,7 +228,7 @@ class TSV
186
228
  if other.include? key
187
229
  new_values = other[key].values_at *fields
188
230
  new_values.collect!{|v| [v]} if type == :double and not other.type == :double
189
- new_values.collect!{|v| v.first} if not type == :double and other.type == :double
231
+ new_values.collect!{|v| v.nil? ? nil : v.first} if not type == :double and other.type == :double
190
232
  self[key] = self[key].concat new_values
191
233
  else
192
234
  if type == :double
@@ -223,8 +265,8 @@ class TSV
223
265
  end
224
266
  end
225
267
 
226
- new_values.collect!{|v| [v]} if type == :double and not other.type == :double
227
- new_values.collect!{|v| v.first} if not type == :double and other.type == :double
268
+ new_values.collect!{|v| [v]} if type == :double and not other.type == :double
269
+ new_values.collect!{|v| v.nil? ? nil : v.first} if not type == :double and other.type == :double
228
270
  all_new_values << new_values
229
271
  end
230
272
  end
@@ -274,7 +316,7 @@ class TSV
274
316
  end
275
317
  end
276
318
  new_values.collect!{|v| [v]} if type == :double and not other.type == :double
277
- new_values.collect!{|v| v.first} if not type == :double and other.type == :double
319
+ new_values.collect!{|v| v.nil? ? nil : v.first} if not type == :double and other.type == :double
278
320
  all_new_values << new_values
279
321
  end
280
322
  end
@@ -330,8 +372,8 @@ class TSV
330
372
  end
331
373
 
332
374
  def self.build_traverse_index(files, options = {})
333
- options = Misc.add_defaults options, :in_namespace => false, :persist_input => false
334
- in_namespace = options[:in_namespace]
375
+ options = Misc.add_defaults options, :in_namespace => false, :persist_input => false
376
+ in_namespace = options[:in_namespace]
335
377
  persist_input = options[:persist_input]
336
378
 
337
379
  path = find_path(files, options)
@@ -339,26 +381,42 @@ class TSV
339
381
  return nil if path.nil?
340
382
 
341
383
  traversal_ids = path.collect{|p| p.first}
342
-
343
- Log.medium "Found Traversal: #{traversal_ids * " => "}"
344
-
345
- current_id, current_file = path.shift
346
- current_key = current_file.all_fields.first
347
384
 
348
- index = current_file.index :target => current_id, :fields => current_key, :persistence => persist_input
385
+ Log.medium "Found Traversal: #{traversal_ids * " => "}"
386
+
387
+ data_key, data_file = path.shift
388
+ if data_key == data_file.key_field
389
+ Log.debug "Data index not required '#{data_file.key_field}' => '#{data_key}'"
390
+ data_index = nil
391
+ else
392
+ Log.debug "Data index required"
393
+ data_index = data_file.index :target => data_key, :fields => data_file.key_field, :persistence => false
394
+ end
349
395
 
396
+ current_index = data_index
397
+ current_key = data_key
350
398
  while not path.empty?
351
- current_id, current_file = path.shift
352
- current_index = current_file.index :target => current_id, :fields => index.fields.first, :persistence => true
353
- index.process 0 do |value|
354
- current_index.values_at(*value).flatten.uniq
399
+ next_key, next_file = path.shift
400
+
401
+ if current_index.nil?
402
+ current_index = next_file.index :target => next_key, :fields => current_key, :persistence => persist_input
403
+ else
404
+ next_index = next_file.index :target => next_key, :fields => current_key, :persistence => persist_input
405
+ current_index.process current_index.fields.first do |key, values, values|
406
+ if values.nil?
407
+ nil
408
+ else
409
+ next_index.values_at(*values).flatten.collect
410
+ end
411
+ end
412
+ current_index.fields = [next_key]
355
413
  end
356
- index.fields = current_index.fields
357
414
  end
358
415
 
359
- index
416
+ current_index
360
417
  end
361
418
 
419
+
362
420
  def self.find_traversal(tsv1, tsv2, options = {})
363
421
  options = Misc.add_defaults options, :in_namespace => false
364
422
  in_namespace = options[:in_namespace]
@@ -388,14 +446,23 @@ class TSV
388
446
  in_namespace = options[:in_namespace]
389
447
 
390
448
  fields = other.fields - [key_field].concat(self.fields) if fields == :all
391
- fields = other.fields_in_namespace - [key_field].concat(self.fields) if fields.nil?
449
+ if in_namespace
450
+ fields = other.fields_in_namespace - [key_field].concat(self.fields) if fields.nil?
451
+ else
452
+ fields = other.fields - [key_field].concat(self.fields) if fields.nil?
453
+ end
454
+
392
455
  Log.high("Attaching fields:#{fields.inspect} from #{other.filename.inspect}.")
456
+
457
+ other = other.tsv(:persistence => options[:persist_input] == true) unless TSV === other
393
458
  case
394
459
  when key_field == other.key_field
395
460
  attach_same_key other, fields
396
461
  when (not in_namespace and self.fields.include?(other.key_field))
462
+ Log.medium "Found other's key field: #{other.key_field}"
397
463
  attach_source_key other, other.key_field, fields
398
464
  when (in_namespace and self.fields_in_namespace.include?(other.key_field))
465
+ Log.medium "Found other's key field in #{in_namespace}: #{other.key_field}"
399
466
  attach_source_key other, other.key_field, fields
400
467
  else
401
468
  index = TSV.find_traversal(self, other, options)
@@ -1,6 +1,8 @@
1
1
  require 'rbbt/util/resource'
2
2
  require 'rbbt/util/task'
3
3
  require 'rbbt/util/persistence'
4
+ require 'rbbt/util/misc'
5
+
4
6
  module WorkFlow
5
7
  def self.extended(base)
6
8
  class << base
@@ -11,10 +13,16 @@ module WorkFlow
11
13
  base.extend Resource
12
14
  base.lib_dir = Resource.caller_base_dir if base.class == Object
13
15
  base.tasks = {}
16
+ base.tasks.extend IndiferentHash
14
17
  base.jobdir = (File.exists?(base.var.find(:lib)) ? base.var.find(:lib) : base.var.find)
15
18
  base.clear_dangling
16
19
  end
17
20
 
21
# Sets the task registry. The given hash itself is extended with
# IndiferentHash (it is not copied) and then stored in @tasks.
def tasks=(tasks)
  @tasks = tasks.extend(IndiferentHash)
end
25
+
18
26
  def local_persist(*args, &block)
19
27
  argsv = *args
20
28
  options = argsv.pop
@@ -34,6 +42,7 @@ module WorkFlow
34
42
  @dangling_option_types = {}
35
43
  @dangling_option_defaults = {}
36
44
  @dangling_dependencies = nil
45
+ @dangling_description = nil
37
46
  end
38
47
 
39
48
  def task_option(*args)
@@ -49,13 +58,18 @@ module WorkFlow
49
58
  @dangling_dependencies = dependencies
50
59
  end
51
60
 
61
# Remembers +description+ in @dangling_description.
# NOTE(review): presumably consumed by the next task definition through
# process_dangling, which reads this variable - confirm against the full file.
def task_description(description)
  @dangling_description = description
end
64
+
52
65
  def process_dangling
53
66
  res = [
54
67
  @dangling_options,
55
68
  Hash[*@dangling_options.zip(@dangling_option_descriptions.values_at(*@dangling_options)).flatten],
56
69
  Hash[*@dangling_options.zip(@dangling_option_types.values_at(*@dangling_options)).flatten],
57
70
  Hash[*@dangling_options.zip(@dangling_option_defaults.values_at(*@dangling_options)).flatten],
58
- @dangling_dependencies || @last_task,
71
+ (@dangling_dependencies || [@last_task]).compact,
72
+ @dangling_description,
59
73
  ]
60
74
 
61
75
  clear_dangling
@@ -70,37 +84,17 @@ module WorkFlow
70
84
  persistence = :marshal
71
85
  end
72
86
 
73
- options, option_descriptions, option_types, option_defaults, dependencies = process_dangling
87
+ options, option_descriptions, option_types, option_defaults, dependencies, description = process_dangling
74
88
  option_descriptions.delete_if do |k,v| v.nil? end
75
89
  option_types.delete_if do |k,v| v.nil? end
76
90
  option_defaults.delete_if do |k,v| v.nil? end
77
- task = Task.new name, persistence, options, option_descriptions, option_types, option_defaults, self, dependencies, self, &block
91
+ task = Task.new name, persistence, options, option_descriptions, option_types, option_defaults, self, dependencies, self, description, &block
78
92
  tasks[name] = task
79
93
  @last_task = task
80
94
  end
81
95
 
82
96
  def job(task, jobname, *args)
83
- task = tasks[task]
84
- raise "Task #{ task } not found" if task.nil?
85
-
86
- all_options, option_descriptions, option_types, option_defaults = task.recursive_options
87
-
88
- non_optional_arguments = all_options.reject{|option| option_defaults.include? option}
89
- run_options = nil
90
-
91
- case
92
- when args.length == non_optional_arguments.length
93
- run_options = Hash[*non_optional_arguments.zip(args).flatten].merge option_defaults
94
- when args.length == non_optional_arguments.length + 1
95
- optional_args = args.pop
96
- run_options = option_defaults.
97
- merge(optional_args).
98
- merge(Hash[*non_optional_arguments.zip(args).flatten])
99
- else
100
- raise "Number of non optional arguments (#{non_optional_arguments * ', '}) does not match given (#{args.flatten * ", "})"
101
- end
102
-
103
- task.job(jobname, run_options)
97
+ tasks[task].job(jobname, *args)
104
98
  end
105
99
 
106
100
  def run(*args)