rbbt-util 3.0.2 → 3.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,276 @@
1
+ require 'rbbt/util/misc'
2
+
3
class Task
  # One concrete execution of a Task. A job owns a result file at +path+ and a
  # YAML "info" file next to it (+path+ + '.info') that records the current
  # step, messages, options and dependencies.
  class Job
    attr_accessor :task, :id, :name, :options, :required_files, :pid, :path
    # Misspelled accessor from the original attribute list; nothing in this
    # class uses it. Kept only so existing callers do not break.
    attr_accessor :previsous_jobs
    # Only the writer is generated; the reader is the custom #input below.
    attr_writer :input

    # Separator used by id2name to split a job id into its parts.
    IDSEP = "_"

    # Splits +job_id+ on IDSEP and returns the resulting array of parts.
    # NOTE(review): a name that itself contains IDSEP is split as well, so
    # callers that take only the first part get a truncated name - confirm
    # how ids are built before relying on this.
    def self.id2name(job_id)
      job_id.split(IDSEP)
    end

    # Rebuilds a job for +task+ from its +id+ and restores the dependencies
    # recorded in its info file. Options and input are not restored.
    def self.load(task, id)
      name = id2name(id).first
      job = new(task, id, name, nil, nil)
      job.load_dependencies
      job
    end

    # task::           the task this job executes
    # id::             identifier, also the file name of the result
    # name::           job name
    # options::        hash of option values (defaults to {})
    # previous_jobs::  jobs this one depends on (defaults to [])
    # required_files:: files that must exist before running (defaults to [])
    # input::          object whose #load provides the default input
    #
    # The result path is <jobdir>/<task name>/<id>, where jobdir comes from
    # the task's workflow when it has one and from Task.basedir otherwise.
    def initialize(task, id, name, options = nil, previous_jobs = nil, required_files = nil, input = nil)
      @task = task
      @id = id
      @name = name
      @options = options || {}
      @previous_jobs = previous_jobs || []
      @required_files = required_files || []
      @input = input

      basedir = task.workflow.nil? ? nil : task.workflow.jobdir
      @path = File.join(basedir || Task.basedir, task.name, id)
    end

    # All dependencies, direct ones first, followed by the dependencies of
    # each of them (recursively). Returns [] when there are none.
    def previous_jobs_rec
      direct = previous_jobs
      return [] if direct.nil?
      direct + direct.collect { |job| job.previous_jobs_rec }.flatten
    end

    # Replaces the list of direct dependencies.
    def previous_jobs=(previous_jobs)
      @previous_jobs = previous_jobs
    end

    # Hash of every (recursive) dependency keyed by its task name, extended
    # with IndiferentHash. It is rebuilt on every call, as the original code
    # did, so it always reflects the current dependency list.
    def all_inputs
      inputs = {}
      previous_jobs_rec.each { |job| inputs[job.task.name] = job }
      inputs.extend IndiferentHash
      inputs
    end

    # Without arguments returns the loaded default input (nil when there is
    # none); with +name+ returns the dependency job for the task so named.
    def input(name = nil)
      return all_inputs[name] unless name.nil?
      @input.nil? ? nil : @input.load
    end

    # Direct dependencies passed through NamedArray.name together with their
    # task names, or nil when the list has been set to nil.
    def previous_jobs
      return nil if @previous_jobs.nil?
      NamedArray.name @previous_jobs, @previous_jobs.collect { |job| job.task.name }
    end

    # Path of the YAML file holding this job's metadata.
    def info_file
      path + '.info'
    end

    # Current metadata. Returns a plain {} when the info file does not exist;
    # otherwise the parsed content extended with IndiferentHash. An empty
    # file is treated as empty metadata instead of failing later on a
    # non-hash value.
    def info
      return {} unless File.exist?(info_file)
      loaded = read_yaml(File.read(info_file)) || {}
      loaded.extend IndiferentHash
    end

    # Merges key => value into the metadata and rewrites the info file; the
    # read-merge-write runs inside the block handed to Misc.lock.
    def set_info(key, value)
      Misc.lock(info_file, key, value) do |locked_file, locked_key, locked_value|
        Open.write(locked_file, info.merge(locked_key => locked_value).to_yaml)
      end
    end

    # Without arguments returns the recorded step. With +name+ records it as
    # the current step and logs it; a non-nil +message+ is also appended to
    # the list of recorded messages.
    def step(name = nil, message = nil)
      return info[:step] if name.nil?

      set_info(:step, name)
      if message.nil?
        Log.info "[#{task.name}] Step '#{name}'"
      else
        Log.info "[#{task.name}] Step '#{name}': #{message.chomp}"
        # Parentheses matter: without them `||` binds looser than `<<` and
        # every message after the first one is silently dropped.
        set_info(:messages, (info[:messages] || []) << message)
      end
    end

    # Messages recorded so far ([] when there are none).
    def messages
      info[:messages] || []
    end

    # True once the job reached a final step, successful or not.
    def done?
      [:done, :error, :aborted].include? info[:step]
    end

    # True when the job finished in the :error or :aborted step.
    def error?
      [:error, :aborted].include? step
    end

    # Option values in the order given by task.options.
    def arguments
      options.values_at(*task.options)
    end

    # The task's block, which #start evaluates on this job.
    def block
      task.block
    end

    # Produces every required file that does not exist yet and starts every
    # dependency whose result file is missing. Each dependency is marked as
    # :done afterwards, whether or not it had to be started.
    def run_dependencies
      unless required_files.nil?
        required_files.each { |file| file.produce unless File.exist?(file) }
      end

      dependencies = previous_jobs
      unless dependencies.nil?
        dependencies.each do |job|
          job.start unless File.exist?(job.path)
          job.set_info(:step, :done)
        end
      end
    end

    # Records dependencies in the info file: jobs as "JOB:<task>/<id>"
    # strings, files as the result of #find when they respond to it and as
    # themselves otherwise.
    # NOTE(review): the original guard was inverted and called the
    # non-existent `responds_to?`, so required files were never written;
    # confirm that whatever #find returns serializes cleanly to YAML.
    def save_dependencies
      unless @previous_jobs.nil?
        set_info :previous_jobs, @previous_jobs.collect { |job| "JOB:#{job.task.name}/#{job.id}" }
      end

      unless @required_files.nil?
        set_info :required_files, @required_files.collect { |file| file.respond_to?(:find) ? file.find : file }
      end
    end

    # Restores what #save_dependencies recorded: dependency jobs are loaded
    # through the task's workflow, required files are taken as stored.
    def load_dependencies
      saved = info

      if saved[:previous_jobs]
        @previous_jobs = saved[:previous_jobs].collect do |job_string|
          match = /JOB:(.*)\/(.*)/.match(job_string)
          task.workflow.load_job(match && match[1], match && match[2])
        end
      end

      @required_files = saved[:required_files] if saved[:required_files]
    end

    # Runs dependencies, records start time, options and dependencies,
    # evaluates the task block on this job (extended with the task's scope
    # when it has one) and writes a non-nil result to +path+. Any exception
    # is recorded as an :error step and raised again.
    def start
      run_dependencies

      Log.medium("[#{task.name}] Starting Job '#{ name }'. Path: '#{ path }'")
      set_info(:start_time, Time.now)
      save_options(options)
      save_dependencies

      extend task.scope unless task.scope.nil? or Object == task.scope.class

      result = instance_exec(*arguments, &block)
      write_result(result) unless result.nil?

      set_info(:end_time, Time.now)
      Log.medium("[#{task.name}] Finished Job '#{ name }'. Path: '#{ path }'")
    rescue Exception => e
      # Deliberately broad, as in the original: every failure must reach the
      # info file before it propagates.
      step(:error, "#{e.class}: #{e.message}")
      raise e
    end

    # Stores +options+ in the info file; TSV values are stored as strings.
    def save_options(options)
      serializable = {}
      options.each do |key, value|
        serializable[key] = TSV === value ? value.to_s : value
      end
      set_info(:options, serializable)
    end

    # True when this job and all of its dependencies (recursively) are done.
    # A nil dependency list is treated as empty.
    def recursive_done?
      (previous_jobs || []).all? { |job| job.recursive_done? } && done?
    end

    # Runs the job in the current process unless it is already done.
    # Failures are recorded in the info file, not raised. Returns self.
    def run
      return self if recursive_done?
      run_and_record_steps
      self
    end

    # Runs the job in a forked child process unless it is already done; the
    # child's pid is kept in @pid for #join. Returns self.
    def fork
      return self if recursive_done?

      @pid = Process.fork do
        run_and_record_steps
        exit
      end

      self
    end

    # Waits for the job to finish: waits on the forked child when there is
    # one, otherwise polls the info file every 5 seconds. Returns self.
    def join
      if @pid.nil?
        until done?
          Log.debug "Waiting: #{info[:step]}"
          sleep 5
        end
      else
        Process.waitpid @pid
      end

      self
    end

    # Opens the result file and returns the File; the caller must close it.
    def open
      File.open(path)
    end

    # Raw content of the result file.
    def read
      File.read(path)
    end

    # Result decoded according to task.persistence. Returns nil for any
    # persistence not listed here, including nil.
    def load
      case task.persistence
      when :float
        Open.read(path).to_f
      when :integer
        Open.read(path).to_i
      when :string
        Open.read(path)
      when :tsv
        TSV.new(path)
      when :marshal
        Marshal.load(Open.read(path))
      when :yaml
        read_yaml(Open.read(path))
      end
    end

    # Removes the result file and the info file when they exist.
    def clean
      FileUtils.rm path if File.exist? path
      FileUtils.rm info_file if File.exist? info_file
    end

    # Cleans every dependency (recursively) and then this job.
    def recursive_clean
      dependencies = previous_jobs
      dependencies.each { |job| job.recursive_clean } unless dependencies.nil?
      clean
    end

    private

    # Parses YAML this library wrote itself. Newer Psych releases restrict
    # YAML.load to a small set of classes, which rejects the Time values
    # stored by #start; unsafe_load keeps the historical behaviour there.
    def read_yaml(text)
      YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(text) : YAML.load(text)
    end

    # Writes +result+ to +path+ in the format selected by task.persistence.
    # :float is written as text because #load reads it back with to_f.
    def write_result(result)
      case task.persistence
      when nil, :string, :tsv, :integer, :float
        Open.write(path, result.to_s)
      when :marshal
        Open.write(path, Marshal.dump(result))
      when :yaml
        Open.write(path, YAML.dump(result))
      end
    end

    # Shared body of #run and #fork: started -> start -> done, with failures
    # logged and recorded instead of raised.
    def run_and_record_steps
      step(:started)
      start
      step(:done)
    rescue Exception => e
      Log.debug e.message
      Log.debug e.backtrace * "\n"
      # #start has already recorded its own failures; recording them again
      # would store the same message twice.
      step(:error, "#{e.class}: #{e.message}") unless info[:step] == :error
    end
  end # END Job
end
275
+
276
+
@@ -1,4 +1,46 @@
1
1
  class TSV
2
# Collapses consecutive rows of +input+ that share the same first field into
# a single row, joining the values of each remaining column with "|", and
# writes the result to +output+ through Open.write.
#
# input::  a short String naming an existing file, a String or StringIO with
#          the content itself (both are piped through `sort`/`grep` via
#          CMD.cmd), or any other object, which is read directly with
#          eof?/gets and is merged correctly only if equal keys are adjacent
# output:: destination handed to Open.write
# sep::    field separator (tab by default)
def self.merge_rows(input, output, sep = "\t")
  is = case
       when (String === input and not input.index("\n") and input.length < 250 and File.exist?(input))
         CMD.cmd("sort -k1,1 -t'#{sep}' #{ input } | grep -v '^#{sep}' ", :pipe => true)
       when (String === input or StringIO === input)
         CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => input, :pipe => true)
       else
         input
       end

  current_key = nil
  current_parts = []

  Open.write(output) do |os|
    until is.eof?
      key, *parts = is.gets.sub("\n", '').split(sep, -1)

      # An empty line yields no key at all; skip it.
      next if key.nil?

      current_key ||= key

      if current_key == key
        parts.each_with_index do |part, i|
          if current_parts[i].nil?
            current_parts[i] = part
          else
            current_parts[i] = current_parts[i] << "|" << part
          end
        end
      else
        os.puts [current_key, current_parts].flatten * sep
        current_key = key
        current_parts = parts
      end
    end

    # The loop only emits a group when the *next* key shows up, so the last
    # group has to be written here or it would be lost.
    os.puts [current_key, current_parts].flatten * sep unless current_key.nil?
  end
end
43
+
2
44
  def self.paste_merge(file1, file2, output, sep = "\t")
3
45
  case
4
46
  when (String === file1 and not file1.index("\n") and file1.length < 250 and File.exists?(file1))
@@ -186,7 +228,7 @@ class TSV
186
228
  if other.include? key
187
229
  new_values = other[key].values_at *fields
188
230
  new_values.collect!{|v| [v]} if type == :double and not other.type == :double
189
- new_values.collect!{|v| v.first} if not type == :double and other.type == :double
231
+ new_values.collect!{|v| v.nil? ? nil : v.first} if not type == :double and other.type == :double
190
232
  self[key] = self[key].concat new_values
191
233
  else
192
234
  if type == :double
@@ -223,8 +265,8 @@ class TSV
223
265
  end
224
266
  end
225
267
 
226
- new_values.collect!{|v| [v]} if type == :double and not other.type == :double
227
- new_values.collect!{|v| v.first} if not type == :double and other.type == :double
268
+ new_values.collect!{|v| [v]} if type == :double and not other.type == :double
269
+ new_values.collect!{|v| v.nil? ? nil : v.first} if not type == :double and other.type == :double
228
270
  all_new_values << new_values
229
271
  end
230
272
  end
@@ -274,7 +316,7 @@ class TSV
274
316
  end
275
317
  end
276
318
  new_values.collect!{|v| [v]} if type == :double and not other.type == :double
277
- new_values.collect!{|v| v.first} if not type == :double and other.type == :double
319
+ new_values.collect!{|v| v.nil? ? nil : v.first} if not type == :double and other.type == :double
278
320
  all_new_values << new_values
279
321
  end
280
322
  end
@@ -330,8 +372,8 @@ class TSV
330
372
  end
331
373
 
332
374
  def self.build_traverse_index(files, options = {})
333
- options = Misc.add_defaults options, :in_namespace => false, :persist_input => false
334
- in_namespace = options[:in_namespace]
375
+ options = Misc.add_defaults options, :in_namespace => false, :persist_input => false
376
+ in_namespace = options[:in_namespace]
335
377
  persist_input = options[:persist_input]
336
378
 
337
379
  path = find_path(files, options)
@@ -339,26 +381,42 @@ class TSV
339
381
  return nil if path.nil?
340
382
 
341
383
  traversal_ids = path.collect{|p| p.first}
342
-
343
- Log.medium "Found Traversal: #{traversal_ids * " => "}"
344
-
345
- current_id, current_file = path.shift
346
- current_key = current_file.all_fields.first
347
384
 
348
- index = current_file.index :target => current_id, :fields => current_key, :persistence => persist_input
385
+ Log.medium "Found Traversal: #{traversal_ids * " => "}"
386
+
387
+ data_key, data_file = path.shift
388
+ if data_key == data_file.key_field
389
+ Log.debug "Data index not required '#{data_file.key_field}' => '#{data_key}'"
390
+ data_index = nil
391
+ else
392
+ Log.debug "Data index required"
393
+ data_index = data_file.index :target => data_key, :fields => data_file.key_field, :persistence => false
394
+ end
349
395
 
396
+ current_index = data_index
397
+ current_key = data_key
350
398
  while not path.empty?
351
- current_id, current_file = path.shift
352
- current_index = current_file.index :target => current_id, :fields => index.fields.first, :persistence => true
353
- index.process 0 do |value|
354
- current_index.values_at(*value).flatten.uniq
399
+ next_key, next_file = path.shift
400
+
401
+ if current_index.nil?
402
+ current_index = next_file.index :target => next_key, :fields => current_key, :persistence => persist_input
403
+ else
404
+ next_index = next_file.index :target => next_key, :fields => current_key, :persistence => persist_input
405
+ current_index.process current_index.fields.first do |key, values, values|
406
+ if values.nil?
407
+ nil
408
+ else
409
+ next_index.values_at(*values).flatten.collect
410
+ end
411
+ end
412
+ current_index.fields = [next_key]
355
413
  end
356
- index.fields = current_index.fields
357
414
  end
358
415
 
359
- index
416
+ current_index
360
417
  end
361
418
 
419
+
362
420
  def self.find_traversal(tsv1, tsv2, options = {})
363
421
  options = Misc.add_defaults options, :in_namespace => false
364
422
  in_namespace = options[:in_namespace]
@@ -388,14 +446,23 @@ class TSV
388
446
  in_namespace = options[:in_namespace]
389
447
 
390
448
  fields = other.fields - [key_field].concat(self.fields) if fields == :all
391
- fields = other.fields_in_namespace - [key_field].concat(self.fields) if fields.nil?
449
+ if in_namespace
450
+ fields = other.fields_in_namespace - [key_field].concat(self.fields) if fields.nil?
451
+ else
452
+ fields = other.fields - [key_field].concat(self.fields) if fields.nil?
453
+ end
454
+
392
455
  Log.high("Attaching fields:#{fields.inspect} from #{other.filename.inspect}.")
456
+
457
+ other = other.tsv(:persistence => options[:persist_input] == true) unless TSV === other
393
458
  case
394
459
  when key_field == other.key_field
395
460
  attach_same_key other, fields
396
461
  when (not in_namespace and self.fields.include?(other.key_field))
462
+ Log.medium "Found other's key field: #{other.key_field}"
397
463
  attach_source_key other, other.key_field, fields
398
464
  when (in_namespace and self.fields_in_namespace.include?(other.key_field))
465
+ Log.medium "Found other's key field in #{in_namespace}: #{other.key_field}"
399
466
  attach_source_key other, other.key_field, fields
400
467
  else
401
468
  index = TSV.find_traversal(self, other, options)
@@ -1,6 +1,8 @@
1
1
  require 'rbbt/util/resource'
2
2
  require 'rbbt/util/task'
3
3
  require 'rbbt/util/persistence'
4
+ require 'rbbt/util/misc'
5
+
4
6
  module WorkFlow
5
7
  def self.extended(base)
6
8
  class << base
@@ -11,10 +13,16 @@ module WorkFlow
11
13
  base.extend Resource
12
14
  base.lib_dir = Resource.caller_base_dir if base.class == Object
13
15
  base.tasks = {}
16
+ base.tasks.extend IndiferentHash
14
17
  base.jobdir = (File.exists?(base.var.find(:lib)) ? base.var.find(:lib) : base.var.find)
15
18
  base.clear_dangling
16
19
  end
17
20
 
21
# Replaces the task table. The given hash itself is extended with
# IndiferentHash (it is not copied) and stored in @tasks.
def tasks=(tasks)
  @tasks = tasks.extend(IndiferentHash)
end
25
+
18
26
  def local_persist(*args, &block)
19
27
  argsv = *args
20
28
  options = argsv.pop
@@ -34,6 +42,7 @@ module WorkFlow
34
42
  @dangling_option_types = {}
35
43
  @dangling_option_defaults = {}
36
44
  @dangling_dependencies = nil
45
+ @dangling_description = nil
37
46
  end
38
47
 
39
48
  def task_option(*args)
@@ -49,13 +58,18 @@ module WorkFlow
49
58
  @dangling_dependencies = dependencies
50
59
  end
51
60
 
61
# Remembers +description+ as the pending description; process_dangling
# later returns it together with the other pending task settings.
def task_description(description)
  @dangling_description = description
end
64
+
52
65
  def process_dangling
53
66
  res = [
54
67
  @dangling_options,
55
68
  Hash[*@dangling_options.zip(@dangling_option_descriptions.values_at(*@dangling_options)).flatten],
56
69
  Hash[*@dangling_options.zip(@dangling_option_types.values_at(*@dangling_options)).flatten],
57
70
  Hash[*@dangling_options.zip(@dangling_option_defaults.values_at(*@dangling_options)).flatten],
58
- @dangling_dependencies || @last_task,
71
+ (@dangling_dependencies || [@last_task]).compact,
72
+ @dangling_description,
59
73
  ]
60
74
 
61
75
  clear_dangling
@@ -70,37 +84,17 @@ module WorkFlow
70
84
  persistence = :marshal
71
85
  end
72
86
 
73
- options, option_descriptions, option_types, option_defaults, dependencies = process_dangling
87
+ options, option_descriptions, option_types, option_defaults, dependencies, description = process_dangling
74
88
  option_descriptions.delete_if do |k,v| v.nil? end
75
89
  option_types.delete_if do |k,v| v.nil? end
76
90
  option_defaults.delete_if do |k,v| v.nil? end
77
- task = Task.new name, persistence, options, option_descriptions, option_types, option_defaults, self, dependencies, self, &block
91
+ task = Task.new name, persistence, options, option_descriptions, option_types, option_defaults, self, dependencies, self, description, &block
78
92
  tasks[name] = task
79
93
  @last_task = task
80
94
  end
81
95
 
82
96
  def job(task, jobname, *args)
83
- task = tasks[task]
84
- raise "Task #{ task } not found" if task.nil?
85
-
86
- all_options, option_descriptions, option_types, option_defaults = task.recursive_options
87
-
88
- non_optional_arguments = all_options.reject{|option| option_defaults.include? option}
89
- run_options = nil
90
-
91
- case
92
- when args.length == non_optional_arguments.length
93
- run_options = Hash[*non_optional_arguments.zip(args).flatten].merge option_defaults
94
- when args.length == non_optional_arguments.length + 1
95
- optional_args = args.pop
96
- run_options = option_defaults.
97
- merge(optional_args).
98
- merge(Hash[*non_optional_arguments.zip(args).flatten])
99
- else
100
- raise "Number of non optional arguments (#{non_optional_arguments * ', '}) does not match given (#{args.flatten * ", "})"
101
- end
102
-
103
- task.job(jobname, run_options)
97
+ tasks[task].job(jobname, *args)
104
98
  end
105
99
 
106
100
  def run(*args)