rbbt-util 5.26.77 → 5.26.78

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,8 @@ require 'rbbt/persist'
2
2
  require 'rbbt/persist/tsv'
3
3
  require 'rbbt/util/log'
4
4
  require 'rbbt/util/semaphore'
5
- require 'rbbt/workflow/accessor'
5
+ require 'rbbt/workflow/step/accessor'
6
+ require 'rbbt/workflow/step/prepare'
6
7
 
7
8
  class Step
8
9
  attr_accessor :clean_name, :path, :task, :workflow, :inputs, :dependencies, :bindings
@@ -140,7 +141,6 @@ class Step
140
141
  dep.inputs.zip(dep.inputs.fields).each do |v,f|
141
142
  if i.include?(f) && i[f] != v
142
143
  Log.debug "Conflict in #{ f }: #{[Misc.fingerprint(i[f]), Misc.fingerprint(v)] * " <-> "}"
143
- i[f] = nil
144
144
  else
145
145
  i[f] = v
146
146
  end
@@ -309,6 +309,46 @@ class Step
309
309
  child_pid
310
310
  end
311
311
 
312
# Runs an external command through CMD.cmd on behalf of this step.
#
# The trailing Hash in +args+ (if any) is taken as the options. Its :log
# entry selects the level used when echoing the command output; :log and
# :pipe are then forced to true for the CMD.cmd call. The first pid reported
# by the resulting stream is appended to the step's :children_pids info.
#
# The caller's options Hash is copied, never modified.
#
# Returns nil once the stream has been fully read and joined.
def cmd(*args)
  options = Hash === args.last ? args.pop.dup : {}

  # A true, false or missing :log all end up as level 0: `false || 0` is
  # already 0, so the previous FalseClass special case could never trigger.
  level = options[:log] || 0
  level = 0 if TrueClass === level
  level = level.to_i

  options[:log] = true
  options[:pipe] = true

  io = CMD.cmd(*args, options)
  child_pid = io.pids.first

  # Build a new list instead of appending to the array held in the info cache
  set_info :children_pids, (info[:children_pids] || []) + [child_pid]

  while c = io.getc
    STDERR << c if Log.severity <= level
    next unless c == "\n"

    # After each newline, emit the prefix for the next line of output
    if pid
      Log.logn "STDOUT [#{pid}]: ", level
    else
      Log.logn "STDOUT: ", level
    end
  end

  io.join

  nil
end
351
+
312
352
 
313
353
  def load
314
354
  res = begin
@@ -0,0 +1,685 @@
1
+ class Step
2
+
3
+ INFO_SERIALIAZER = Marshal
4
+
5
# Joins every job that is not yet done, one thread per job.
#
# A single Step is accepted in place of a list. If anything is raised while
# waiting, the waiting threads are terminated, every job is aborted and the
# exception is re-raised.
def self.wait_for_jobs(jobs)
  jobs = [jobs] if Step === jobs
  threads = []
  begin
    threads = jobs.collect do |job|
      Thread.new do
        begin
          job.join unless job.done?
        rescue Exception => error
          Log.error "Exception waiting for job: #{Log.color :blue, job.path}"
          raise error
        end
      end
    end

    threads.each(&:join)
  rescue Exception => error
    threads.each(&:exit)
    jobs.each(&:abort)
    raise error
  end
end
28
+
29
# Path of the companion '.files' directory for +path+ (nil when +path+ is nil).
def self.files_dir(path)
  return nil if path.nil?
  path + '.files'
end
32
+
33
# Path of the companion '.info' file for +path+ (nil when +path+ is nil).
def self.info_file(path)
  return nil if path.nil?
  path + '.info'
end
36
+
37
# Builds the temporary path for +path+: same directory, file name prefixed
# with a dot. Path objects are resolved with #find first and the result is
# always an expanded (absolute) path.
def self.tmp_path(path)
  path = path.find if Path === path
  path = File.expand_path(path)
  dir = File.dirname(path)
  filename = File.basename(path)
  # Interpolate instead of `'.' << filename`, which mutates a string literal
  # and fails when string literals are frozen.
  File.join(dir, ".#{filename}")
end
44
+
45
# Path of the companion '.md5' file for +path+ (nil when +path+ is nil).
def self.md5_file(path)
  return nil if path.nil?
  path + '.md5'
end
48
+
49
# Path of the companion '.pid' file for +path+ (nil when +path+ is nil).
def self.pid_file(path)
  return nil if path.nil?
  path + '.pid'
end
52
+
53
# Loads the info hash stored in the info file that belongs to +path+.
#
# Best effort: any StandardError while opening or deserializing is logged
# and an empty Hash is returned. Interrupts, exits and other non-standard
# exceptions are no longer swallowed and propagate to the caller.
def self.step_info(path)
  Open.open(info_file(path), :mode => 'rb') do |f|
    INFO_SERIALIAZER.load(f)
  end
rescue StandardError => error
  Log.exception error
  {}
end
63
+
64
# Strips the trailing '.info' (and, when given, the '.<extension>' before it)
# from an info file name.
#
# The extension is matched literally: it is escaped before being placed in
# the pattern, so characters such as '.' or '+' are not read as regex syntax.
def self.job_name_for_info_file(info_file, extension = nil)
  suffix = if extension and not extension.empty?
             /\.#{Regexp.escape(extension.to_s)}\.info$/
           else
             /\.info$/
           end
  info_file.sub(suffix, '')
end
71
+
72
# Writes each input value into a file named after the input inside +dir+.
#
# * Array values are written one element per line.
# * IO values are handed to Open.write as they are.
# * Inputs whose declared type is "file" are linked when the value is the
#   path of an existing file; otherwise their text goes to '<name>.read'.
# * Anything else is written via #to_s.
#
# Returns whether +inputs+ had any entry.
def self.save_inputs(inputs, input_types, dir)
  inputs.each do |name, value|
    type = input_types[name].to_s
    path = File.join(dir, name.to_s)

    Log.debug "Saving job input #{name} (#{type}) into #{path}"
    case value
    when Array
      Open.write(path, value * "\n")
    when IO
      Open.write(path, value)
    else
      if type == "file"
        # File.exist? replaces File.exists?, which Ruby 3.2 removed
        if String === value && File.exist?(value)
          Open.link(value, path)
        else
          Open.write(path + '.read', value.to_s)
        end
      else
        Open.write(path, value.to_s)
      end
    end
  end.any?
end
94
+
95
# Saves into +dir+ the recursive inputs of +job+ that belong to its own task.
#
# Inputs are skipped when they are not listed in the task's :inputs, when
# +options+ is given and does not include the input name, or when the value
# is nil. The selection is delegated to save_inputs.
#
# Returns whether any input was selected.
def self.save_job_inputs(job, dir, options = nil)
  options = IndiferentHash.setup(options.dup) if options

  workflow = job.workflow
  workflow = Kernel.const_get(workflow) if String === workflow
  task_info = workflow.task_info(job.task_name)
  input_types = task_info[:input_types]
  task_inputs = task_info[:inputs]

  values = job.recursive_inputs
  selected = values.zip(values.fields).select do |value, name|
    task_inputs.include?(name.to_sym) && (!options || options.include?(name)) && !value.nil?
  end

  inputs = {}
  selected.each { |value, name| inputs[name] = value }
  save_inputs(inputs, input_types, dir)

  inputs.any?
end
116
+
117
# Job name: whatever follows "/<task_name>/" in the step path (memoized).
def name
  return @name if @name
  task_segment = /.*\/#{Regexp.quote task_name.to_s}\/(.*)/
  @name = path.sub(task_segment, '\1')
end
120
+
121
# Task name and job name joined by a slash.
def short_path
  [task_name, name].join("/")
end
124
+
125
# Name of the task this step belongs to, read from +task+ and memoized.
def task_name
  @task_name = task.name unless @task_name
  @task_name
end
128
+
129
+ # {{{ INFO
130
+
131
# Info file path for this step, computed by Step.info_file and memoized.
def info_file
  @info_file = Step.info_file(path) unless @info_file
  @info_file
end
134
+
135
# Pid file path for this step, computed by Step.pid_file and memoized.
def pid_file
  @pid_file = Step.pid_file(path) unless @pid_file
  @pid_file
end
138
+
139
# Lockfile guarding the info file; created on first use and then reused.
# The lock path is derived from '<info_file>.lock' under Step.lock_dir.
def info_lock
  return @info_lock unless @info_lock.nil?
  lock_path = Persist.persistence_path(info_file + '.lock', {:dir => Step.lock_dir})
  @info_lock = Lockfile.new(lock_path)
end
147
+
148
# Lock used for status changes: simply the step's @mutex.
def status_lock
  @mutex
end
156
+
157
# Returns the info hash of the step.
#
# * {:status => :noinfo} when there is no info file.
# * The cached hash when the info file has not changed since it was cached.
# * Otherwise the file is deserialized (with nested retries through
#   Misc.insist) and the cache refreshed.
#
# With +check_lock+ a locked info file raises TryAgain inside the retry
# loops. If loading ultimately fails, the info file is replaced by an error
# record and the exception is re-raised.
def info(check_lock = true)
  return {:status => :noinfo} if info_file.nil? || !Open.exists?(info_file)

  begin
    Misc.insist do
      cache_is_fresh = @info_cache && @info_cache_time && Open.ctime(info_file) < @info_cache_time
      return @info_cache if cache_is_fresh

      @info_cache = Misc.insist(3, 1.6, info_file) do
        Misc.insist(2, 1, info_file) do
          Misc.insist(3, 0.2, info_file) do
            raise TryAgain, "Info locked" if check_lock and info_lock.locked?
            Open.open(info_file, :mode => 'rb') do |file|
              INFO_SERIALIAZER.load(file)
            end
          end
        end
      end
      @info_cache_time = Time.now
      @info_cache
    end
  rescue Exception => error
    Log.debug{"Error loading info file: " + info_file}
    Log.exception error
    Open.rm info_file
    Misc.sensiblewrite(info_file, INFO_SERIALIAZER.dump({:status => :error, :messages => ["Info file lost"]}))
    raise error
  end
end
195
+
196
# Writes the initial info record (:waiting status, current pid, path and,
# when present, the dependency list) under the info lock.
#
# Does nothing and returns nil when @exec is set, when there is no info file
# path, or when the info file already exists and +force+ is false.
def init_info(force = false)
  return nil if @exec or info_file.nil?
  return nil if Open.exists?(info_file) and ! force

  Open.lock(info_file, :lock => info_lock) do
    fresh = {:status => :waiting, :pid => Process.pid, :path => path}
    if dependencies
      fresh[:dependencies] = dependencies.collect{|dep| [dep.task_name, dep.name, dep.path]}
    end
    @info_cache = fresh
    Misc.sensiblewrite(info_file, INFO_SERIALIAZER.dump(fresh), :force => true, :lock => false)
    @info_cache_time = Time.now
  end
end
206
+
207
# Stores +value+ under +key+ in the step info and persists the info file
# while holding the info lock.
#
# The value is passed through Annotated.purge when Annotated is defined.
# Returns nil without writing when @exec is set, there is no info file path
# or the step is not writable.
def set_info(key, value)
  return nil if @exec || info_file.nil? || !writable?

  value = Annotated.purge(value) if defined?(Annotated)
  Open.lock(info_file, :lock => info_lock) do
    updated = info(false).dup
    updated[key] = value
    @info_cache = updated
    Misc.sensiblewrite(info_file, INFO_SERIALIAZER.dump(updated), :force => true, :lock => false)
    @info_cache_time = Time.now
    value
  end
end
221
+
222
# Merges +hash+ into the step info and persists the info file while holding
# the info lock.
#
# Each value of +hash+ goes through Annotated.purge when Annotated is
# defined, mirroring what set_info does for a single value. Previously an
# undefined local was purged instead and the incoming values were stored
# untouched. The merge is applied to a copy of the current info, so the
# previously cached hash is not modified in place.
#
# Returns the (purged) hash that was merged, or nil without writing when
# @exec is set, there is no info file path or the step is not writable.
def merge_info(hash)
  return nil if @exec or info_file.nil?
  return nil if ! writable?

  if defined? Annotated
    purged = {}
    hash.each { |key, value| purged[key] = Annotated.purge(value) }
    hash = purged
  end

  Open.lock(info_file, :lock => info_lock) do
    i = info(false).dup
    i.merge! hash
    @info_cache = i
    dump = INFO_SERIALIAZER.dump(i)
    Misc.sensiblewrite(info_file, dump, :force => true, :lock => false)
    @info_cache_time = Time.now
    hash
  end
end
236
+
237
# Current :status entry of the step info; :error (after logging the
# problem) if the info cannot be read.
def status
  info[:status]
rescue Exception => error
  Log.error "Exception reading status: #{error.message}"
  :error
end
245
+
246
# Persists +status+ as the :status entry of the step info.
def status=(status)
  set_info :status, status
end
249
+
250
# Messages recorded in the step info. When none are stored yet, an empty
# list is saved through set_info (if available) and its result returned.
def messages
  stored = info[:messages]
  return stored if stored
  set_info(:messages, []) if self.respond_to?(:set_info)
end
257
+
258
# Appends +message+, stripped of colour codes by Log.uncolor, to the
# step's message list and saves the list.
def message(message)
  plain = Log.uncolor(message)
  history = messages || []
  set_info(:messages, history << plain)
end
262
+
263
# Colour used to display a status string. Only the part after the last
# '>' is considered: "starting" is yellow, "error"/"aborted" red, "done"
# green and anything else cyan.
def self.status_color(status)
  final = status.split(">").last
  return :yellow if final == "starting"
  return :red if final == "error" || final == "aborted"
  return :green if final == "done"
  :cyan
end
276
+
277
# Logs +status+ (with optional +message+ and +path+) at info level, runs
# the given block, then logs the status again with the elapsed seconds.
#
# The log lines are assembled from parts instead of appending to the string
# returned by Log.color, so that value is never modified. The unused `now`
# locals of the previous version are gone.
#
# Returns the value of the block.
def self.log_block(status, message, path, &block)
  start = Time.now
  status = status.to_s
  status_color = self.status_color status

  Log.info do
    parts = [Log.color(:reset), Log.color(status_color, status)]
    parts << ": #{ message }" if message
    parts << " -- #{Log.color :blue, path.to_s}" if path
    parts << " #{Log.color :yellow, Process.pid}"
    parts.join
  end

  res = yield
  eend = Time.now

  Log.info do
    parts = ["#{ Log.color :cyan, status.to_s } +#{Log.color :green, "%.2f" % (eend - start)}"]
    parts << " -- #{Log.color :blue, path.to_s}" if path
    parts << " #{Log.color :yellow, Process.pid}"
    parts.join
  end

  res
end
302
+
303
# Logs a single info-level line with the coloured +status+, the optional
# +message+ and +path+, and the current process id.
#
# The line is assembled from parts instead of appending to the string
# returned by Log.color, so that value is never modified.
def self.log_string(status, message, path)
  Log.info do
    label = status.to_s
    status_color = self.status_color label

    parts = [Log.color(:reset), Log.color(status_color, label)]
    parts << ": #{ message }" if message
    parts << " -- #{Log.color :blue, path.to_s}" if path
    parts << " #{Log.color :yellow, Process.pid}"
    parts.join
  end
end
317
+
318
# Runs the block with a progress bar from Log::ProgressBar.with_bar.
#
# +options+ receives :severity and :file defaults and its :max entry is
# taken out to size the bar. A block that returns an IO raises KeepBar with
# it. Errors raised inside the block are logged and re-raised.
def self.log_progress(status, options = {}, path = nil, &block)
  options = Misc.add_defaults options, :severity => Log::INFO, :file => path
  max = Misc.process_options options, :max
  Log::ProgressBar.with_bar(max, options) do |bar|
    begin
      outcome = yield bar
      raise KeepBar.new outcome if IO === outcome
      outcome
    rescue => error
      Log.exception error
      raise error
    end
  end
end
332
+
333
# Instance shortcut for Step.log_progress that reports into the step's
# :progress file.
def log_progress(status, options = {}, &block)
  progress_file = file(:progress)
  Step.log_progress(status, options, progress_file, &block)
end
336
+
337
# Creates a progress bar that reports into the step's :progress file.
#
# A Hash given as the only argument is taken as the options (with no
# description). Explicit options override the :desc and :file defaults.
def progress_bar(msg = "Progress", options = nil)
  msg, options = nil, msg if Hash === msg and options.nil?
  options = {} if options.nil?

  defaults = {:desc => msg, :file => file(:progress)}
  Log::ProgressBar.new_bar(options[:max], defaults.merge(options))
end
347
+
348
# Dispatches a log request: without a block a single line is logged; with
# a block, a Hash +message+ selects progress-bar logging and anything else
# timed block logging.
def self.log(status, message, path, &block)
  return log_string(status, message, path) unless block
  return log_progress(status, message, path, &block) if Hash === message
  log_block(status, message, path, &block)
end
359
+
360
# Records +status+ (and +message+, when given) in the step info and then
# logs through Step.log using the step path.
def log(status, message = nil, &block)
  self.status = status
  self.message Log.uncolor(message) if message
  Step.log(status, message, path, &block)
end
367
+
368
# Records +ex+ in the step info (:backtrace and :exception entries), logs
# an :error status whose text starts with +msg+ (or the exception class
# when +msg+ is nil) and finally calls _abort.
def exception(ex, msg = nil)
  ex_class = ex.class.to_s
  backtrace = ex.respond_to?(:backtrace) ? ex.backtrace : nil
  message = ex.respond_to?(:message) ? ex.message : nil

  set_info :backtrace, backtrace
  set_info :exception, {:class => ex_class, :message => message, :backtrace => backtrace}

  headline = msg.nil? ? ex_class : msg
  log :error, "#{headline} -- #{message}"
  self._abort
end
381
+
382
# Rebuilds an exception for this step.
#
# Without a stored :exception: Aborted when the step is aborted, an
# Exception carrying the last message when in error, otherwise an Exception
# with an empty message. With a stored :exception its class is looked up and
# instantiated with the stored message and backtrace; if that fails the
# failure is logged and a plain Exception with the stored message returned.
def get_exception
  stored = info[:exception]

  if stored.nil?
    return Aborted if aborted?
    return Exception.new(messages.last) if error?
    return Exception.new ""
  end

  ex_class, ex_message, ex_backtrace = stored.values_at :class, :message, :backtrace
  begin
    rebuilt = Kernel.const_get(ex_class).new ex_message
    rebuilt.set_backtrace ex_backtrace unless ex_backtrace.nil? or ex_backtrace.empty?
    rebuilt
  rescue
    Log.exception $!
    Exception.new ex_message
  end
end
400
+
401
# Whether a failed step is worth retrying: always for aborted steps, never
# for steps that are not in error. For steps in error it is true unless the
# stored exception class is RbbtException or a subclass; any problem while
# checking counts as recoverable.
def recoverable_error?
  return true if aborted?
  return false unless error?

  begin
    stored = info[:exception]
    return true unless stored
    klass = Kernel.const_get(stored[:class])
    ! (klass <= RbbtException)
  rescue Exception
    true
  end
end
412
+
413
# A step has started when its result exists, or when both its pid file
# and its info file exist.
def started?
  Open.exists?(path) || (Open.exists?(pid_file) && Open.exists?(info_file))
end
416
+
417
# A step is waiting when it has an info file but has not started.
def waiting?
  Open.exists?(info_file) && !started?
end
420
+
421
# Recursive dependencies that are considered dirty.
#
# A dependency is left out when it is a remote step, has no info file, has
# an existing (or remote) result, or is in error/aborted/waiting state and
# either its error is not recoverable or its path is among canfail_paths.
def dirty_files
  deps = self.rec_dependencies
  return [] if deps.empty?

  tolerated = self.canfail_paths
  deps.reject do |dep|
    (defined?(WorkflowRESTClient) && WorkflowRESTClient::RemoteStep === dep) ||
      ! Open.exists?(dep.info_file) ||
      (dep.path && (Open.exists?(dep.path) || Open.remote?(dep.path))) ||
      ((dep.error? || dep.aborted? || dep.waiting?) && (! dep.recoverable_error? || tolerated.include?(dep.path)))
  end
end
432
+
433
# Whether the step result cannot be trusted.
#
# True when a pid file exists without an info file or result. Otherwise
# only steps that are done (by result or by status) are examined, and only
# when RBBT_UPDATE is "true": a mismatch between result and status, any
# dirty dependency, or not being updated makes the step dirty.
def dirty?
  return true if Open.exists?(pid_file) && !Open.exists?(info_file) && !done?
  return false unless done? || status == :done
  return false if ENV["RBBT_UPDATE"] != "true"

  current = self.status

  consistent_with_result = [:done, :ending, :producing, :noinfo]
  return true if done? && !consistent_with_result.include?(current)
  return true if current == :done && !done?

  stale = dirty_files
  if stale.any?
    Log.low "Some dirty files found for #{self.path}: #{Misc.fingerprint stale}"
    true
  else
    ! self.updated?
  end
end
455
+
456
# Whether the step has a path and a result exists there.
def done?
  path && Open.exists?(path)
end
459
+
460
# Whether the step is producing a stream: its result is an IO, a saved
# stream is held, or its status is :streaming.
def streaming?
  IO === @result || !@saved_stream.nil? || status == :streaming
end
463
+
464
# True when the step status is :noinfo.
def noinfo?
  :noinfo == status
end
467
+
468
# Reports whether the step process is alive.
#
# Returns false when the step has neither started nor reached :ending, nil
# when no pid is recorded, false when the step is already done, in error or
# aborted, and the pid itself when that process exists. If the process is
# gone, the done/error/aborted state is checked once more and returned.
def running?
  return false unless started? || status == :ending

  pid = info[:pid]
  return nil if pid.nil?
  return false if done? or error? or aborted?
  return pid if Misc.pid_exists?(pid)

  done? or error? or aborted?
end
481
+
482
# A step is stalled when it has started but is neither done, running, in
# error nor aborted.
def stalled?
  begun = started?
  return begun unless begun

  # done? is deliberately consulted again after running?, as the original
  # expression does (presumably to catch a step that finishes in between)
  !(done? || running? || done? || error? || aborted?)
end
485
+
486
# True when the status says :done but no result exists at the step path.
def missing?
  return false unless status == :done
  !Open.exists?(path)
end
489
+
490
# True when the step status is :error.
def error?
  :error == status
end
493
+
494
# True when neither the info nor a pid file provides a pid while the
# status is something other than nil, :aborted, :done or :error.
def nopid?
  pid = info[:pid] || Open.exists?(pid_file)
  return false if pid

  current = status
  ![nil, :aborted, :done, :error].include?(current)
end
498
+
499
# A step is aborted when its status is :aborted, or when its status is
# neither :noinfo nor :setup and no pid can be found for it.
def aborted?
  current = self.status
  return true if current == :aborted
  return false if current == :noinfo || current == :setup
  nopid?
end
503
+
504
+ # {{{ FILES
505
+
506
# Files directory of this step, computed by Step.files_dir and memoized.
def files_dir
  @files_dir = Step.files_dir(path) unless @files_dir
  @files_dir
end
509
+
510
# Temporary path of this step, computed by Step.tmp_path and memoized.
def tmp_path
  @tmp_path = Step.tmp_path(path) unless @tmp_path
  @tmp_path
end
513
+
514
# Lists every non-directory entry found under the step's files directory,
# each expressed relative to that directory.
def files
  everything = Dir.glob(File.join(files_dir, '**', '*'))
  regular = everything.reject { |found| File.directory?(found) }
  regular.collect { |found| Misc.path_relative_to(files_dir, found) }
end
520
+
521
# Path object for the entry called +name+ inside the step's files directory.
def file(name)
  location = File.join(files_dir, name.to_s)
  Path.setup(location)
end
524
+
525
# Writes +content+ into the step file called +name+.
#
# Strings are written as they are, Arrays one element per line, TSV objects
# through #to_s, Hashes as tab-separated key/value lines, and anything else
# through #to_s.
def save_file(name, content)
  text = case content
         when String
           content
         when Array
           content.join("\n")
         when TSV
           content.to_s
         when Hash
           content.collect { |*pair| pair.join("\t") }.join("\n")
         else
           content.to_s
         end
  Open.write(file(name), text)
end
540
+
541
# Loads the step file called +name+.
#
# When +type+ is nil it is guessed from the file extension: tc, tsv,
# list/ary/array (→ :array), yaml or marshal; anything else is :other.
#
# * :tc      opens the file with Persist.open_tokyocabinet
# * :tsv     opens it as a TSV, passing +options+
# * :array   returns its lines
# * :yaml    parses it with YAML.load
# * :marshal deserializes it with Marshal.load
# * other    returns its content as read by Open.read
#
# The YAML and Marshal branches use the block form of Open.open; they used
# to leave the opened stream behind without closing it.
def load_file(name, type = nil, options = {})
  if type.nil?
    # Single match instead of matching the same pattern twice
    found = name.to_s.match(/.*\.(\w+)$/)
    type = case found && found[1]
           when "tc"
             :tc
           when "tsv"
             :tsv
           when "list", "ary", "array"
             :array
           when "yaml"
             :yaml
           when "marshal"
             :marshal
           else
             :other
           end
  end

  case type.to_sym
  when :tc
    Persist.open_tokyocabinet(file(name), false)
  when :tsv
    # NOTE(review): this stream is handed over to TSV.open and is not closed
    # here; whether TSV.open closes it cannot be told from this file
    TSV.open Open.open(file(name)), options
  when :array
    Open.read(file(name)).split "\n"
  when :yaml
    Open.open(file(name)) { |stream| YAML.load(stream) }
  when :marshal
    Open.open(file(name)) { |stream| Marshal.load(stream) }
  else
    Open.read(file(name))
  end
end
578
+
579
# Builds the provenance tree of the step: its own :inputs plus, for every
# dependency whose path exists, the dependency's provenance (or nil when
# the dependency has no info file).
def provenance
  tree = {}
  dependencies.each do |dep|
    next unless dep.path.exists?

    if !Open.exists?(dep.info_file)
      tree[dep.path] = nil
    elsif Open.exists?(dep.path)
      tree[dep.path] = dep.provenance
    end
  end
  {:inputs => info[:inputs], :provenance => tree}
end
591
+
592
# Nested hash of dependency paths: each dependency whose result exists
# maps to its own provenance_paths.
def provenance_paths
  dependencies.each_with_object({}) do |dep, tree|
    tree[dep.path] = dep.provenance_paths if Open.exists? dep.path
  end
end
599
+
600
# Looks up the configuration +key+ through Rbbt::Config.get.
#
# The given +tokens+ are extended with tokens identifying this step:
# "workflow:<workflow>" and "task:<workflow>#<task>" when the step has a
# workflow, and always "task:<task>". A trailing Hash in +tokens+ is passed
# on as the options.
#
# Tokens are built by concatenation into new strings; the previous version
# appended to string literals with `<<`, which fails when string literals
# are frozen.
def config(key, *tokens)
  options = tokens.pop if Hash === tokens.last
  options ||= {}

  new_tokens = []
  if workflow
    workflow_name = workflow.to_s
    new_tokens << ("workflow:" + workflow_name)
    new_tokens << ("task:" + workflow_name + "#" + task_name.to_s)
  end
  new_tokens << ("task:" + task_name.to_s)

  Rbbt::Config.get(key, tokens + new_tokens, options)
end
614
+
615
# Updates the access time of the step result and its info file by running
# `touch -c -h -a` on them through CMD.cmd.
#
# Both paths are shell-escaped, so paths containing spaces or shell
# metacharacters are passed to touch as single arguments. A missing (nil)
# path is left out of the command.
def access
  require 'shellwords'
  targets = [self.path, self.info_file].compact.collect { |target| Shellwords.escape(target.to_s) }
  CMD.cmd("touch -c -h -a #{targets * ' '}")
end
618
+
619
# Calls #access on this step and then on each of its recursive dependencies.
def rec_access
  access
  rec_dependencies.each(&:access)
end
625
+
626
# Wraps +stream+ with Misc.line_monitor_stream, optionally driving a
# progress bar, and optionally compresses the result.
#
# options[:bar] may be true (default bar), a Hash (bar options), a Numeric
# (bar maximum) or an existing bar. With a bar, a block taking no or one
# argument gets the bar ticked on every line; a block taking two arguments
# receives the line and the bar and no tick is issued here. The bar is
# removed through the callbacks registered with ConcurrentStream.setup.
#
# :bgzip (or :compress/:gzip set to 'bgzip') selects Open.bgzip; any other
# truthy :compress/:gzip selects Open.gzip.
def monitor_stream(stream, options = {}, &block)
  requested = options[:bar]
  bar = case requested
        when TrueClass then progress_bar
        when Hash      then progress_bar requested
        when Numeric   then progress_bar :max => requested
        else requested
        end

  out = if bar.nil?
          Misc.line_monitor_stream stream, &block
        elsif block.nil? || block.arity == 0
          Misc.line_monitor_stream(stream) { bar.tick }
        elsif block.arity == 1
          Misc.line_monitor_stream(stream) do |line|
            bar.tick
            block.call line
          end
        elsif block.arity == 2
          Misc.line_monitor_stream(stream) { |line| block.call line, bar }
        end

  on_abort = Proc.new{ Log::ProgressBar.remove_bar(bar, true) if bar }
  on_finish = Proc.new{ Log::ProgressBar.remove_bar(bar) if bar }
  ConcurrentStream.setup(out, :abort_callback => on_abort, :callback => on_finish)

  compression = options[:compress] || options[:gzip]
  use_bgzip = compression.to_s == 'bgzip' || options[:bgzip]

  if use_bgzip
    Open.bgzip(out)
  elsif compression
    Open.gzip(out)
  else
    out
  end
end
673
+
674
# Whether a finished step now lives at a path different from the :path
# recorded in its info.
def relocated?
  finished = done?
  return finished unless finished

  recorded = info[:path]
  return recorded unless recorded

  recorded != path
end
677
+
678
# KnowledgeBase rooted at the step's 'knowledge_base' file directory.
# Built on first use with the given +organism+ and reused afterwards.
def knowledge_base(organism = nil)
  return @_kb if @_kb
  kb_dir = self.file('knowledge_base')
  @_kb = KnowledgeBase.new kb_dir, organism
end
684
+
685
+ end