rbbt-util 5.26.77 → 5.26.78

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,7 +2,8 @@ require 'rbbt/persist'
2
2
  require 'rbbt/persist/tsv'
3
3
  require 'rbbt/util/log'
4
4
  require 'rbbt/util/semaphore'
5
- require 'rbbt/workflow/accessor'
5
+ require 'rbbt/workflow/step/accessor'
6
+ require 'rbbt/workflow/step/prepare'
6
7
 
7
8
  class Step
8
9
  attr_accessor :clean_name, :path, :task, :workflow, :inputs, :dependencies, :bindings
@@ -140,7 +141,6 @@ class Step
140
141
  dep.inputs.zip(dep.inputs.fields).each do |v,f|
141
142
  if i.include?(f) && i[f] != v
142
143
  Log.debug "Conflict in #{ f }: #{[Misc.fingerprint(i[f]), Misc.fingerprint(v)] * " <-> "}"
143
- i[f] = nil
144
144
  else
145
145
  i[f] = v
146
146
  end
@@ -309,6 +309,46 @@ class Step
309
309
  child_pid
310
310
  end
311
311
 
312
# Runs a command through CMD.cmd as a piped child, records the child's pid in
# info[:children_pids] and relays the child's output, character by character,
# to STDERR whenever Log.severity is at or below the requested level.
#
# args - arguments for CMD.cmd; a trailing Hash is taken as its options.
#        The :log option selects the level used for the relayed output
#        (true => 0, missing => 0, otherwise its integer value). CMD.cmd
#        itself is always called with :log => true and :pipe => true.
#
# Returns nil once the child has been joined.
def cmd(*args)
  all_args = args.dup

  # Work on a copy of the options so the caller's Hash is left untouched
  options = Hash === all_args.last ? all_args.pop.clone(freeze: false) : {}

  # NOTE(review): `false || 0` is 0, so the FalseClass branch below can never
  # select level 10 -- kept as in the original, confirm the intended meaning
  level = options[:log] || 0
  level = 0 if TrueClass === level
  level = 10 if FalseClass === level
  level = level.to_i

  options[:log] = true
  options[:pipe] = true

  io = CMD.cmd(*all_args, options)
  child_pid = io.pids.first

  # Register the child so it is listed among this step's children
  children_pids = info[:children_pids]
  if children_pids.nil?
    children_pids = [child_pid]
  else
    children_pids << child_pid
  end
  set_info :children_pids, children_pids

  while c = io.getc
    STDERR << c if Log.severity <= level
    # After every newline, emit the prefix for the next line of output
    if c == "\n"
      if pid
        Log.logn "STDOUT [#{pid}]: ", level
      else
        Log.logn "STDOUT: ", level
      end
    end
  end

  io.join

  nil
end
351
+
312
352
 
313
353
  def load
314
354
  res = begin
@@ -0,0 +1,685 @@
1
+ class Step
2
+
3
+ INFO_SERIALIAZER = Marshal
4
+
5
# Joins every job that is not done yet, each in its own thread, and waits for
# all of those threads. Accepts a single Step or a collection of jobs.
# On any exception the waiting threads are stopped, every job is aborted and
# the exception is raised again.
def self.wait_for_jobs(jobs)
  jobs = [jobs] if Step === jobs
  begin
    waiters = []

    waiters = jobs.map do |job|
      Thread.new do
        begin
          job.join unless job.done?
        rescue Exception
          Log.error "Exception waiting for job: #{Log.color :blue, job.path}"
          raise
        end
      end
    end

    waiters.each { |waiter| waiter.join }
  rescue Exception
    waiters.each { |waiter| waiter.exit }
    jobs.each { |job| job.abort }
    raise
  end
end
28
+
29
# Name of the directory holding the extra files of the job stored at +path+
# (the path followed by '.files'); nil when +path+ is nil.
def self.files_dir(path)
  return nil if path.nil?
  path + '.files'
end
32
+
33
# Name of the info file of the job stored at +path+ (the path followed by
# '.info'); nil when +path+ is nil.
def self.info_file(path)
  return nil if path.nil?
  path + '.info'
end
36
+
37
# Temporary location for +path+: same directory, file name prefixed with a
# dot. Path objects are resolved with #find first and the path is expanded.
def self.tmp_path(path)
  path = path.find if Path === path
  expanded = File.expand_path(path)
  hidden_name = '.' + File.basename(expanded)
  File.join(File.dirname(expanded), hidden_name)
end
44
+
45
# Name of the md5 file of the job stored at +path+ (the path followed by
# '.md5'); nil when +path+ is nil.
def self.md5_file(path)
  return nil if path.nil?
  path + '.md5'
end
48
+
49
# Name of the pid file of the job stored at +path+ (the path followed by
# '.pid'); nil when +path+ is nil.
def self.pid_file(path)
  return nil if path.nil?
  path + '.pid'
end
52
+
53
# Loads the serialized info of the job stored at +path+.
# Any exception while reading is logged and an empty Hash is returned instead.
def self.step_info(path)
  Open.open(info_file(path), :mode => 'rb') { |stream| INFO_SERIALIAZER.load(stream) }
rescue Exception
  Log.exception $!
  {}
end
63
+
64
# Derives the job name from the name of its info file by removing the
# trailing '.info' and, when +extension+ is given and not empty, the
# '.<extension>' that precedes it.
#
# The extension is matched literally: characters such as '.' or '+' in it are
# not treated as regular expression syntax.
def self.job_name_for_info_file(info_file, extension = nil)
  suffix = (extension and not extension.empty?) ? ".#{extension}.info" : ".info"
  info_file.sub(/#{Regexp.escape(suffix)}$/, '')
end
71
+
72
# Saves each input into its own file under +dir+, named after the input.
#
# inputs      - Hash of input name => value
# input_types - Hash of input name => type
# dir         - directory that receives the files
#
# Arrays are written one element per line and IO values are handed to
# Open.write directly. Inputs of type "file" are linked when the value is the
# path of an existing file; otherwise the value is written to '<name>.read'.
# Anything else is written as a String.
#
# Returns true when +inputs+ was not empty.
def self.save_inputs(inputs, input_types, dir)
  inputs.each do |name,value|
    type = input_types[name].to_s
    path = File.join(dir, name.to_s)

    Log.debug "Saving job input #{name} (#{type}) into #{path}"
    case
    when Array === value
      Open.write(path, value * "\n")
    when IO === value
      Open.write(path, value)
    when type == "file"
      # File.exist? instead of File.exists?, which Ruby 3.2 removed
      if String === value && File.exist?(value)
        Open.link(value, path)
      else
        Open.write(path + '.read', value.to_s)
      end
    else
      Open.write(path, value.to_s)
    end
  end.any?
end
94
+
95
# Saves into +dir+ the recursive inputs of +job+ that belong to its own task.
# When +options+ is given only the inputs it includes are saved; nil values
# are always skipped. Returns true when at least one input was selected.
def self.save_job_inputs(job, dir, options = nil)
  options = IndiferentHash.setup(options.dup) if options

  task_name = job.task_name
  workflow = job.workflow
  workflow = Kernel.const_get(workflow) if String === workflow
  task_info = workflow.task_info(task_name)
  input_types = task_info[:input_types]
  task_inputs = task_info[:inputs]

  selected = {}
  job.recursive_inputs.zip(job.recursive_inputs.fields).each do |value, input_name|
    next unless task_inputs.include?(input_name.to_sym)
    next if options && !options.include?(input_name)
    next if value.nil?
    selected[input_name] = value
  end
  save_inputs(selected, input_types, dir)

  selected.any?
end
116
+
117
# Job name: whatever follows the last '/<task_name>/' component of the path.
# The value is memoized.
def name
  @name ||= begin
              pattern = /.*\/#{Regexp.quote task_name.to_s}\/(.*)/
              path.sub(pattern, '\1')
            end
end
120
+
121
# Task name and job name joined by '/'.
def short_path
  [task_name, name].join("/")
end
124
+
125
# Name of the task of this step, memoized.
def task_name
  @task_name = task.name unless @task_name
  @task_name
end
128
+
129
+ # {{{ INFO
130
+
131
# Info file for this step's path, memoized.
def info_file
  @info_file = Step.info_file(path) unless @info_file
  @info_file
end
134
+
135
# Pid file for this step's path, memoized.
def pid_file
  @pid_file = Step.pid_file(path) unless @pid_file
  @pid_file
end
138
+
139
# Lockfile guarding the info file, created on first use. Its path is derived
# from '<info_file>.lock' through Persist.persistence_path with Step.lock_dir
# as directory.
def info_lock
  if @info_lock.nil?
    lock_path = Persist.persistence_path(info_file + '.lock', {:dir => Step.lock_dir})
    # Alternative left disabled by the author:
    #Lockfile.new lock_path, :refresh => false, :dont_use_lock_id => true
    @info_lock = Lockfile.new lock_path
  end
  @info_lock
end
147
+
148
# Lock used for status changes: currently just @mutex.
# A Lockfile-based version ('<info_file>.status.lock' under Step.lock_dir)
# was left disabled by the author.
def status_lock
  @mutex
end
156
+
157
# Returns the info Hash of this step.
#
# Without an info file the result is {:status => :noinfo}. The cached copy is
# returned while the info file's ctime is older than the time the cache was
# filled; otherwise the file is loaded again (with nested Misc.insist
# retries). While +check_lock+ is true a locked info_lock raises TryAgain so
# the read is retried.
#
# If loading ends in an exception the info file is replaced by one with
# status :error and the message "Info file lost", and the exception is raised
# again.
def info(check_lock = true)
  return {:status => :noinfo} if info_file.nil? or not Open.exists? info_file
  begin
    Misc.insist do
      cache_valid = @info_cache && @info_cache_time && Open.ctime(info_file) < @info_cache_time
      return @info_cache if cache_valid

      @info_cache = Misc.insist(3, 1.6, info_file) do
        Misc.insist(2, 1, info_file) do
          Misc.insist(3, 0.2, info_file) do
            raise TryAgain, "Info locked" if check_lock and info_lock.locked?
            Open.open(info_file, :mode => 'rb') do |file|
              INFO_SERIALIAZER.load(file) #|| {}
            end
          end
        end
      end
      @info_cache_time = Time.now
      @info_cache
    end
  rescue Exception
    Log.debug{"Error loading info file: " + info_file}
    Log.exception $!
    Open.rm info_file
    Misc.sensiblewrite(info_file, INFO_SERIALIAZER.dump({:status => :error, :messages => ["Info file lost"]}))
    raise $!
  end
end
195
+
196
# Writes the initial info for the step: status :waiting, the current process
# id, the path and, when there are dependencies, their task name, name and
# path. Does nothing (returns nil) when @exec is set, when there is no info
# file, or when the info file already exists and +force+ is false.
def init_info(force = false)
  return nil if @exec
  return nil if info_file.nil?
  return nil if Open.exists?(info_file) && !force

  Open.lock(info_file, :lock => info_lock) do
    initial = {:status => :waiting, :pid => Process.pid, :path => path}
    if dependencies
      initial[:dependencies] = dependencies.map { |dep| [dep.task_name, dep.name, dep.path] }
    end
    @info_cache = initial
    Misc.sensiblewrite(info_file, INFO_SERIALIAZER.dump(initial), :force => true, :lock => false)
    @info_cache_time = Time.now
  end
end
206
+
207
# Stores +value+ under +key+ in the info file, holding the info lock while a
# copy of the current info is updated and written. Annotations are purged
# from the value when Annotated is defined.
# Returns the stored value, or nil when nothing is written (@exec set, no
# info file, or the step is not writable).
def set_info(key, value)
  return nil if @exec or info_file.nil?
  return nil unless writable?
  value = Annotated.purge(value) if defined? Annotated
  Open.lock(info_file, :lock => info_lock) do
    updated = info(false).dup
    updated[key] = value
    @info_cache = updated
    serialized = INFO_SERIALIAZER.dump(updated)
    Misc.sensiblewrite(info_file, serialized, :force => true, :lock => false)
    @info_cache_time = Time.now
    value
  end
end
221
+
222
# Merges +hash+ into the info file in a single locked write.
#
# As in set_info, annotations are purged from the incoming data when
# Annotated is defined and a copy of the current info is updated, so the
# cached Hash is never modified in place.
#
# Returns the (purged) hash that was merged, or nil when nothing is written
# (@exec set, no info file, or the step is not writable).
def merge_info(hash)
  return nil if @exec or info_file.nil?
  return nil if ! writable?
  # Purge the data actually being merged
  hash = Annotated.purge hash if defined? Annotated
  Open.lock(info_file, :lock => info_lock) do
    i = info(false).dup
    i.merge! hash
    @info_cache = i
    dump = INFO_SERIALIAZER.dump(i)
    Misc.sensiblewrite(info_file, dump, :force => true, :lock => false)
    @info_cache_time = Time.now
    hash
  end
end
236
+
237
# Status stored in the info; :error when the info cannot be read (the
# failure is logged).
def status
  info[:status]
rescue Exception
  Log.error "Exception reading status: #{$!.message}"
  :error
end
245
+
246
# Stores the given status in the info.
def status=(status)
  set_info :status, status
end
249
+
250
# Messages stored in the info. When there are none, an empty list is stored
# through set_info (if the object responds to it) and its result returned.
def messages
  stored = info[:messages]
  return stored if stored
  set_info(:messages, []) if self.respond_to?(:set_info)
end
257
+
258
# Appends +message+, stripped of colors, to the messages stored in the info.
def message(message)
  plain = Log.uncolor(message)
  log_so_far = messages || []
  set_info(:messages, log_so_far << plain)
end
262
+
263
# Color for a status string. Only the part after the last '>' counts:
# "starting" is yellow, "error" and "aborted" red, "done" green and anything
# else cyan.
def self.status_color(status)
  colors = {
    "starting" => :yellow,
    "error"    => :red,
    "aborted"  => :red,
    "done"     => :green
  }
  colors.fetch(status.split(">").last, :cyan)
end
276
+
277
# Logs the status (with optional message and path) at info level, runs the
# block, then logs the status again with the elapsed time.
# Returns the value of the block.
def self.log_block(status, message, path, &block)
  started = Time.now
  status = status.to_s
  color = self.status_color status

  Log.info do
    line = Log.color :reset
    line << "#{ Log.color color, status}"
    line << ": #{ message }" if message
    line << " -- #{Log.color :blue, path.to_s}" if path
    line << " #{Log.color :yellow, Process.pid}"
    line
  end

  result = yield
  finished = Time.now

  Log.info do
    line = "#{ Log.color :cyan, status.to_s } +#{Log.color :green, "%.2f" % (finished - started)}"
    line << " -- #{Log.color :blue, path.to_s}" if path
    line << " #{Log.color :yellow, Process.pid}"
    line
  end

  result
end
302
+
303
# Logs, at info level, one line with the colored status, the optional message
# and path, and the current process id.
def self.log_string(status, message, path)
  Log.info do
    status = status.to_s
    color = self.status_color status

    line = Log.color :reset
    line << "#{ Log.color color, status}"
    line << ": #{ message }" if message
    line << " -- #{Log.color :blue, path.to_s}" if path
    line << " #{Log.color :yellow, Process.pid}"
    line
  end
end
317
+
318
# Runs the block with a progress bar built from +options+ (defaults:
# severity Log::INFO and +path+ as the bar's file; :max is extracted as the
# bar maximum). When the block returns an IO, KeepBar is raised with it.
# Errors rescued here are logged and raised again.
def self.log_progress(status, options = {}, path = nil, &block)
  options = Misc.add_defaults options, :severity => Log::INFO, :file => path
  max = Misc.process_options options, :max
  Log::ProgressBar.with_bar(max, options) do |bar|
    begin
      result = yield bar
      raise KeepBar.new result if IO === result
      result
    rescue
      Log.exception $!
      raise $!
    end
  end
end
332
+
333
# Step.log_progress using this step's :progress file as the bar file.
def log_progress(status, options = {}, &block)
  progress_file = file(:progress)
  Step.log_progress(status, options, progress_file, &block)
end
336
+
337
# Creates a progress bar described by +msg+ and backed by this step's
# :progress file. A Hash given as the only argument is taken as the options;
# options[:max] is used as the bar maximum.
def progress_bar(msg = "Progress", options = nil)
  msg, options = nil, msg if Hash === msg and options.nil?
  options = {} if options.nil?

  defaults = {:desc => msg, :file => file(:progress)}
  Log::ProgressBar.new_bar(options[:max], defaults.merge(options))
end
347
+
348
# Dispatches to the right logger: log_string without a block, log_progress
# when the message is a Hash (progress bar options), log_block otherwise.
def self.log(status, message, path, &block)
  return log_string(status, message, path) unless block

  if Hash === message
    log_progress(status, message, path, &block)
  else
    log_block(status, message, path, &block)
  end
end
359
+
360
# Sets the status of the step, records the uncolored message (if any) and
# logs through Step.log with the step's path.
def log(status, message = nil, &block)
  self.status = status
  self.message(Log.uncolor(message)) if message
  Step.log(status, message, path, &block)
end
367
+
368
# Records an exception in the info (:backtrace and :exception entries), logs
# it with status :error -- prefixed by +msg+ when given, by the exception
# class otherwise -- and then calls _abort.
def exception(ex, msg = nil)
  ex_class = ex.class.to_s
  ex_backtrace = ex.respond_to?(:backtrace) ? ex.backtrace : nil
  ex_message = ex.respond_to?(:message) ? ex.message : nil

  set_info :backtrace, ex_backtrace
  set_info :exception, {:class => ex_class, :message => ex_message, :backtrace => ex_backtrace}

  prefix = msg.nil? ? ex_class : msg
  log :error, "#{prefix} -- #{ex_message}"

  self._abort
end
381
+
382
# Rebuilds the exception recorded in the info.
# With nothing recorded: Aborted (the class itself) for an aborted step, an
# Exception carrying the last message for an errored one, and an Exception
# with an empty message otherwise. If the recorded class cannot be
# instantiated the failure is logged and a plain Exception with the recorded
# message is returned.
def get_exception
  recorded = info[:exception]

  if recorded.nil?
    return Aborted if aborted?
    return Exception.new(messages.last) if error?
    return Exception.new ""
  end

  ex_class, ex_message, ex_backtrace = recorded.values_at :class, :message, :backtrace
  begin
    rebuilt = Kernel.const_get(ex_class).new ex_message
    rebuilt.set_backtrace ex_backtrace unless ex_backtrace.nil? or ex_backtrace.empty?
    rebuilt
  rescue
    Log.exception $!
    Exception.new ex_message
  end
end
400
+
401
# Whether the step can be retried: always for aborted steps, never for steps
# without error. For errored steps it is true unless the recorded exception
# class is RbbtException or one of its subclasses; any problem while checking
# the class counts as recoverable.
def recoverable_error?
  return true if aborted?
  return false unless error?
  begin
    recorded = info[:exception]
    return true unless recorded
    error_class = Kernel.const_get(info[:exception][:class])
    ! (error_class <= RbbtException)
  rescue Exception
    true
  end
end
412
+
413
# A step has started when its result exists, or when both its pid file and
# its info file exist.
def started?
  Open.exists?(path) || (Open.exists?(pid_file) && Open.exists?(info_file))
end
416
+
417
# A step is waiting when its info file exists but it has not started.
def waiting?
  Open.exists?(info_file) && !started?
end
420
+
421
# Recursive dependencies considered dirty. A dependency is left out when it
# is a remote step, has no info file, has its result available (existing or
# remote), or is in error/aborted/waiting state and is either not
# recoverable or listed among the paths allowed to fail.
def dirty_files
  deps = self.rec_dependencies
  return [] if deps.empty?
  allowed_to_fail = self.canfail_paths

  deps.reject do |dep|
    next true if defined?(WorkflowRESTClient) && WorkflowRESTClient::RemoteStep === dep
    next true unless Open.exists?(dep.info_file)
    next true if dep.path && (Open.exists?(dep.path) || Open.remote?(dep.path))
    (dep.error? || dep.aborted? || dep.waiting?) &&
      (!dep.recoverable_error? || allowed_to_fail.include?(dep.path))
  end
end
432
+
433
# Whether the step is in an inconsistent or outdated state.
#
# A pid file with neither info file nor result is always dirty. The remaining
# checks only run for finished steps and only when the RBBT_UPDATE
# environment variable is "true": a result whose status is not one of
# :done, :ending, :producing or :noinfo; a :done status without a result;
# dirty dependencies; or a step that is not updated.
def dirty?
  return true if Open.exists?(pid_file) && ! ( Open.exists?(info_file) || done? )
  return false unless done? || status == :done
  return false unless ENV["RBBT_UPDATE"] == "true"

  state = self.status
  finishing = state == :done || state == :ending || state == :producing

  return true if done? && !finishing && state != :noinfo
  return true if state == :done && !done?

  if dirty_files.any?
    Log.low "Some dirty files found for #{self.path}: #{Misc.fingerprint dirty_files}"
    true
  else
    ! self.updated?
  end
end
455
+
456
# A step is done when it has a path and that path exists.
def done?
  path && Open.exists?(path)
end
459
+
460
# True when the result is an IO, a saved stream is present, or the status is
# :streaming.
def streaming?
  IO === @result || !@saved_stream.nil? || status == :streaming
end
463
+
464
# True when the status is :noinfo.
def noinfo?
  current = status
  current == :noinfo
end
467
+
468
# Whether the process of the step is alive.
# Returns false when the step has not started (and is not :ending) or has
# finished, failed or been aborted; nil when no pid is recorded; the pid
# when that process exists. When the process is gone the done/error/aborted
# check is evaluated once more and its result returned.
def running?
  return false unless started? || status == :ending

  pid = info[:pid]
  return nil if pid.nil?

  return false if done? or error? or aborted?
  return pid if Misc.pid_exists?(pid)

  done? or error? or aborted?
end
481
+
482
# A step is stalled when it has started but is neither done, running, in
# error nor aborted.
def stalled?
  # done? is checked once; the original repeated it after running?
  started? && ! (done? || running? || error? || aborted?)
end
485
+
486
# True when the status is :done but the result file does not exist.
def missing?
  return false unless status == :done
  !Open.exists?(path)
end
489
+
490
# True when the status is :error.
def error?
  current = status
  current == :error
end
493
+
494
# True when there is neither a recorded pid nor a pid file and the status is
# not nil, :aborted, :done or :error.
def nopid?
  pid = info[:pid] || Open.exists?(pid_file)
  return false if pid
  ![nil, :aborted, :done, :error].include?(status)
end
498
+
499
# True when the status is :aborted, or when the status is neither :noinfo
# nor :setup and the step has no pid (see nopid?).
def aborted?
  state = self.status
  return true if state == :aborted
  return false if state == :noinfo || state == :setup
  nopid?
end
503
+
504
+ # {{{ FILES
505
+
506
# Directory with the extra files of this step, memoized.
def files_dir
  @files_dir = Step.files_dir(path) unless @files_dir
  @files_dir
end
509
+
510
# Temporary path for this step's result, memoized.
def tmp_path
  @tmp_path = Step.tmp_path(path) unless @tmp_path
  @tmp_path
end
513
+
514
# Lists, relative to files_dir, every non-directory entry found under it.
def files
  pattern = File.join(files_dir, '**', '*')
  entries = Dir.glob(pattern).reject { |entry| File.directory? entry }
  entries.map { |entry| Misc.path_relative_to(files_dir, entry) }
end
520
+
521
# Path object for the file called +name+ inside files_dir.
def file(name)
  location = File.join(files_dir, name.to_s)
  Path.setup(location)
end
524
+
525
# Writes +content+ to the step file called +name+.
# Strings are written as they are, Arrays one element per line, TSVs through
# to_s, Hashes as one tab-separated line per entry, and anything else
# through to_s.
def save_file(name, content)
  text = case content
         when String then content
         when Array  then content.join("\n")
         when TSV    then content.to_s
         when Hash   then content.map { |*pair| pair.join("\t") }.join("\n")
         else content.to_s
         end
  Open.write(file(name), text)
end
540
+
541
# Loads the step file called +name+.
#
# name    - name of the file inside files_dir
# type    - :tc, :tsv, :array, :yaml, :marshal or anything else for raw
#           content. When nil it is guessed from the extension of +name+
#           ("tc", "tsv", "list"/"ary"/"array", "yaml", "marshal").
# options - passed to TSV.open for :tsv files
#
# Returns a TokyoCabinet database, a TSV, an Array of lines, the
# deserialized YAML or Marshal content, or the raw text of the file.
# The streams opened for :yaml and :marshal files are closed once the
# content has been read.
def load_file(name, type = nil, options = {})
  if type.nil? and name =~ /.*\.(\w+)$/
    extension = name.match(/.*\.(\w+)$/)[1]
    case extension
    when "tc"
      type = :tc
    when "tsv"
      type = :tsv
    when "list", "ary", "array"
      type = :array
    when "yaml"
      type = :yaml
    when "marshal"
      type = :marshal
    else
      type = :other
    end
  else
    type ||= :other
  end

  case type.to_sym
  when :tc
    Persist.open_tokyocabinet(file(name), false)
  when :tsv
    TSV.open Open.open(file(name)), options
  when :array
    #Open.read(file(name)).split /\n|,\s*/
    Open.read(file(name)).split "\n"
  when :yaml
    # Block form so the stream does not stay open after loading
    Open.open(file(name)) { |stream| YAML.load(stream) }
  when :marshal
    Open.open(file(name)) { |stream| Marshal.load(stream) }
  else
    Open.read(file(name))
  end
end
578
+
579
# Provenance of the step: its recorded inputs plus, for every dependency
# whose path exists, the provenance of that dependency (nil when the
# dependency has no info file).
def provenance
  tree = dependencies.each_with_object({}) do |dep, acc|
    next unless dep.path.exists?
    if Open.exists? dep.info_file
      acc[dep.path] = dep.provenance if Open.exists? dep.path
    else
      acc[dep.path] = nil
    end
  end
  {:inputs => info[:inputs], :provenance => tree}
end
591
+
592
# Nested Hash of dependency paths: each dependency whose result exists maps
# to the provenance_paths of that dependency.
def provenance_paths
  dependencies.each_with_object({}) do |dep, tree|
    tree[dep.path] = dep.provenance_paths if Open.exists? dep.path
  end
end
599
+
600
# Looks up +key+ in Rbbt::Config using the given tokens followed by tokens
# for this step: "workflow:<workflow>" and "task:<workflow>#<task>" when the
# step has a workflow, and always "task:<task>". A trailing Hash in +tokens+
# is passed on as the options.
def config(key, *tokens)
  options = tokens.pop if Hash === tokens.last
  options ||= {}

  step_tokens = []
  if workflow
    workflow_name = workflow.to_s
    step_tokens << "workflow:#{workflow_name}"
    step_tokens << "task:#{workflow_name}##{task_name.to_s}"
  end
  step_tokens << "task:#{task_name.to_s}"

  Rbbt::Config.get(key, tokens + step_tokens, options)
end
614
+
615
# Updates the access time of the step's result and info file by running
# `touch -c -h -a` on them through CMD.cmd.
#
# Both paths are shell-escaped, so names containing spaces or shell
# metacharacters reach touch as single arguments. Missing (nil) paths are
# left out, and nothing is run when neither path is available.
def access
  require 'shellwords'
  targets = [path, info_file].compact.map { |target| Shellwords.escape(target.to_s) }
  return nil if targets.empty?
  CMD.cmd("touch -c -h -a #{targets * ' '}")
end
618
+
619
# Marks this step and every recursive dependency as accessed.
def rec_access
  access
  rec_dependencies.each { |dep| dep.access }
end
625
+
626
# Wraps +stream+ in a line monitor (Misc.line_monitor_stream), optionally
# ticking a progress bar and compressing the monitored output.
#
# options[:bar] selects the bar: true for the default one, a Hash of bar
# options, a Numeric maximum, or a ready-made bar. With a bar, the block is
# called according to its arity: with the line (the bar ticks automatically)
# or with the line and the bar (the block is in charge of ticking).
# options[:bgzip], or 'bgzip' given as :compress / :gzip, returns the output
# through Open.bgzip; any other :compress / :gzip value through Open.gzip.
def monitor_stream(stream, options = {}, &block)
  bar_option = options[:bar]
  bar = case bar_option
        when TrueClass then progress_bar
        when Hash      then progress_bar bar_option
        when Numeric   then progress_bar :max => bar_option
        else bar_option
        end

  out =
    if bar.nil?
      Misc.line_monitor_stream stream, &block
    elsif block.nil? || block.arity == 0
      Misc.line_monitor_stream(stream) { bar.tick }
    elsif block.arity == 1
      Misc.line_monitor_stream(stream) do |line|
        bar.tick
        block.call line
      end
    elsif block.arity == 2
      Misc.line_monitor_stream(stream) { |line| block.call line, bar }
    end

  # Remove the bar when the stream finishes or is aborted
  ConcurrentStream.setup(out, :abort_callback => Proc.new{
    Log::ProgressBar.remove_bar(bar, true) if bar
  }, :callback => Proc.new{
    Log::ProgressBar.remove_bar(bar) if bar
  })

  compression = options[:compress] || options[:gzip]
  if options[:bgzip] || compression.to_s == 'bgzip'
    Open.bgzip(out)
  elsif compression
    Open.gzip(out)
  else
    out
  end
end
673
+
674
# True when the step is done and the path recorded in its info differs from
# its current path.
def relocated?
  done? && (recorded = info[:path]) && recorded != path
end
677
+
678
# KnowledgeBase stored in the step's 'knowledge_base' file directory,
# created on first use and memoized (later calls return the same instance
# whatever +organism+ is passed).
def knowledge_base(organism = nil)
  @_kb ||= KnowledgeBase.new(self.file('knowledge_base'), organism)
end
684
+
685
+ end