rbbt-util 5.28.2 → 5.28.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -54,9 +54,9 @@ app = ARGV.shift
54
54
 
55
55
  ENV["RServe-session"] = options[:RServe_session] || app
56
56
 
57
- app_dir = Rbbt.etc.app_dir.exists? ? Path.setup(Rbbt.etc.app_dir.read.strip) : Rbbt.apps.find
57
+ app_dir = Rbbt.etc.app_dir.exists? ? Path.setup(Rbbt.etc.app_dir.read.strip) : Rbbt.apps
58
58
 
59
- app_dir = app_dir[app]
59
+ app_dir = app_dir[app].find
60
60
 
61
61
  server = options[:server] || 'puma'
62
62
  Misc.in_dir(app_dir) do
@@ -3,7 +3,6 @@
3
3
  require 'rbbt-util'
4
4
  require 'rbbt/util/simpleopt'
5
5
  require 'rbbt/workflow'
6
- require 'rbbt/workflow/remote/ssh/get'
7
6
 
8
7
  $0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
9
8
 
@@ -14,8 +13,7 @@ Remove a job and its dependencies
14
13
  $ rbbt purge [options] <job_path>
15
14
 
16
15
  -h--help Print this help
17
- -t--test Do a verbose dry run
18
- -r--relocate Include relocated dependencies
16
+ -r--recursive Remove recursively
19
17
 
20
18
  EOF
21
19
  if options[:help]
@@ -31,4 +29,4 @@ path, search_path, _sep, *other = ARGV
31
29
 
32
30
  raise ParameterException, "No path given" if path.nil?
33
31
 
34
- Step.purge(path, options[:relocate])
32
+ Step.purge(path, options[:recursive])
@@ -201,7 +201,7 @@ workflows.sort.each do |workflow,tasks|
201
201
  Step::INFO_SERIALIZER.load(f)
202
202
  end
203
203
  rescue
204
- Log.exception $!
204
+ #Log.exception $!
205
205
  {:status => :noinfo}
206
206
  end
207
207
  IndiferentHash.setup(info)
@@ -29,8 +29,6 @@ path = ARGV[0]
29
29
  raise ParameterException, "No path given" if path.nil?
30
30
  step = Workflow.load_step path
31
31
 
32
-
33
- iif step.rec_dependencies
34
-
35
32
  step.archive_deps
33
+ step.copy_files_dir
36
34
  step.set_info :dependencies, []
@@ -25,6 +25,7 @@ $ rbbt workflow server [options] <Workflow>
25
25
  --stream Activate streaming of workflow tasks
26
26
  -fs--file_server Activate file serving for resources
27
27
  -mj--monitor_jobs Monitor jobs (UNSAFE)
28
+ -a--app_dir* Application execution directory
28
29
  --export_all Export all workflow tasks (use with caution!)
29
30
  --export* Export workflow tasks (asynchronous)
30
31
  --export_asynchronous* Export workflow tasks as asynchronous
@@ -74,6 +75,7 @@ sync_exports = options[:export_synchronous].split(/\s*,/) if options[:export_syn
74
75
  exec_exports = options[:export_exec].split(/\s*,/) if options[:export_exec]
75
76
 
76
77
  TmpFile.with_file do |app_dir|
78
+ app_dir = options[:app_dir] if options[:app_dir]
77
79
  Misc.in_dir(app_dir) do
78
80
  app_dir = Path.setup(app_dir.dup)
79
81
  Open.write(app_dir.etc.target_workflow.find, workflow)
@@ -392,6 +392,20 @@ class TestTSVParallelThrough < Test::Unit::TestCase
392
392
  assert_equal size, stream.read.split("\n").length
393
393
  end
394
394
 
395
+ def test_traverse_into_path
396
+ size = 100
397
+ array = (1..size).to_a.collect{|n| n.to_s}
398
+ TmpFile.with_file do |tmpfile|
399
+ Path.setup(tmpfile)
400
+ io = TSV.traverse array, :into => tmpfile do |e|
401
+ e
402
+ end
403
+ io.join
404
+ assert_equal size, Open.read(tmpfile).split("\n").length
405
+ end
406
+ end
407
+
408
+
395
409
  def test_traverse_progress
396
410
  size = 1000
397
411
  array = (1..size).to_a.collect{|n| n.to_s}
@@ -117,6 +117,26 @@ row2 A B C
117
117
  end
118
118
  end
119
119
 
120
+ def test_slice_empty
121
+ content =<<-EOF
122
+ #ID ValueA ValueB Comment
123
+ row1 a b c
124
+ row2 A B C
125
+ EOF
126
+
127
+ TmpFile.with_file(content) do |filename|
128
+ tsv = TSV.open(File.open(filename), :type => :list, :sep => /\s/)
129
+ tsv = tsv.slice []
130
+ assert tsv.fields.empty?
131
+ TmpFile.with_file do |tmpfile|
132
+ iii tsv.to_s
133
+ Open.write(tmpfile, tsv.to_s)
134
+ tsv = TSV.open tmpfile
135
+ assert tsv.fields.empty?
136
+ end
137
+ end
138
+ end
139
+
120
140
  def test_select
121
141
  content =<<-EOF
122
142
  #Id ValueA ValueB OtherID
File without changes
@@ -0,0 +1,136 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
+ require 'rbbt/workflow/util/orchestrator'
3
+ require 'rbbt/workflow/util/trace'
4
+ require 'rbbt-util'
5
+ require 'rbbt/workflow'
6
+
7
+ module TestWF
8
+ extend Workflow
9
+
10
+ MULT = 0.1
11
+ task :a => :text do
12
+ sleep(TestWF::MULT * (rand(10) + 2))
13
+ end
14
+
15
+ dep :a
16
+ task :b => :text do
17
+ sleep(TestWF::MULT * (rand(10) + 2))
18
+ end
19
+
20
+ dep :b
21
+ task :c => :text do
22
+ sleep(TestWF::MULT * (rand(10) + 2))
23
+ end
24
+
25
+ dep :c
26
+ task :d => :text do
27
+ sleep(TestWF::MULT * (rand(10) + 2))
28
+ end
29
+ end
30
+
31
+ class TestClass < Test::Unit::TestCase
32
+ def _test_orchestrate
33
+
34
+ jobs =[]
35
+
36
+ num = 10
37
+ num.times do |i|
38
+ jobs.concat %w(test1 _test2).collect{|name| TestWF.job(:d, name + " #{i}") }
39
+ end
40
+ jobs.each do |j| j.recursive_clean end
41
+
42
+ rules = YAML.load <<-EOF
43
+ defaults:
44
+ log: 4
45
+ default_resources:
46
+ IO: 1
47
+ TestWF:
48
+ a:
49
+ resources:
50
+ cpus: 7
51
+ b:
52
+ resources:
53
+ cpus: 2
54
+ c:
55
+ resources:
56
+ cpus: 10
57
+ d:
58
+ resources:
59
+ cpus: 15
60
+ EOF
61
+
62
+ orchestrator = Workflow::Orchestrator.new(TestWF::MULT, "cpus" => 30, "IO" => 4, "size" => 10 )
63
+ Log.with_severity 0 do
64
+ orchestrator.process(rules, jobs)
65
+ end
66
+
67
+ data = Workflow.trace jobs, :plot_data => true
68
+ eend = data.column("End.second").values.collect{|v| v.to_f}.max
69
+ second_cpus = TSV.setup({}, "Second~CPUS#:type=:single#:cast=:to_f")
70
+ (0..eend.to_i).each do |second|
71
+ tasks = data.select("Start.second"){|s| s <= second}.select("End.second"){|s| s > second}
72
+ cpus = 0
73
+ tasks.through :key, ["Workflow", "Task"] do |k, values|
74
+ workflow, task = values
75
+ cpus += rules[workflow][task.to_s]["resources"]["cpus"]
76
+ end
77
+ second_cpus[second] = cpus
78
+ end
79
+
80
+ assert Misc.mean(second_cpus.values) > 15
81
+ assert Misc.mean(second_cpus.values) < 30
82
+ end
83
+
84
+ def test_orchestrate_size
85
+
86
+ jobs =[]
87
+
88
+ num = 10
89
+ num.times do |i|
90
+ jobs.concat %w(test1 _test2).collect{|name| TestWF.job(:d, name + " #{i}") }
91
+ end
92
+ jobs.each do |j| j.recursive_clean end
93
+
94
+ rules = YAML.load <<-EOF
95
+ defaults:
96
+ log: 4
97
+ default_resources:
98
+ IO: 1
99
+ TestWF:
100
+ a:
101
+ resources:
102
+ cpus: 7
103
+ b:
104
+ resources:
105
+ cpus: 2
106
+ c:
107
+ resources:
108
+ cpus: 10
109
+ d:
110
+ resources:
111
+ cpus: 15
112
+ EOF
113
+
114
+ orchestrator = Workflow::Orchestrator.new(TestWF::MULT, "cpus" => 30, "IO" => 4, "size" => 10 )
115
+ Log.with_severity 0 do
116
+ orchestrator.process(rules, jobs)
117
+ end
118
+
119
+ data = Workflow.trace jobs, :plot_data => true
120
+ eend = data.column("End.second").values.collect{|v| v.to_f}.max
121
+ second_cpus = TSV.setup({}, "Second~CPUS#:type=:single#:cast=:to_f")
122
+ (0..eend.to_i).each do |second|
123
+ tasks = data.select("Start.second"){|s| s <= second}.select("End.second"){|s| s > second}
124
+ cpus = 0
125
+ tasks.through :key, ["Workflow", "Task"] do |k, values|
126
+ workflow, task = values
127
+ cpus += rules[workflow][task.to_s]["resources"]["cpus"]
128
+ end
129
+ second_cpus[second] = cpus
130
+ end
131
+
132
+ assert Misc.mean(second_cpus.values) > 15
133
+ assert Misc.mean(second_cpus.values) < 30
134
+ end
135
+ end
136
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.28.2
4
+ version: 5.28.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-08-12 00:00:00.000000000 Z
11
+ date: 2020-11-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -327,7 +327,6 @@ files:
327
327
  - lib/rbbt/workflow/remote_workflow/remote_step.rb
328
328
  - lib/rbbt/workflow/remote_workflow/remote_step/rest.rb
329
329
  - lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb
330
- - lib/rbbt/workflow/schedule.rb
331
330
  - lib/rbbt/workflow/soap.rb
332
331
  - lib/rbbt/workflow/step.rb
333
332
  - lib/rbbt/workflow/step/accessor.rb
@@ -337,7 +336,9 @@ files:
337
336
  - lib/rbbt/workflow/task.rb
338
337
  - lib/rbbt/workflow/usage.rb
339
338
  - lib/rbbt/workflow/util/archive.rb
339
+ - lib/rbbt/workflow/util/orchestrator.rb
340
340
  - lib/rbbt/workflow/util/provenance.rb
341
+ - lib/rbbt/workflow/util/trace.rb
341
342
  - share/Rlib/plot.R
342
343
  - share/Rlib/svg.R
343
344
  - share/Rlib/util.R
@@ -516,8 +517,10 @@ files:
516
517
  - test/rbbt/workflow/step/test_dependencies.rb
517
518
  - test/rbbt/workflow/test_doc.rb
518
519
  - test/rbbt/workflow/test_remote_workflow.rb
520
+ - test/rbbt/workflow/test_schedule.rb
519
521
  - test/rbbt/workflow/test_step.rb
520
522
  - test/rbbt/workflow/test_task.rb
523
+ - test/rbbt/workflow/util/test_orchestrator.rb
521
524
  - test/test_helper.rb
522
525
  homepage: http://github.com/mikisvaz/rbbt-util
523
526
  licenses:
@@ -545,7 +548,9 @@ summary: Utilities for the Ruby Bioinformatics Toolkit (rbbt)
545
548
  test_files:
546
549
  - test/rbbt/test_entity.rb
547
550
  - test/rbbt/workflow/test_remote_workflow.rb
551
+ - test/rbbt/workflow/util/test_orchestrator.rb
548
552
  - test/rbbt/workflow/test_doc.rb
553
+ - test/rbbt/workflow/test_schedule.rb
549
554
  - test/rbbt/workflow/test_step.rb
550
555
  - test/rbbt/workflow/remote/test_client.rb
551
556
  - test/rbbt/workflow/step/test_dependencies.rb
@@ -1,238 +0,0 @@
1
- class Step
2
- class ResourceManager
3
- class NotEnoughResources < Exception
4
- end
5
-
6
- attr_accessor :cpus, :memory
7
- def initialize(cpus = nil, memory = nil)
8
- @cpus = cpus
9
- @memory = memory
10
- @sem_file = "ResourceManager-" + rand(10000).to_s
11
- @semaphore = RbbtSemaphore.create_semaphore(@sem_file, 1)
12
- end
13
-
14
- def allocate(cpus = nil, memory = nil, &block)
15
- RbbtSemaphore.synchronize(@semaphore) do
16
- if (@cpus && cpus && @cpus < cups) ||
17
- (@memory && memory && @memory < memory)
18
- raise NotEnoughResources
19
- end
20
- begin
21
- @cpus -= cpus
22
- @memory -= memory
23
- yield
24
- rescue
25
- @cpus += cpus
26
- @memory += memory
27
- end
28
- end
29
- end
30
-
31
- def finalize(manager)
32
- RbbtSemaphore.delete_semaphore(@sem_file)
33
- end
34
-
35
- def self.finalize(manager)
36
- proc { manager.finalize }
37
- end
38
- end
39
-
40
- class Scheduler
41
- attr_accessor :jobs, :cpus, :dep_jobs, :job_deps, :jobps
42
- def initialize(jobs, cpus)
43
- @jobs = jobs
44
- @cpus = cpus
45
-
46
- @job_deps = {}
47
-
48
- with_deps = jobs.dup
49
- @dep_jobs = {}
50
- @job_deps = {}
51
- @jobps = {}
52
- @missing = Set.new
53
- while with_deps.any?
54
- job = with_deps.pop
55
- @jobps[job.path] = job
56
- @missing << job.path unless job.done?
57
-
58
- jdeps = job.dependencies
59
- jdeps += job.inputs.flatten.select{|i| Step === i}
60
-
61
- jdeps.reject!{|dep| dep.done? }
62
- @job_deps[job.path] = []
63
- jdeps.each do |dep|
64
- #next if dep.done?
65
- @dep_jobs[dep.path] ||= []
66
- @job_deps[job.path] << dep.path
67
- @dep_jobs[dep.path] << job.path
68
- with_deps << dep unless @job_deps.include? dep.path
69
- end
70
- end
71
-
72
- def ready
73
- @job_deps.select do |jobp,deps|
74
- (@missing & deps).empty?
75
- end.collect{|jobp,deps| jobp}
76
- end
77
-
78
- def used
79
- iii @dep_jobs
80
- @dep_jobs.select do |dep,jobs|
81
- iif [dep, @missing.to_a, jobs]
82
- (@missing & jobs).empty?
83
- end.keys
84
- end
85
-
86
- def next
87
- priorities = {}
88
- @jobs.each do |job|
89
- priorities[job.path] = 1
90
- end
91
-
92
- @missing.each do |jobp|
93
- end
94
-
95
- ready.first
96
- end
97
- end
98
- end
99
-
100
- #def self._priorities(jobs)
101
- # job_level = {}
102
- # jobs.each do |job|
103
- # job_level[job.path] = 1.0
104
- # end
105
-
106
- # with_deps = jobs.dup
107
- # dep_jobs = {}
108
- # job_deps = {}
109
- # while with_deps.any?
110
- # job = with_deps.pop
111
- # level = job_level[job.path]
112
- # job_deps[job.path] = []
113
- # jdeps = job.dependencies
114
- # jdeps += job.inputs.flatten.select{|i| Step === i}
115
-
116
- # jdeps.reject!{|dep| dep.done? }
117
- # jdeps.each do |dep|
118
- # next if dep.done?
119
- # dep_jobs[dep.path] ||= []
120
- # job_level[dep.path] = level / (10 * jdeps.length) if job_level[dep.path].nil? || job_level[dep.path] < level / (10 * jdeps.length)
121
- # job_deps[job.path] << dep.path
122
- # dep_jobs[dep.path] << job.path
123
- # with_deps << dep unless job_deps.include? dep.path
124
- # end
125
- # end
126
- # [job_level, job_deps, dep_jobs]
127
- #end
128
-
129
- #def self.produce_jobs(jobs, cpus, step_cpus = {})
130
- # require 'fc'
131
-
132
- # step_cpus = IndiferentHash.setup(step_cpus || {})
133
-
134
- # deps = []
135
-
136
- # jobs = [jobs] unless Array === jobs
137
-
138
- # job_level, job_deps, dep_jobs = self._priorities(jobs)
139
-
140
- # jobps = {}
141
- # (jobs + jobs.collect{|job| job.rec_dependencies}).flatten.uniq.each do |job|
142
- # jobps[job.path] = job
143
- # end
144
-
145
- # prio_queue = FastContainers::PriorityQueue.new :max
146
-
147
- # job_deps.each do |jobp,depps|
148
- # next if depps.any?
149
- # level = job_level[jobp]
150
-
151
- # prio_queue.push(jobp, level)
152
- # end
153
-
154
- # queue = RbbtProcessQueue.new cpus
155
-
156
- # missing = job_deps.keys
157
- # queue.callback do |jobp|
158
- # Log.info "Done: #{jobp}"
159
- # missing -= [jobp]
160
-
161
- # job_level, job_deps, dep_jobs = self._priorities(jobs)
162
-
163
- # parentsp = dep_jobs[jobp]
164
-
165
- # parentsp.each do |parentp|
166
- # next unless job_deps[parentp].include? jobp
167
- # job_deps[parentp] -= [jobp]
168
- # if job_deps[parentp].empty?
169
- # level = job_level[parentp]
170
- # prio_queue.push(parentp, level )
171
- # end
172
- # end if parentsp
173
- # prio_queue_new = FastContainers::PriorityQueue.new :max
174
- # while prio_queue.any?
175
- # elem = prio_queue.pop
176
- # prio_queue_new.push(elem, job_level[elem])
177
- # end
178
- # prio_queue = prio_queue_new
179
- # end
180
- #
181
- # queue.init do |jobp|
182
- # Log.info "Processing: #{jobp}"
183
- # job = jobps[jobp]
184
- # job_cpus = step_cpus[job.task_name] || 1
185
- # sleep 0.5
186
- # #job.produce
187
- # jobp
188
- # end
189
-
190
- # while missing.any?
191
- # while prio_queue.empty? && missing.any?
192
- # sleep 1
193
- # end
194
- # break if missing.empty?
195
- # jobp = prio_queue.pop
196
- # queue.process jobp
197
- # end
198
-
199
- # queue.join
200
- #end
201
- end
202
-
203
-
204
- if __FILE__ == $0
205
- require 'rbbt/workflow'
206
-
207
- module TestWF
208
- extend Workflow
209
- input :num, :integer
210
- task :dep => :integer do |num|
211
- num
212
- end
213
- dep :dep, :num => 1
214
- dep :dep, :num => 2
215
- dep :dep, :num => 3
216
- task :test do
217
- dependencies.collect{|d| d.load.to_s} * ","
218
- end
219
- end
220
- Log.severity = 0
221
- job = TestWF.job(:test)
222
- job.recursive_clean
223
-
224
- Rbbt::Config.load_file Rbbt.etc.config_profile.HTS.find
225
- Workflow.require_workflow "Sample"
226
- Workflow.require_workflow "HTS"
227
-
228
- jobs = []
229
- # jobs << Sample.job(:mutect2, "QUINTANA-15")
230
- # jobs << Sample.job(:mutect2, "QUINTANA-25")
231
- # jobs << Sample.job(:mutect2, "QUINTANA-22")
232
- jobs << Sample.job(:mutect2, "small")
233
-
234
- sched = Step::Scheduler.new(jobs, 3)
235
- iii sched.ready
236
- iii sched.used
237
- iii sched.next
238
- end