rbbt-util 5.26.1 → 5.26.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 88ac452446d8a694bdb7d61c0bbd20b1c5f7e0d9
4
- data.tar.gz: c6026be2d4aaa199b94cbfe5a3712bcf38577a42
3
+ metadata.gz: 81378fa8b108cf6280f0056bc0bcfcd2a4e380cb
4
+ data.tar.gz: 5ed2d44e0bb591608aab12753ff78997251b8828
5
5
  SHA512:
6
- metadata.gz: 82a0ba5ee4b35a434a40376efd331278a7f2da8c1de248f55f55bd44625ce9bfd2c768e22521d6977a726b2ccd95205d4bbfc218aa2571268f553d1d91355183
7
- data.tar.gz: 677e482567fe02fed13f84c263874b9315eef52bbbad45029873a0f56e83f6b07ad3847aa3e4ea12dc1e8fea7755129c7fd5087220e55582450990fd69f2f438
6
+ metadata.gz: 2930697a87736c4c3753a1dc87c91d544d28630e79ba261fcedf458e049de6097d87286f4c2aa293c735faa7b833a673e5a7be558bad064db0087fcfa4d986c1
7
+ data.tar.gz: 6d59e6b9993d498f62df6005b08dd68567676069f83ee7b22763529a452ee27a00e2289dbfbc6965593d98ae5411f6a6b86becaa625d5e45dca004d66583f07f
@@ -176,6 +176,44 @@ module TSV
176
176
  end
177
177
  end
178
178
 
179
# Drains a priority queue, yielding every popped element to the given block.
#
# queue   - object responding to +empty?+ and +pop+ (TSV.traverse dispatches
#           FastContainers::PriorityQueue instances here).
# options - Hash read through Misc.process_options for:
#           :callback - when present, called with the block's result for
#                       each element
#           :bar      - optional progress bar; +init+ is called once and
#                       +tick+ after every element
#           :join     - optional callable invoked once at the end with a
#                       boolean that is true when a StandardError aborted
#                       the traversal
#
# Exceptions raised by the block (or the callback) are re-raised after the
# bar has been removed and :join has been called.
def self.traverse_priority_queue(queue, options = {}, &block)
  callback, bar, join = Misc.process_options options, :callback, :bar, :join

  begin
    error = false
    bar.init if bar

    # `empty?` is an explicit emptiness test; `any?` would be an Enumerable
    # truthiness test over the stored elements.
    until queue.empty?
      elem = queue.pop
      begin
        if callback
          callback.call yield(elem)
        else
          begin
            yield elem
          rescue Exception
            # Only the callback-less path logs before re-raising.
            Log.exception $!
            raise $!
          end
        end
      ensure
        # Tick even when the element failed so the bar stays in step.
        bar.tick if bar
      end
    end
  rescue
    error = true
    raise $!
  ensure
    Log::ProgressBar.remove_bar(bar) if bar
    join.call(error) if join
  end
end
179
217
  def self.traverse_io_array(io, options = {}, &block)
180
218
  callback, bar, join = Misc.process_options options, :callback, :bar, :join
181
219
  begin
@@ -278,6 +316,8 @@ module TSV
278
316
  Log.low{"Traversing #{name} #{Log.color :green, "->"} #{stream_name(options[:into])}"}
279
317
  begin
280
318
  case obj
319
+ when (defined? FastContainers and FastContainers::PriorityQueue)
320
+ traverse_priority_queue(obj, options, &block)
281
321
  when TSV
282
322
  traverse_tsv(obj, options, &block)
283
323
  when Hash
@@ -0,0 +1,227 @@
1
+ class Step
2
# Keeps a budget of CPUs and memory and lends part of it to a block of code.
# Access to the counters is serialized through an RbbtSemaphore created in
# the constructor.
class ResourceManager
  # Raised by #allocate when the request exceeds the remaining budget.
  # NOTE: the misspelled name and the Exception superclass are kept as-is so
  # existing references and rescue clauses keep working.
  class NotEoughResources < Exception
  end

  attr_accessor :cpus, :memory

  # cpus   - available CPU budget, or nil to leave CPUs untracked.
  # memory - available memory budget, or nil to leave memory untracked.
  def initialize(cpus = nil, memory = nil)
    @cpus = cpus
    @memory = memory
    @sem_file = "ResourceManager-" + rand(10000).to_s
    @semaphore = RbbtSemaphore.create_semaphore(@sem_file, 1)
  end

  # Reserves +cpus+ and +memory+ for the duration of the block.
  #
  # A resource is only accounted for when both the budget and the request
  # are non-nil. The reservation is returned when the block finishes,
  # whether it succeeds or raises; exceptions from the block propagate.
  #
  # Raises NotEoughResources (before yielding) when a tracked request is
  # larger than what is currently available.
  def allocate(cpus = nil, memory = nil, &block)
    RbbtSemaphore.synchronize(@semaphore) do
      if (@cpus && cpus && @cpus < cpus) ||
         (@memory && memory && @memory < memory)
        raise NotEoughResources
      end

      # Decide up front which counters take part, so release mirrors reserve.
      track_cpus = @cpus && cpus
      track_memory = @memory && memory

      begin
        @cpus -= cpus if track_cpus
        @memory -= memory if track_memory
        yield
      ensure
        @cpus += cpus if track_cpus
        @memory += memory if track_memory
      end
    end
  end

  # Deletes the semaphore backing this manager. The argument is unused and
  # optional, so the proc built by ResourceManager.finalize can call this
  # method without arguments.
  def finalize(manager = nil)
    RbbtSemaphore.delete_semaphore(@sem_file)
  end

  # Returns a proc that finalizes +manager+ when called.
  def self.finalize(manager)
    proc { manager.finalize }
  end
end
39
+
40
# Builds the dependency graph for a set of jobs and reports which of them
# can run next.
#
# Jobs are expected to respond to +path+, +done?+, +dependencies+ and
# +inputs+; inputs that are Step instances count as dependencies too.
class Scheduler
  require 'set'

  attr_accessor :jobs, :cpus, :dep_jobs, :job_deps, :jobps

  # jobs - Array of top-level jobs to schedule.
  # cpus - stored as-is; not used by the code in this class.
  #
  # Walks the jobs and their unfinished dependencies, filling:
  #   @jobps    - path => job
  #   @job_deps - path => paths of unfinished dependencies
  #   @dep_jobs - path => paths of the jobs that depend on it
  #   @missing  - Set of paths of jobs that are not done
  def initialize(jobs, cpus)
    @jobs = jobs
    @cpus = cpus

    @dep_jobs = {}
    @job_deps = {}
    @jobps = {}
    @missing = Set.new

    with_deps = jobs.dup
    while with_deps.any?
      job = with_deps.pop
      @jobps[job.path] = job
      @missing << job.path unless job.done?

      jdeps = job.dependencies + job.inputs.flatten.select{|i| Step === i}
      jdeps = jdeps.reject{|dep| dep.done? }

      @job_deps[job.path] = []
      jdeps.each do |dep|
        @dep_jobs[dep.path] ||= []
        @job_deps[job.path] << dep.path
        @dep_jobs[dep.path] << job.path
        # Visit the dependency unless it has already been expanded.
        with_deps << dep unless @job_deps.include? dep.path
      end
    end
  end

  # Paths of the registered jobs none of whose dependencies are missing.
  def ready
    @job_deps.select{|jobp, deps| (@missing & deps).empty? }.keys
  end

  # NOTE(review): the selection logic here was never finished. The original
  # built an unused priorities value and returned a misspelled instance
  # variable (always nil). It now returns the dependency => dependents map;
  # confirm the intended contract.
  def next
    @dep_jobs
  end
end
92
+
93
# Computes a priority level and the dependency graph for +jobs+.
#
# Every job in +jobs+ starts at level 1.0. Each unfinished dependency gets
# its parent's level divided by ten times the number of unfinished
# dependencies of that parent, keeping the highest value seen so far.
#
# Returns [job_level, job_deps, dep_jobs]:
#   job_level - path => Float level
#   job_deps  - path => paths of unfinished dependencies
#   dep_jobs  - path => paths of the jobs depending on it
def self._priorities(jobs)
  job_level = {}
  jobs.each{|job| job_level[job.path] = 1.0 }

  dep_jobs = {}
  job_deps = {}
  pending = jobs.dup

  while pending.any?
    job = pending.pop
    level = job_level[job.path]
    job_deps[job.path] = []

    # Dependencies plus any inputs that are themselves steps.
    jdeps = job.dependencies + job.inputs.flatten.select{|i| Step === i}
    jdeps.reject!{|dep| dep.done? }

    jdeps.each do |dep|
      next if dep.done?

      dep_jobs[dep.path] ||= []

      candidate = level / (10 * jdeps.length)
      current = job_level[dep.path]
      job_level[dep.path] = candidate if current.nil? || current < candidate

      job_deps[job.path] << dep.path
      dep_jobs[dep.path] << job.path
      pending << dep unless job_deps.include? dep.path
    end
  end

  [job_level, job_deps, dep_jobs]
end
121
+
122
# Feeds +jobs+ and their unfinished dependencies to an RbbtProcessQueue in
# priority order, as computed by _priorities.
#
# jobs      - a job or an Array of jobs.
# cpus      - passed to RbbtProcessQueue.new.
# step_cpus - Hash keyed by task name; looked up per job (default 1) but the
#             value is not used yet.
#
# NOTE(review): the worker block only sleeps and returns the path; the call
# that actually produces the job is commented out.
def self.produce_jobs(jobs, cpus, step_cpus = {})
  require 'fc'

  step_cpus = IndiferentHash.setup(step_cpus || {})
  jobs = [jobs] unless Array === jobs

  job_level, job_deps, dep_jobs = self._priorities(jobs)

  # Index every job and recursive dependency by path.
  jobps = {}
  all_jobs = (jobs + jobs.collect{|job| job.rec_dependencies}).flatten.uniq
  all_jobs.each{|job| jobps[job.path] = job }

  # Seed the queue with the jobs that have no unfinished dependencies.
  prio_queue = FastContainers::PriorityQueue.new :max
  job_deps.each do |jobp, depps|
    prio_queue.push(jobp, job_level[jobp]) unless depps.any?
  end

  queue = RbbtProcessQueue.new cpus

  missing = job_deps.keys
  queue.callback do |jobp|
    Log.info "Done: #{jobp}"
    missing -= [jobp]

    # Recompute levels and graph now that one more job is finished.
    job_level, job_deps, dep_jobs = self._priorities(jobs)

    parentsp = dep_jobs[jobp]
    if parentsp
      parentsp.each do |parentp|
        next unless job_deps[parentp].include? jobp
        job_deps[parentp] -= [jobp]
        prio_queue.push(parentp, job_level[parentp]) if job_deps[parentp].empty?
      end
    end

    # Rebuild the queue so waiting entries pick up the refreshed levels.
    refreshed = FastContainers::PriorityQueue.new :max
    while prio_queue.any?
      elem = prio_queue.pop
      refreshed.push(elem, job_level[elem])
    end
    prio_queue = refreshed
  end

  queue.init do |jobp|
    Log.info "Processing: #{jobp}"
    job = jobps[jobp]
    job_cpus = step_cpus[job.task_name] || 1
    sleep 0.5
    #job.produce
    jobp
  end

  while missing.any?
    # Wait until the callback makes another job available or all are done.
    sleep 1 while prio_queue.empty? && missing.any?
    break if missing.empty?
    queue.process prio_queue.pop
  end

  queue.join
end
194
+ end
195
+
196
+
197
# Manual smoke test, executed only when this file is run directly.
if __FILE__ == $0
  require 'rbbt/workflow'

  # Toy workflow: :test depends on three :dep jobs with different inputs.
  module TestWF
    extend Workflow

    input :num, :integer
    task :dep => :integer do |num|
      num
    end

    dep :dep, :num => 1
    dep :dep, :num => 2
    dep :dep, :num => 3
    task :test do
      dependencies.collect{|d| d.load.to_s} * ","
    end
  end

  Log.severity = 0
  job = TestWF.job(:test)
  job.recursive_clean

  Rbbt::Config.load_file Rbbt.etc.config_profile.HTS.find
  Workflow.require_workflow "Sample"
  Workflow.require_workflow "HTS"

  # Build a scheduler over three real mutect2 sample jobs.
  jobs = ["QUINTANA-15", "QUINTANA-25", "QUINTANA-28"].collect do |sample|
    Sample.job(:mutect2, sample)
  end

  sched = Step::Scheduler.new(jobs, 3)
end
@@ -298,6 +298,7 @@ class Step
298
298
  pid_file = Step.pid_file path
299
299
  md5_file = Step.md5_file path
300
300
  files_dir = Step.files_dir path
301
+ tmp_path = Step.tmp_path path
301
302
 
302
303
  if ! (Open.writable?(path) && Open.writable?(info_file))
303
304
  Log.warn "Could not clean #{path}: not writable"
@@ -315,6 +316,7 @@ class Step
315
316
  Open.rm path if (Open.exists?(path) or Open.broken_link?(path))
316
317
  Open.rm_rf files_dir if Open.exists?(files_dir)
317
318
  Open.rm pid_file if Open.exists?(pid_file)
319
+ Open.rm tmp_path if Open.exists?(tmp_path)
318
320
  end
319
321
  end
320
322
  end
@@ -180,7 +180,7 @@ compile(){
180
180
  local extra="$@"
181
181
 
182
182
  if [ -f Makefile -o -f makefile ]; then
183
- make || exit -1
183
+ make -j 4 || exit -1
184
184
  make install || echo "No install"
185
185
  fi
186
186
 
@@ -205,7 +205,6 @@ prepare_make(){
205
205
 
206
206
  [ -d src -a ! -e CMakeLists.txt -a ! -e Makefile -a ! -e configure ] && cd src
207
207
 
208
-
209
208
  if [ -f config/m4 ]; then
210
209
  libtoolize --force
211
210
  aclocal
@@ -257,7 +256,9 @@ build_make(){
257
256
  echo "Extra params: $extra"
258
257
  fi
259
258
 
260
- prepare_make $name
259
+ if [ ! -f configure ]; then
260
+ prepare_make $name
261
+ fi
261
262
 
262
263
  if [ -f configure ]; then
263
264
  ./configure --prefix="$(opt_dir "$name")" $extra
@@ -151,6 +151,8 @@ tasks_info.each do |task, info|
151
151
  stats[task] = [calls,avg_time, cpus, spark, shard]
152
152
  end
153
153
 
154
+ raise "No jobs to process" if data.size == 0
155
+
154
156
  start = data.column("Start.second").values.flatten.collect{|v| v.to_i}.min
155
157
  eend = data.column("End.second").values.flatten.collect{|v| v.to_i}.max
156
158
  total = eend - start
@@ -120,6 +120,21 @@ class TestTSVParallelThrough < Test::Unit::TestCase
120
120
 
121
121
  assert_equal array, res
122
122
  end
123
+
124
# TSV.traverse over a :min priority queue must return the elements in
# ascending priority order.
def test_traverse_priority
  require 'fc'

  queue = FastContainers::PriorityQueue.new(:min)

  # Each random value is used as its own priority.
  array = Array.new(100) { rand(1000).to_i }
  array.each { |value| queue.push(value, value) }

  res = TSV.traverse(queue, :into => []) { |v| v }

  assert_equal array.sort, res
end
123
138
 
124
139
  def test_traverse_array_threads
125
140
  require 'rbbt/sources/organism'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.26.1
4
+ version: 5.26.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-04-18 00:00:00.000000000 Z
11
+ date: 2019-04-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -325,6 +325,7 @@ files:
325
325
  - lib/rbbt/workflow/examples.rb
326
326
  - lib/rbbt/workflow/prepare.rb
327
327
  - lib/rbbt/workflow/provenance.rb
328
+ - lib/rbbt/workflow/schedule.rb
328
329
  - lib/rbbt/workflow/soap.rb
329
330
  - lib/rbbt/workflow/step.rb
330
331
  - lib/rbbt/workflow/step/dependencies.rb