rbbt-util 5.28.6 → 5.28.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 900106fb1799c857a482187c7cb7d18a40c35dbeb5dd231bf8e1851235e28c63
4
- data.tar.gz: e5ec2271b3ada0b282963f4ffc7e2b623d8bb84fcb72867a6848b93a80052463
3
+ metadata.gz: f13812c850239f7d78c94e74288ea7807663c72146002e13a13aa99babb18555
4
+ data.tar.gz: 3568f646d3095448ac6c8d99f32a5513421c80fd9d8b1f9a049a7ce512f3139d
5
5
  SHA512:
6
- metadata.gz: 1a418d5b4fe4369f7c25dd063c60a65c2b6c0bd7e3dbf9fd156fe875f85ef1b2eeb4d2eb1a03472e191de98bbfbae20abdb86877d381193f86fe4c8325d2f391
7
- data.tar.gz: 5225d6dbdbab08d0ed5239eba91bbe3d2a8dd16c92c24fe177b66ed3b9d4baef4e24818057de8bf694edf715aabd72b481926a2466c76492c27b75be25634b27
6
+ metadata.gz: 644bcc769e818310492993b15a024b83de399d65a0cfc74b9dc06ee233bfd66da3c0951a26899c66ce62a8410104f00b8b84c07f77a4a1159a7fb5149befb5a1
7
+ data.tar.gz: da5fd395014feded588f223686bd032dd86e46cac0e48e4369e9d1ea2e1f0d021e0fab02539ab2be1f483efc5463b9487952783edf2e00f22d7158879b5fd32d
@@ -25,6 +25,7 @@ module Persist
25
25
  MEMORY = {} unless defined? MEMORY
26
26
  MAX_FILE_LENGTH = 150
27
27
 
28
+ # Return non-false if the first file is newer than the second file
28
29
  def self.newer?(path, file)
29
30
  return true if not Open.exists?(file)
30
31
  path = path.find if Path === path
@@ -550,9 +550,17 @@ module TSV
550
550
  "\t" << ([""] * fields.length) * "\t" << "\n"
551
551
  end
552
552
  when Array
553
- "\t" << values.collect{|v| Array === v ? v * "|" : v} * "\t" << "\n"
553
+ if fields.nil? or fields.empty?
554
+ "\n"
555
+ else
556
+ "\t" << values.collect{|v| Array === v ? v * "|" : v} * "\t" << "\n"
557
+ end
554
558
  else
555
- "\t" << values.to_s << "\n"
559
+ if fields.nil? or fields.empty?
560
+ "\n"
561
+ else
562
+ "\t" << values.to_s << "\n"
563
+ end
556
564
  end
557
565
  end
558
566
 
@@ -32,9 +32,17 @@ module TSV
32
32
  sep + ([""] * fields.length) * sep << "\n"
33
33
  end
34
34
  when Array
35
- sep + (values.collect{|v| Array === v ? v * "|" : v} * sep) << "\n"
35
+ if fields.nil? or fields.empty?
36
+ "\n"
37
+ else
38
+ sep + (values.collect{|v| Array === v ? v * "|" : v} * sep) << "\n"
39
+ end
36
40
  else
37
- sep + values.to_s << "\n"
41
+ if fields.nil? or fields.empty?
42
+ "\n"
43
+ else
44
+ sep + values.to_s << "\n"
45
+ end
38
46
  end
39
47
  end
40
48
 
@@ -624,6 +624,8 @@ module TSV
624
624
  def self.traverse(obj, options = {}, &block)
625
625
  into = options[:into]
626
626
 
627
+ into = options[:into] = Open.open(into, :mode => "w") if Misc.is_filename?(into)
628
+
627
629
  case into
628
630
  when :stream
629
631
  sout = Misc.open_pipe false, false do |sin|
@@ -184,7 +184,11 @@ module TSV
184
184
  str = ""
185
185
  str << preamble.strip << "\n" if preamble and not preamble.empty?
186
186
  if fields
187
- str << header_hash << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
187
+ if fields.empty?
188
+ str << header_hash << (key_field || "ID").to_s << "\n"
189
+ else
190
+ str << header_hash << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
191
+ end
188
192
  end
189
193
 
190
194
  str
@@ -15,7 +15,7 @@ module Rbbt::Config
15
15
  end
16
16
 
17
17
  def self.load_file(file)
18
- Log.debug "Loading file: #{ file }"
18
+ Log.debug "Loading config file: #{ file }"
19
19
  TSV.traverse file, :type => :array do |line|
20
20
  next if line =~ /^#/
21
21
  key, value, *tokens = line.strip.split(/\s/)
@@ -85,6 +85,7 @@ module Rbbt::Config
85
85
  priorities
86
86
  end
87
87
 
88
+ # For equal priorities the matching prioritizes tokens earlier in the list
88
89
  def self.get(key, *tokens)
89
90
  options = tokens.pop if Hash === tokens.last
90
91
  default = options.nil? ? nil : options[:default]
@@ -113,7 +113,7 @@ end
113
113
  end
114
114
 
115
115
  def self.is_filename?(string)
116
- return true if defined? PATH and Path === string
116
+ return true if defined? Path and Path === string
117
117
  return true if string.respond_to? :exists
118
118
  return true if String === string and string.length < 265 and File.exist?(string)
119
119
  return false
@@ -83,7 +83,7 @@ module Workflow
83
83
  self.archive_deps
84
84
  self.dependencies = self.dependencies - [dep]
85
85
  Open.rm_rf self.files_dir if Open.exist? self.files_dir
86
- FileUtils.cp_r dep.files_dir, self.files_dir if Open.exist? dep.files_dir
86
+ FileUtils.cp_r dep.files_dir, self.files_dir if Open.exist?(dep.files_dir)
87
87
  Open.ln_h dep.path, self.tmp_path
88
88
  case remove.to_s
89
89
  when 'true'
@@ -92,8 +92,10 @@ module Workflow
92
92
  dep.recursive_clean
93
93
  end
94
94
  else
95
- Open.rm_rf self.files_dir
96
- Open.link dep.files_dir, self.files_dir
95
+ if Open.exists?(dep.files_dir)
96
+ Open.rm_rf self.files_dir
97
+ Open.link dep.files_dir, self.files_dir
98
+ end
97
99
  Open.link dep.path, self.path
98
100
  end
99
101
  nil
@@ -84,6 +84,7 @@ class Step
84
84
  end
85
85
 
86
86
  def load_dependencies_from_info
87
+ relocated = nil
87
88
  @dependencies = (self.info[:dependencies] || []).collect do |task,name,dep_path|
88
89
  if Open.exists?(dep_path) || Open.exists?(dep_path + '.info')
89
90
  Workflow._load_step dep_path
@@ -411,7 +412,7 @@ class Step
411
412
  return
412
413
  end
413
414
 
414
- if (Open.exists?(path) or Open.broken_link?(path)) or Open.exists?(pid_file) or Open.exists?(info_file) or Open.exists?(files_dir)
415
+ if (Open.exists?(path) or Open.broken_link?(path)) or Open.exists?(pid_file) or Open.exists?(info_file) or Open.exists?(files_dir) or Open.broken_link?(files_dir)
415
416
 
416
417
  @result = nil
417
418
  @pid = nil
@@ -419,8 +420,8 @@ class Step
419
420
  Misc.insist do
420
421
  Open.rm info_file if Open.exists?(info_file)
421
422
  Open.rm md5_file if Open.exists?(md5_file)
422
- Open.rm path if (Open.exists?(path) or Open.broken_link?(path))
423
- Open.rm_rf files_dir if Open.exists?(files_dir)
423
+ Open.rm path if (Open.exists?(path) || Open.broken_link?(path))
424
+ Open.rm_rf files_dir if Open.exists?(files_dir) || Open.broken_link?(files_dir)
424
425
  Open.rm pid_file if Open.exists?(pid_file)
425
426
  Open.rm tmp_path if Open.exists?(tmp_path)
426
427
  end
@@ -262,6 +262,7 @@ puts resource[path].find(search_path)
262
262
 
263
263
  job_files.each do |file|
264
264
  begin
265
+ Log.debug "Purging #{file}"
265
266
  Open.rm_rf file if Open.exists?(file)
266
267
  rescue
267
268
  Log.warn "Could not erase '#{file}': #{$!.message}"
@@ -0,0 +1,190 @@
1
+ require 'rbbt/workflow'
2
+
3
+ module Workflow
4
+ class Orchestrator
5
+
6
+ def self.job_workload(job)
7
+ workload = {job => []}
8
+ return workload if job.done?
9
+
10
+ job.dependencies.each do |dep|
11
+ next if dep.done?
12
+ workload.merge!(job_workload(dep))
13
+ workload[job] += workload[dep]
14
+ workload[job] << dep
15
+ end
16
+
17
+ job.input_dependencies.each do |dep|
18
+ next if dep.done?
19
+ workload.merge!(job_workload(dep))
20
+ workload[job] += workload[dep]
21
+ workload[job] << dep
22
+ end
23
+
24
+ workload
25
+ end
26
+
27
+ def self.job_rules(rules, job)
28
+ workflow = job.workflow.to_s
29
+ task_name = job.task_name.to_s
30
+
31
+ return IndiferentHash.setup(rules["defaults"]) unless rules[workflow]
32
+ return IndiferentHash.setup(rules["defaults"]) unless rules[workflow][task_name]
33
+
34
+ job_rules = IndiferentHash.setup(rules[workflow][task_name])
35
+ rules["defaults"].each{|k,v| job_rules[k] ||= v } if rules["defaults"]
36
+ job_rules
37
+ end
38
+
39
+ def self.purge_duplicates(candidates)
40
+ seen = Set.new
41
+ candidates.select do |job|
42
+ if seen.include? job.path
43
+ false
44
+ else
45
+ seen << job.path
46
+ true
47
+ end
48
+ end
49
+ end
50
+
51
+ def self.job_resources(rules, job)
52
+ resources = (job_rules(rules, job) || {})["resources"] || {}
53
+
54
+ IndiferentHash.setup(resources)
55
+
56
+ default_resources = rules["default_resources"] || rules["defaults"]["resources"]
57
+ default_resources.each{|k,v| resources[k] ||= v } if default_resources
58
+
59
+ resources
60
+ end
61
+
62
+ def self.sort_candidates(candidates, rules)
63
+ seen = Set.new
64
+ candidates.sort_by do |job|
65
+ - job_resources(rules, job).values.inject(0){|acc,e| acc += e}
66
+ end
67
+ end
68
+
69
+ def self.candidates(workload, rules)
70
+ if rules.empty?
71
+ candidates = workload.select{|k,v| v.empty? }.
72
+ collect{|k,v| k}.
73
+ reject{|k| k.done? }
74
+ else
75
+ candidates = workload. #select{|k,v| Orchestrator.job_rules(rules, k) }.
76
+ select{|k,v| v.empty? }.
77
+ collect{|k,v| k }.
78
+ reject{|k| k.done? }
79
+ end
80
+
81
+ top_level = workload.keys - workload.values.flatten
82
+
83
+ candidates = purge_duplicates candidates
84
+ candidates = sort_candidates candidates, rules
85
+
86
+ candidates
87
+ end
88
+
89
+ attr_accessor :available_resources, :resources_requested, :resources_used, :timer
90
+
91
+ def initialize(timer = 5, available_resources = {})
92
+ @timer = timer
93
+ @available_resources = IndiferentHash.setup(available_resources)
94
+ @resources_requested = IndiferentHash.setup({})
95
+ @resources_used = IndiferentHash.setup({})
96
+ end
97
+
98
+ def release_resources(job)
99
+ if resources_used[job]
100
+ resources_used[job].each do |resource,value|
101
+ next if resource == 'size'
102
+ resources_requested[resource] -= value.to_i
103
+ end
104
+ resources_used.delete job
105
+ end
106
+ end
107
+
108
+ def check_resources(rules, job)
109
+ resources = Orchestrator.job_resources(rules, job)
110
+
111
+ limit_resources = resources.select{|resource,value| available_resources[resource] && ((resources_requested[resource] || 0) + value) > available_resources[resource] }.collect{|resource,v| resource }
112
+ if limit_resources.any?
113
+ Log.debug "Orchestrator waiting on #{job.path} due to #{limit_resources * ", "}"
114
+ else
115
+
116
+ resources_used[job] = resources
117
+ resources.each do |resource,value|
118
+ resources_requested[resource] ||= 0
119
+ resources_requested[resource] += value.to_i
120
+ end
121
+ Log.low "Orchestrator producing #{job.path} with resources #{resources}"
122
+
123
+ return yield
124
+ end
125
+ end
126
+
127
+ def run_with_rules(rules, job)
128
+ job_rules = Orchestrator.job_rules(rules, job)
129
+
130
+ Rbbt::Config.with_config do
131
+ job_rules[:config_keys].each do |config|
132
+ Rbbt::Config.process_config config
133
+ end if job_rules && job_rules[:config_keys]
134
+
135
+ log = job_rules[:log] if job_rules
136
+ log = Log.severity if log.nil?
137
+ Log.with_severity log do
138
+ job.produce(false, true)
139
+ end
140
+ end
141
+ end
142
+
143
+ def process(rules, jobs)
144
+ begin
145
+
146
+ workload = jobs.inject({}){|acc,job| acc.merge!(Orchestrator.job_workload(job)) }
147
+
148
+ while workload.values.flatten.any?
149
+
150
+ candidates = resources_used.keys + Orchestrator.candidates(workload, rules)
151
+ raise "No candidates" if candidates.empty?
152
+
153
+ candidates.each do |job|
154
+ case
155
+ when (job.error? || job.aborted?)
156
+ if job.recoverable_error?
157
+ job.clean
158
+ raise TryAgain
159
+ else
160
+ next
161
+ end
162
+ release_resources(job)
163
+ when job.done?
164
+ Log.debug "Orchestrator done #{job.path}"
165
+ release_resources(job)
166
+ raise TryAgain
167
+
168
+ when job.running?
169
+ next
170
+
171
+ else
172
+ check_resources(rules, job) do
173
+ run_with_rules(rules, job)
174
+ end
175
+ end
176
+ end
177
+
178
+ new_workload = {}
179
+ workload.each do |k,v|
180
+ next if k.done?
181
+ new_workload[k] = v.reject{|d| d.done? || (d.error? && ! d.recoverable_error?)}
182
+ end
183
+ sleep timer
184
+ end
185
+ rescue TryAgain
186
+ retry
187
+ end
188
+ end
189
+ end
190
+ end
@@ -0,0 +1,182 @@
1
+ require 'rbbt/util/R'
2
+
3
+ module Workflow
4
+ def self.trace(seed_jobs, options = {})
5
+
6
+ jobs = []
7
+ seed_jobs.each{|j| jobs << j; jobs += j.rec_dependencies}
8
+
9
+ data = TSV.setup({}, "Job~Workflow,Task,Start,End#:type=:list")
10
+ min_start = nil
11
+ max_done = nil
12
+ jobs.each do |job|
13
+ next unless job.info[:done]
14
+ started = job.info[:started]
15
+ ddone = job.info[:done]
16
+
17
+ code = [job.workflow, job.task_name].compact.collect{|s| s.to_s} * "."
18
+ code = code + '.' + job.name
19
+
20
+ data[code] = [job.workflow.to_s, job.task_name, started, ddone]
21
+ if min_start.nil?
22
+ min_start = started
23
+ else
24
+ min_start = started if started < min_start
25
+ end
26
+
27
+ if max_done.nil?
28
+ max_done = ddone
29
+ else
30
+ max_done = ddone if ddone > max_done
31
+ end
32
+ end
33
+
34
+ data.add_field "Start.second" do |k,value|
35
+ value["Start"] - min_start
36
+ end
37
+
38
+ data.add_field "End.second" do |k,value|
39
+ value["End"] - min_start
40
+ end
41
+
42
+ if options[:fix_gap]
43
+ ranges = []
44
+ data.through do |k,values|
45
+ start, eend = values.values_at "Start.second", "End.second"
46
+
47
+ ranges << (start..eend)
48
+ end
49
+
50
+ gaps = {}
51
+ last = nil
52
+ Misc.collapse_ranges(ranges).each do |range|
53
+ start = range.begin
54
+ eend = range.end
55
+ if last
56
+ gaps[last] = start - last
57
+ end
58
+ last = eend
59
+ end
60
+
61
+ data.process "End.second" do |value,k,values|
62
+ gap = Misc.sum(gaps.select{|pos,size| pos < values["Start.second"]}.collect{|pos,size| size})
63
+ value - gap
64
+ end
65
+
66
+ data.process "Start.second" do |value,k,values|
67
+ gap = Misc.sum(gaps.select{|pos,size| pos < values["Start.second"]}.collect{|pos,size| size})
68
+ value - gap
69
+ end
70
+ end
71
+
72
+ tasks_info = {}
73
+
74
+ jobs.each do |dep|
75
+ next unless dep.info[:done]
76
+ task = [dep.workflow, dep.task_name].compact.collect{|s| s.to_s} * "#"
77
+ info = tasks_info[task] ||= {}
78
+
79
+ time = dep.info[:done] - dep.info[:started]
80
+ info[:time] ||= []
81
+ info[:time] << time
82
+
83
+ cpus = nil
84
+ spark = false
85
+ shard = false
86
+ dep.info[:config_keys].select do |kinfo|
87
+ key, value, tokens = kinfo
88
+ key = key.to_s
89
+ cpus = value if key.include? 'cpu'
90
+ spark = value if key == 'spark'
91
+ shard = value if key == 'shard'
92
+ end
93
+
94
+ info[:cpus] = cpus || 1
95
+ info[:spark] = spark
96
+ info[:shard] = shard
97
+ end
98
+
99
+ stats = TSV.setup({}, "Task~Calls,Avg. Time,Total Time,Cpus,Spark,Shard#:type=:list")
100
+
101
+ tasks_info.each do |task, info|
102
+ time_lists, cpus, spark, shard = info.values_at :time, :cpus, :spark, :shard
103
+ avg_time = Misc.mean(time_lists)
104
+ total_time = Misc.sum(time_lists)
105
+ calls = time_lists.length
106
+ stats[task] = [calls, avg_time, total_time, cpus, spark, shard]
107
+ end
108
+
109
+ raise "No jobs to process" if data.size == 0
110
+
111
+ start = data.column("Start.second").values.flatten.collect{|v| v.to_f}.min
112
+ eend = data.column("End.second").values.flatten.collect{|v| v.to_f}.max
113
+ total = eend - start
114
+ Log.info "Total time elapsed: #{total} seconds"
115
+
116
+ if options[:fix_gap]
117
+ total_gaps = Misc.sum(gaps.collect{|k,v| v})
118
+ Log.info "Total gaps: #{total_gaps} seconds"
119
+ end
120
+
121
+ plot, width, height = options.values_at :plot, :width, :height
122
+ if plot
123
+ data.R <<-EOF, [:svg]
124
+ rbbt.require('tidyverse')
125
+ rbbt.require('ggplot2')
126
+
127
+ names(data) <- make.names(names(data))
128
+ data$id = rownames(data)
129
+ data$content = data$Task
130
+ data$start = data$Start
131
+ data$end = data$End
132
+ data$Project = data$Workflow
133
+
134
+ tasks = data
135
+
136
+ #theme_gantt <- function(base_size=11, base_family="Source Sans Pro Light") {
137
+ theme_gantt <- function(base_size=11, base_family="Sans Serif") {
138
+ ret <- theme_bw(base_size, base_family) %+replace%
139
+ theme(panel.background = element_rect(fill="#ffffff", colour=NA),
140
+ axis.title.x=element_text(vjust=-0.2), axis.title.y=element_text(vjust=1.5),
141
+ title=element_text(vjust=1.2, family="Source Sans Pro Semibold"),
142
+ panel.border = element_blank(), axis.line=element_blank(),
143
+ panel.grid.minor=element_blank(),
144
+ panel.grid.major.y = element_blank(),
145
+ panel.grid.major.x = element_line(size=0.5, colour="grey80"),
146
+ axis.ticks=element_blank(),
147
+ legend.position="bottom",
148
+ axis.title=element_text(size=rel(1.2), family="Source Sans Pro Semibold"),
149
+ strip.text=element_text(size=rel(1.5), family="Source Sans Pro Semibold"),
150
+ strip.background=element_rect(fill="#ffffff", colour=NA),
151
+ panel.spacing.y=unit(1.5, "lines"),
152
+ legend.key = element_blank())
153
+
154
+ ret
155
+ }
156
+
157
+ tasks.long <- tasks %>%
158
+ gather(date.type, task.date, -c(Project, Task, id, Start.second, End.second)) %>%
159
+ arrange(date.type, task.date) %>%
160
+ mutate(id = factor(id, levels=rev(unique(id)), ordered=TRUE))
161
+
162
+ x.breaks <- seq(length(tasks$Task) + 0.5 - 3, 0, by=-3)
163
+
164
+ timeline <- ggplot(tasks.long, aes(y=id, yend=id, x=Start.second, xend=End.second, colour=Task)) +
165
+ geom_segment() +
166
+ geom_vline(xintercept=x.breaks, colour="grey80", linetype="dotted") +
167
+ guides(colour=guide_legend(title=NULL)) +
168
+ labs(x=NULL, y=NULL) +
169
+ theme_gantt() + theme(axis.text.x=element_text(angle=45, hjust=1))
170
+
171
+ rbbt.png_plot('#{plot}', 'plot(timeline)', width=#{width}, height=#{height}, pointsize=6)
172
+ EOF
173
+ end
174
+
175
+ if options[:plot_data]
176
+ data
177
+ else
178
+ stats
179
+ end
180
+
181
+ end
182
+ end
@@ -3,7 +3,6 @@
3
3
  require 'rbbt-util'
4
4
  require 'rbbt/util/simpleopt'
5
5
  require 'rbbt/workflow'
6
- require 'rbbt/workflow/remote/ssh/get'
7
6
 
8
7
  $0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
9
8
 
@@ -14,8 +13,7 @@ Remove a job and its dependencies
14
13
  $ rbbt purge [options] <job_path>
15
14
 
16
15
  -h--help Print this help
17
- -t--test Do a verbose dry run
18
- -r--relocate Include relocated dependencies
16
+ -r--recursive Remove recursively
19
17
 
20
18
  EOF
21
19
  if options[:help]
@@ -31,4 +29,4 @@ path, search_path, _sep, *other = ARGV
31
29
 
32
30
  raise ParameterException, "No path given" if path.nil?
33
31
 
34
- Step.purge(path, options[:relocate])
32
+ Step.purge(path, options[:recursive])
@@ -201,7 +201,7 @@ workflows.sort.each do |workflow,tasks|
201
201
  Step::INFO_SERIALIZER.load(f)
202
202
  end
203
203
  rescue
204
- Log.exception $!
204
+ #Log.exception $!
205
205
  {:status => :noinfo}
206
206
  end
207
207
  IndiferentHash.setup(info)
@@ -392,6 +392,20 @@ class TestTSVParallelThrough < Test::Unit::TestCase
392
392
  assert_equal size, stream.read.split("\n").length
393
393
  end
394
394
 
395
+ def test_traverse_into_path
396
+ size = 100
397
+ array = (1..size).to_a.collect{|n| n.to_s}
398
+ TmpFile.with_file do |tmpfile|
399
+ Path.setup(tmpfile)
400
+ io = TSV.traverse array, :into => tmpfile do |e|
401
+ e
402
+ end
403
+ io.join
404
+ assert_equal size, Open.read(tmpfile).split("\n").length
405
+ end
406
+ end
407
+
408
+
395
409
  def test_traverse_progress
396
410
  size = 1000
397
411
  array = (1..size).to_a.collect{|n| n.to_s}
@@ -117,6 +117,26 @@ row2 A B C
117
117
  end
118
118
  end
119
119
 
120
+ def test_slice_empty
121
+ content =<<-EOF
122
+ #ID ValueA ValueB Comment
123
+ row1 a b c
124
+ row2 A B C
125
+ EOF
126
+
127
+ TmpFile.with_file(content) do |filename|
128
+ tsv = TSV.open(File.open(filename), :type => :list, :sep => /\s/)
129
+ tsv = tsv.slice []
130
+ assert tsv.fields.empty?
131
+ TmpFile.with_file do |tmpfile|
132
+ iii tsv.to_s
133
+ Open.write(tmpfile, tsv.to_s)
134
+ tsv = TSV.open tmpfile
135
+ assert tsv.fields.empty?
136
+ end
137
+ end
138
+ end
139
+
120
140
  def test_select
121
141
  content =<<-EOF
122
142
  #Id ValueA ValueB OtherID
File without changes
@@ -0,0 +1,136 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
+ require 'rbbt/workflow/util/orchestrator'
3
+ require 'rbbt/workflow/util/trace'
4
+ require 'rbbt-util'
5
+ require 'rbbt/workflow'
6
+
7
+ module TestWF
8
+ extend Workflow
9
+
10
+ MULT = 0.1
11
+ task :a => :text do
12
+ sleep(TestWF::MULT * (rand(10) + 2))
13
+ end
14
+
15
+ dep :a
16
+ task :b => :text do
17
+ sleep(TestWF::MULT * (rand(10) + 2))
18
+ end
19
+
20
+ dep :b
21
+ task :c => :text do
22
+ sleep(TestWF::MULT * (rand(10) + 2))
23
+ end
24
+
25
+ dep :c
26
+ task :d => :text do
27
+ sleep(TestWF::MULT * (rand(10) + 2))
28
+ end
29
+ end
30
+
31
+ class TestClass < Test::Unit::TestCase
32
+ def _test_orchestrate
33
+
34
+ jobs =[]
35
+
36
+ num = 10
37
+ num.times do |i|
38
+ jobs.concat %w(test1 _test2).collect{|name| TestWF.job(:d, name + " #{i}") }
39
+ end
40
+ jobs.each do |j| j.recursive_clean end
41
+
42
+ rules = YAML.load <<-EOF
43
+ defaults:
44
+ log: 4
45
+ default_resources:
46
+ IO: 1
47
+ TestWF:
48
+ a:
49
+ resources:
50
+ cpus: 7
51
+ b:
52
+ resources:
53
+ cpus: 2
54
+ c:
55
+ resources:
56
+ cpus: 10
57
+ d:
58
+ resources:
59
+ cpus: 15
60
+ EOF
61
+
62
+ orchestrator = Workflow::Orchestrator.new(TestWF::MULT, "cpus" => 30, "IO" => 4, "size" => 10 )
63
+ Log.with_severity 0 do
64
+ orchestrator.process(rules, jobs)
65
+ end
66
+
67
+ data = Workflow.trace jobs, :plot_data => true
68
+ eend = data.column("End.second").values.collect{|v| v.to_f}.max
69
+ second_cpus = TSV.setup({}, "Second~CPUS#:type=:single#:cast=:to_f")
70
+ (0..eend.to_i).each do |second|
71
+ tasks = data.select("Start.second"){|s| s <= second}.select("End.second"){|s| s > second}
72
+ cpus = 0
73
+ tasks.through :key, ["Workflow", "Task"] do |k, values|
74
+ workflow, task = values
75
+ cpus += rules[workflow][task.to_s]["resources"]["cpus"]
76
+ end
77
+ second_cpus[second] = cpus
78
+ end
79
+
80
+ assert Misc.mean(second_cpus.values) > 15
81
+ assert Misc.mean(second_cpus.values) < 30
82
+ end
83
+
84
+ def test_orchestrate_size
85
+
86
+ jobs =[]
87
+
88
+ num = 10
89
+ num.times do |i|
90
+ jobs.concat %w(test1 _test2).collect{|name| TestWF.job(:d, name + " #{i}") }
91
+ end
92
+ jobs.each do |j| j.recursive_clean end
93
+
94
+ rules = YAML.load <<-EOF
95
+ defaults:
96
+ log: 4
97
+ default_resources:
98
+ IO: 1
99
+ TestWF:
100
+ a:
101
+ resources:
102
+ cpus: 7
103
+ b:
104
+ resources:
105
+ cpus: 2
106
+ c:
107
+ resources:
108
+ cpus: 10
109
+ d:
110
+ resources:
111
+ cpus: 15
112
+ EOF
113
+
114
+ orchestrator = Workflow::Orchestrator.new(TestWF::MULT, "cpus" => 30, "IO" => 4, "size" => 10 )
115
+ Log.with_severity 0 do
116
+ orchestrator.process(rules, jobs)
117
+ end
118
+
119
+ data = Workflow.trace jobs, :plot_data => true
120
+ eend = data.column("End.second").values.collect{|v| v.to_f}.max
121
+ second_cpus = TSV.setup({}, "Second~CPUS#:type=:single#:cast=:to_f")
122
+ (0..eend.to_i).each do |second|
123
+ tasks = data.select("Start.second"){|s| s <= second}.select("End.second"){|s| s > second}
124
+ cpus = 0
125
+ tasks.through :key, ["Workflow", "Task"] do |k, values|
126
+ workflow, task = values
127
+ cpus += rules[workflow][task.to_s]["resources"]["cpus"]
128
+ end
129
+ second_cpus[second] = cpus
130
+ end
131
+
132
+ assert Misc.mean(second_cpus.values) > 15
133
+ assert Misc.mean(second_cpus.values) < 30
134
+ end
135
+ end
136
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.28.6
4
+ version: 5.28.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-15 00:00:00.000000000 Z
11
+ date: 2020-11-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -327,7 +327,6 @@ files:
327
327
  - lib/rbbt/workflow/remote_workflow/remote_step.rb
328
328
  - lib/rbbt/workflow/remote_workflow/remote_step/rest.rb
329
329
  - lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb
330
- - lib/rbbt/workflow/schedule.rb
331
330
  - lib/rbbt/workflow/soap.rb
332
331
  - lib/rbbt/workflow/step.rb
333
332
  - lib/rbbt/workflow/step/accessor.rb
@@ -337,7 +336,9 @@ files:
337
336
  - lib/rbbt/workflow/task.rb
338
337
  - lib/rbbt/workflow/usage.rb
339
338
  - lib/rbbt/workflow/util/archive.rb
339
+ - lib/rbbt/workflow/util/orchestrator.rb
340
340
  - lib/rbbt/workflow/util/provenance.rb
341
+ - lib/rbbt/workflow/util/trace.rb
341
342
  - share/Rlib/plot.R
342
343
  - share/Rlib/svg.R
343
344
  - share/Rlib/util.R
@@ -516,8 +517,10 @@ files:
516
517
  - test/rbbt/workflow/step/test_dependencies.rb
517
518
  - test/rbbt/workflow/test_doc.rb
518
519
  - test/rbbt/workflow/test_remote_workflow.rb
520
+ - test/rbbt/workflow/test_schedule.rb
519
521
  - test/rbbt/workflow/test_step.rb
520
522
  - test/rbbt/workflow/test_task.rb
523
+ - test/rbbt/workflow/util/test_orchestrator.rb
521
524
  - test/test_helper.rb
522
525
  homepage: http://github.com/mikisvaz/rbbt-util
523
526
  licenses:
@@ -545,7 +548,9 @@ summary: Utilities for the Ruby Bioinformatics Toolkit (rbbt)
545
548
  test_files:
546
549
  - test/rbbt/test_entity.rb
547
550
  - test/rbbt/workflow/test_remote_workflow.rb
551
+ - test/rbbt/workflow/util/test_orchestrator.rb
548
552
  - test/rbbt/workflow/test_doc.rb
553
+ - test/rbbt/workflow/test_schedule.rb
549
554
  - test/rbbt/workflow/test_step.rb
550
555
  - test/rbbt/workflow/remote/test_client.rb
551
556
  - test/rbbt/workflow/step/test_dependencies.rb
@@ -1,238 +0,0 @@
1
- class Step
2
- class ResourceManager
3
- class NotEnoughResources < Exception
4
- end
5
-
6
- attr_accessor :cpus, :memory
7
- def initialize(cpus = nil, memory = nil)
8
- @cpus = cpus
9
- @memory = memory
10
- @sem_file = "ResourceManager-" + rand(10000).to_s
11
- @semaphore = RbbtSemaphore.create_semaphore(@sem_file, 1)
12
- end
13
-
14
- def allocate(cpus = nil, memory = nil, &block)
15
- RbbtSemaphore.synchronize(@semaphore) do
16
- if (@cpus && cpus && @cpus < cups) ||
17
- (@memory && memory && @memory < memory)
18
- raise NotEnoughResources
19
- end
20
- begin
21
- @cpus -= cpus
22
- @memory -= memory
23
- yield
24
- rescue
25
- @cpus += cpus
26
- @memory += memory
27
- end
28
- end
29
- end
30
-
31
- def finalize(manager)
32
- RbbtSemaphore.delete_semaphore(@sem_file)
33
- end
34
-
35
- def self.finalize(manager)
36
- proc { manager.finalize }
37
- end
38
- end
39
-
40
- class Scheduler
41
- attr_accessor :jobs, :cpus, :dep_jobs, :job_deps, :jobps
42
- def initialize(jobs, cpus)
43
- @jobs = jobs
44
- @cpus = cpus
45
-
46
- @job_deps = {}
47
-
48
- with_deps = jobs.dup
49
- @dep_jobs = {}
50
- @job_deps = {}
51
- @jobps = {}
52
- @missing = Set.new
53
- while with_deps.any?
54
- job = with_deps.pop
55
- @jobps[job.path] = job
56
- @missing << job.path unless job.done?
57
-
58
- jdeps = job.dependencies
59
- jdeps += job.inputs.flatten.select{|i| Step === i}
60
-
61
- jdeps.reject!{|dep| dep.done? }
62
- @job_deps[job.path] = []
63
- jdeps.each do |dep|
64
- #next if dep.done?
65
- @dep_jobs[dep.path] ||= []
66
- @job_deps[job.path] << dep.path
67
- @dep_jobs[dep.path] << job.path
68
- with_deps << dep unless @job_deps.include? dep.path
69
- end
70
- end
71
-
72
- def ready
73
- @job_deps.select do |jobp,deps|
74
- (@missing & deps).empty?
75
- end.collect{|jobp,deps| jobp}
76
- end
77
-
78
- def used
79
- iii @dep_jobs
80
- @dep_jobs.select do |dep,jobs|
81
- iif [dep, @missing.to_a, jobs]
82
- (@missing & jobs).empty?
83
- end.keys
84
- end
85
-
86
- def next
87
- priorities = {}
88
- @jobs.each do |job|
89
- priorities[job.path] = 1
90
- end
91
-
92
- @missing.each do |jobp|
93
- end
94
-
95
- ready.first
96
- end
97
- end
98
- end
99
-
100
- #def self._priorities(jobs)
101
- # job_level = {}
102
- # jobs.each do |job|
103
- # job_level[job.path] = 1.0
104
- # end
105
-
106
- # with_deps = jobs.dup
107
- # dep_jobs = {}
108
- # job_deps = {}
109
- # while with_deps.any?
110
- # job = with_deps.pop
111
- # level = job_level[job.path]
112
- # job_deps[job.path] = []
113
- # jdeps = job.dependencies
114
- # jdeps += job.inputs.flatten.select{|i| Step === i}
115
-
116
- # jdeps.reject!{|dep| dep.done? }
117
- # jdeps.each do |dep|
118
- # next if dep.done?
119
- # dep_jobs[dep.path] ||= []
120
- # job_level[dep.path] = level / (10 * jdeps.length) if job_level[dep.path].nil? || job_level[dep.path] < level / (10 * jdeps.length)
121
- # job_deps[job.path] << dep.path
122
- # dep_jobs[dep.path] << job.path
123
- # with_deps << dep unless job_deps.include? dep.path
124
- # end
125
- # end
126
- # [job_level, job_deps, dep_jobs]
127
- #end
128
-
129
- #def self.produce_jobs(jobs, cpus, step_cpus = {})
130
- # require 'fc'
131
-
132
- # step_cpus = IndiferentHash.setup(step_cpus || {})
133
-
134
- # deps = []
135
-
136
- # jobs = [jobs] unless Array === jobs
137
-
138
- # job_level, job_deps, dep_jobs = self._priorities(jobs)
139
-
140
- # jobps = {}
141
- # (jobs + jobs.collect{|job| job.rec_dependencies}).flatten.uniq.each do |job|
142
- # jobps[job.path] = job
143
- # end
144
-
145
- # prio_queue = FastContainers::PriorityQueue.new :max
146
-
147
- # job_deps.each do |jobp,depps|
148
- # next if depps.any?
149
- # level = job_level[jobp]
150
-
151
- # prio_queue.push(jobp, level)
152
- # end
153
-
154
- # queue = RbbtProcessQueue.new cpus
155
-
156
- # missing = job_deps.keys
157
- # queue.callback do |jobp|
158
- # Log.info "Done: #{jobp}"
159
- # missing -= [jobp]
160
-
161
- # job_level, job_deps, dep_jobs = self._priorities(jobs)
162
-
163
- # parentsp = dep_jobs[jobp]
164
-
165
- # parentsp.each do |parentp|
166
- # next unless job_deps[parentp].include? jobp
167
- # job_deps[parentp] -= [jobp]
168
- # if job_deps[parentp].empty?
169
- # level = job_level[parentp]
170
- # prio_queue.push(parentp, level )
171
- # end
172
- # end if parentsp
173
- # prio_queue_new = FastContainers::PriorityQueue.new :max
174
- # while prio_queue.any?
175
- # elem = prio_queue.pop
176
- # prio_queue_new.push(elem, job_level[elem])
177
- # end
178
- # prio_queue = prio_queue_new
179
- # end
180
- #
181
- # queue.init do |jobp|
182
- # Log.info "Processing: #{jobp}"
183
- # job = jobps[jobp]
184
- # job_cpus = step_cpus[job.task_name] || 1
185
- # sleep 0.5
186
- # #job.produce
187
- # jobp
188
- # end
189
-
190
- # while missing.any?
191
- # while prio_queue.empty? && missing.any?
192
- # sleep 1
193
- # end
194
- # break if missing.empty?
195
- # jobp = prio_queue.pop
196
- # queue.process jobp
197
- # end
198
-
199
- # queue.join
200
- #end
201
- end
202
-
203
-
204
- if __FILE__ == $0
205
- require 'rbbt/workflow'
206
-
207
- module TestWF
208
- extend Workflow
209
- input :num, :integer
210
- task :dep => :integer do |num|
211
- num
212
- end
213
- dep :dep, :num => 1
214
- dep :dep, :num => 2
215
- dep :dep, :num => 3
216
- task :test do
217
- dependencies.collect{|d| d.load.to_s} * ","
218
- end
219
- end
220
- Log.severity = 0
221
- job = TestWF.job(:test)
222
- job.recursive_clean
223
-
224
- Rbbt::Config.load_file Rbbt.etc.config_profile.HTS.find
225
- Workflow.require_workflow "Sample"
226
- Workflow.require_workflow "HTS"
227
-
228
- jobs = []
229
- # jobs << Sample.job(:mutect2, "QUINTANA-15")
230
- # jobs << Sample.job(:mutect2, "QUINTANA-25")
231
- # jobs << Sample.job(:mutect2, "QUINTANA-22")
232
- jobs << Sample.job(:mutect2, "small")
233
-
234
- sched = Step::Scheduler.new(jobs, 3)
235
- iii sched.ready
236
- iii sched.used
237
- iii sched.next
238
- end