rbbt-util 5.28.6 → 5.28.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 900106fb1799c857a482187c7cb7d18a40c35dbeb5dd231bf8e1851235e28c63
4
- data.tar.gz: e5ec2271b3ada0b282963f4ffc7e2b623d8bb84fcb72867a6848b93a80052463
3
+ metadata.gz: f13812c850239f7d78c94e74288ea7807663c72146002e13a13aa99babb18555
4
+ data.tar.gz: 3568f646d3095448ac6c8d99f32a5513421c80fd9d8b1f9a049a7ce512f3139d
5
5
  SHA512:
6
- metadata.gz: 1a418d5b4fe4369f7c25dd063c60a65c2b6c0bd7e3dbf9fd156fe875f85ef1b2eeb4d2eb1a03472e191de98bbfbae20abdb86877d381193f86fe4c8325d2f391
7
- data.tar.gz: 5225d6dbdbab08d0ed5239eba91bbe3d2a8dd16c92c24fe177b66ed3b9d4baef4e24818057de8bf694edf715aabd72b481926a2466c76492c27b75be25634b27
6
+ metadata.gz: 644bcc769e818310492993b15a024b83de399d65a0cfc74b9dc06ee233bfd66da3c0951a26899c66ce62a8410104f00b8b84c07f77a4a1159a7fb5149befb5a1
7
+ data.tar.gz: da5fd395014feded588f223686bd032dd86e46cac0e48e4369e9d1ea2e1f0d021e0fab02539ab2be1f483efc5463b9487952783edf2e00f22d7158879b5fd32d
@@ -25,6 +25,7 @@ module Persist
25
25
  MEMORY = {} unless defined? MEMORY
26
26
  MAX_FILE_LENGTH = 150
27
27
 
28
+ # Return non-false if the first file is newer than the second file
28
29
  def self.newer?(path, file)
29
30
  return true if not Open.exists?(file)
30
31
  path = path.find if Path === path
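The new comment documents the contract, but the hunk only shows the first lines of the method. A minimal sketch of how the check could continue, assuming a plain mtime comparison (the gem's actual code may return a richer non-false value):

    # Sketch only; the mtime comparison is an assumption, not shown in this hunk.
    def self.newer?(path, file)
      return true if not Open.exists?(file)
      path = path.find if Path === path
      file = file.find if Path === file    # assumed symmetric handling of the second argument
      File.mtime(path) > File.mtime(file)  # assumed comparison of modification times
    end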
@@ -550,9 +550,17 @@ module TSV
550
550
  "\t" << ([""] * fields.length) * "\t" << "\n"
551
551
  end
552
552
  when Array
553
- "\t" << values.collect{|v| Array === v ? v * "|" : v} * "\t" << "\n"
553
+ if fields.nil? or fields.empty?
554
+ "\n"
555
+ else
556
+ "\t" << values.collect{|v| Array === v ? v * "|" : v} * "\t" << "\n"
557
+ end
554
558
  else
555
- "\t" << values.to_s << "\n"
559
+ if fields.nil? or fields.empty?
560
+ "\n"
561
+ else
562
+ "\t" << values.to_s << "\n"
563
+ end
556
564
  end
557
565
  end
558
566
 
@@ -32,9 +32,17 @@ module TSV
32
32
  sep + ([""] * fields.length) * sep << "\n"
33
33
  end
34
34
  when Array
35
- sep + (values.collect{|v| Array === v ? v * "|" : v} * sep) << "\n"
35
+ if fields.nil? or fields.empty?
36
+ "\n"
37
+ else
38
+ sep + (values.collect{|v| Array === v ? v * "|" : v} * sep) << "\n"
39
+ end
36
40
  else
37
- sep + values.to_s << "\n"
41
+ if fields.nil? or fields.empty?
42
+ "\n"
43
+ else
44
+ sep + values.to_s << "\n"
45
+ end
38
46
  end
39
47
  end
40
48
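Both dump paths now special-case an empty field list, so a TSV without value columns serializes as bare keys rather than keys followed by dangling separators. A small illustration, reusing the string-spec form of TSV.setup and the slice call that appear elsewhere in this diff (the row contents are made up):

    tsv = TSV.setup({"row1" => ["a"], "row2" => ["A"]}, "ID~ValueA#:type=:list")
    sliced = tsv.slice []    # keep the keys, drop every value field
    puts sliced.to_s         # "#ID" header and bare keys, no trailing separators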
 
@@ -624,6 +624,8 @@ module TSV
624
624
  def self.traverse(obj, options = {}, &block)
625
625
  into = options[:into]
626
626
 
627
+ into = options[:into] = Open.open(into, :mode => "w") if Misc.is_filename?(into)
628
+
627
629
  case into
628
630
  when :stream
629
631
  sout = Misc.open_pipe false, false do |sin|
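With this line, :into may now be a filename; it is detected with Misc.is_filename? (patched below) and opened for writing. The new test_traverse_into_path test later in this diff exercises exactly this, condensed here:

    array = (1..100).to_a.collect{|n| n.to_s}
    TmpFile.with_file do |tmpfile|
      Path.setup(tmpfile)                      # make the string a Path so is_filename? accepts it
      io = TSV.traverse array, :into => tmpfile do |e|
        e
      end
      io.join                                  # wait for the background writer to finish
      Open.read(tmpfile).split("\n").length    # => 100
    end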
@@ -184,7 +184,11 @@ module TSV
184
184
  str = ""
185
185
  str << preamble.strip << "\n" if preamble and not preamble.empty?
186
186
  if fields
187
- str << header_hash << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
187
+ if fields.empty?
188
+ str << header_hash << (key_field || "ID").to_s << "\n"
189
+ else
190
+ str << header_hash << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
191
+ end
188
192
  end
189
193
 
190
194
  str
@@ -15,7 +15,7 @@ module Rbbt::Config
15
15
  end
16
16
 
17
17
  def self.load_file(file)
18
- Log.debug "Loading file: #{ file }"
18
+ Log.debug "Loading config file: #{ file }"
19
19
  TSV.traverse file, :type => :array do |line|
20
20
  next if line =~ /^#/
21
21
  key, value, *tokens = line.strip.split(/\s/)
@@ -85,6 +85,7 @@ module Rbbt::Config
85
85
  priorities
86
86
  end
87
87
 
88
+ # For equal priorities the matching prioritizes tokens earlier in the list
88
89
  def self.get(key, *tokens)
89
90
  options = tokens.pop if Hash === tokens.last
90
91
  default = options.nil? ? nil : options[:default]
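The added comment clarifies tie-breaking between tokens of equal priority. For context, load_file above parses whitespace-separated lines of key, value and qualifying tokens; a hedged sketch of a lookup (the key and token names are hypothetical):

    # hypothetical config line parsed by load_file:
    #   cpus 4 some_token other_token
    Rbbt::Config.get('cpus', 'some_token', 'other_token', :default => 1)
    # with equal priorities, 'some_token' is preferred over 'other_token'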
@@ -113,7 +113,7 @@ end
113
113
  end
114
114
 
115
115
  def self.is_filename?(string)
116
- return true if defined? PATH and Path === string
116
+ return true if defined? Path and Path === string
117
117
  return true if string.respond_to? :exists
118
118
  return true if String === string and string.length < 265 and File.exist?(string)
119
119
  return false
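The guard previously tested the non-existent PATH constant, so Path objects fell through to the string checks. With the corrected constant the method behaves roughly as follows (illustration only, based on the body shown above):

    Misc.is_filename?(Rbbt.etc.config_profile)  # true: Path objects always qualify
    Misc.is_filename?("/etc/hosts")             # true: short string naming an existing file
    Misc.is_filename?("line one\nline two")     # false: ordinary content, not a path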
@@ -83,7 +83,7 @@ module Workflow
83
83
  self.archive_deps
84
84
  self.dependencies = self.dependencies - [dep]
85
85
  Open.rm_rf self.files_dir if Open.exist? self.files_dir
86
- FileUtils.cp_r dep.files_dir, self.files_dir if Open.exist? dep.files_dir
86
+ FileUtils.cp_r dep.files_dir, self.files_dir if Open.exist?(dep.files_dir)
87
87
  Open.ln_h dep.path, self.tmp_path
88
88
  case remove.to_s
89
89
  when 'true'
@@ -92,8 +92,10 @@ module Workflow
92
92
  dep.recursive_clean
93
93
  end
94
94
  else
95
- Open.rm_rf self.files_dir
96
- Open.link dep.files_dir, self.files_dir
95
+ if Open.exists?(dep.files_dir)
96
+ Open.rm_rf self.files_dir
97
+ Open.link dep.files_dir, self.files_dir
98
+ end
97
99
  Open.link dep.path, self.path
98
100
  end
99
101
  nil
@@ -84,6 +84,7 @@ class Step
84
84
  end
85
85
 
86
86
  def load_dependencies_from_info
87
+ relocated = nil
87
88
  @dependencies = (self.info[:dependencies] || []).collect do |task,name,dep_path|
88
89
  if Open.exists?(dep_path) || Open.exists?(dep_path + '.info')
89
90
  Workflow._load_step dep_path
@@ -411,7 +412,7 @@ class Step
411
412
  return
412
413
  end
413
414
 
414
- if (Open.exists?(path) or Open.broken_link?(path)) or Open.exists?(pid_file) or Open.exists?(info_file) or Open.exists?(files_dir)
415
+ if (Open.exists?(path) or Open.broken_link?(path)) or Open.exists?(pid_file) or Open.exists?(info_file) or Open.exists?(files_dir) or Open.broken_link?(files_dir)
415
416
 
416
417
  @result = nil
417
418
  @pid = nil
@@ -419,8 +420,8 @@ class Step
419
420
  Misc.insist do
420
421
  Open.rm info_file if Open.exists?(info_file)
421
422
  Open.rm md5_file if Open.exists?(md5_file)
422
- Open.rm path if (Open.exists?(path) or Open.broken_link?(path))
423
- Open.rm_rf files_dir if Open.exists?(files_dir)
423
+ Open.rm path if (Open.exists?(path) || Open.broken_link?(path))
424
+ Open.rm_rf files_dir if Open.exists?(files_dir) || Open.broken_link?(files_dir)
424
425
  Open.rm pid_file if Open.exists?(pid_file)
425
426
  Open.rm tmp_path if Open.exists?(tmp_path)
426
427
  end
@@ -262,6 +262,7 @@ puts resource[path].find(search_path)
262
262
 
263
263
  job_files.each do |file|
264
264
  begin
265
+ Log.debug "Purging #{file}"
265
266
  Open.rm_rf file if Open.exists?(file)
266
267
  rescue
267
268
  Log.warn "Could not erase '#{file}': #{$!.message}"
@@ -0,0 +1,190 @@
1
+ require 'rbbt/workflow'
2
+
3
+ module Workflow
4
+ class Orchestrator
5
+
6
+ def self.job_workload(job)
7
+ workload = {job => []}
8
+ return workload if job.done?
9
+
10
+ job.dependencies.each do |dep|
11
+ next if dep.done?
12
+ workload.merge!(job_workload(dep))
13
+ workload[job] += workload[dep]
14
+ workload[job] << dep
15
+ end
16
+
17
+ job.input_dependencies.each do |dep|
18
+ next if dep.done?
19
+ workload.merge!(job_workload(dep))
20
+ workload[job] += workload[dep]
21
+ workload[job] << dep
22
+ end
23
+
24
+ workload
25
+ end
26
+
27
+ def self.job_rules(rules, job)
28
+ workflow = job.workflow.to_s
29
+ task_name = job.task_name.to_s
30
+
31
+ return IndiferentHash.setup(rules["defaults"]) unless rules[workflow]
32
+ return IndiferentHash.setup(rules["defaults"]) unless rules[workflow][task_name]
33
+
34
+ job_rules = IndiferentHash.setup(rules[workflow][task_name])
35
+ rules["defaults"].each{|k,v| job_rules[k] ||= v } if rules["defaults"]
36
+ job_rules
37
+ end
38
+
39
+ def self.purge_duplicates(candidates)
40
+ seen = Set.new
41
+ candidates.select do |job|
42
+ if seen.include? job.path
43
+ false
44
+ else
45
+ seen << job.path
46
+ true
47
+ end
48
+ end
49
+ end
50
+
51
+ def self.job_resources(rules, job)
52
+ resources = (job_rules(rules, job) || {})["resources"] || {}
53
+
54
+ IndiferentHash.setup(resources)
55
+
56
+ default_resources = rules["default_resources"] || rules["defaults"]["resources"]
57
+ default_resources.each{|k,v| resources[k] ||= v } if default_resources
58
+
59
+ resources
60
+ end
61
+
62
+ def self.sort_candidates(candidates, rules)
63
+ seen = Set.new
64
+ candidates.sort_by do |job|
65
+ - job_resources(rules, job).values.inject(0){|acc,e| acc += e}
66
+ end
67
+ end
68
+
69
+ def self.candidates(workload, rules)
70
+ if rules.empty?
71
+ candidates = workload.select{|k,v| v.empty? }.
72
+ collect{|k,v| k}.
73
+ reject{|k| k.done? }
74
+ else
75
+ candidates = workload. #select{|k,v| Orchestrator.job_rules(rules, k) }.
76
+ select{|k,v| v.empty? }.
77
+ collect{|k,v| k }.
78
+ reject{|k| k.done? }
79
+ end
80
+
81
+ top_level = workload.keys - workload.values.flatten
82
+
83
+ candidates = purge_duplicates candidates
84
+ candidates = sort_candidates candidates, rules
85
+
86
+ candidates
87
+ end
88
+
89
+ attr_accessor :available_resources, :resources_requested, :resources_used, :timer
90
+
91
+ def initialize(timer = 5, available_resources = {})
92
+ @timer = timer
93
+ @available_resources = IndiferentHash.setup(available_resources)
94
+ @resources_requested = IndiferentHash.setup({})
95
+ @resources_used = IndiferentHash.setup({})
96
+ end
97
+
98
+ def release_resources(job)
99
+ if resources_used[job]
100
+ resources_used[job].each do |resource,value|
101
+ next if resource == 'size'
102
+ resources_requested[resource] -= value.to_i
103
+ end
104
+ resources_used.delete job
105
+ end
106
+ end
107
+
108
+ def check_resources(rules, job)
109
+ resources = Orchestrator.job_resources(rules, job)
110
+
111
+ limit_resources = resources.select{|resource,value| available_resources[resource] && ((resources_requested[resource] || 0) + value) > available_resources[resource] }.collect{|resource,v| resource }
112
+ if limit_resources.any?
113
+ Log.debug "Orchestrator waiting on #{job.path} due to #{limit_resources * ", "}"
114
+ else
115
+
116
+ resources_used[job] = resources
117
+ resources.each do |resource,value|
118
+ resources_requested[resource] ||= 0
119
+ resources_requested[resource] += value.to_i
120
+ end
121
+ Log.low "Orchestrator producing #{job.path} with resources #{resources}"
122
+
123
+ return yield
124
+ end
125
+ end
126
+
127
+ def run_with_rules(rules, job)
128
+ job_rules = Orchestrator.job_rules(rules, job)
129
+
130
+ Rbbt::Config.with_config do
131
+ job_rules[:config_keys].each do |config|
132
+ Rbbt::Config.process_config config
133
+ end if job_rules && job_rules[:config_keys]
134
+
135
+ log = job_rules[:log] if job_rules
136
+ log = Log.severity if log.nil?
137
+ Log.with_severity log do
138
+ job.produce(false, true)
139
+ end
140
+ end
141
+ end
142
+
143
+ def process(rules, jobs)
144
+ begin
145
+
146
+ workload = jobs.inject({}){|acc,job| acc.merge!(Orchestrator.job_workload(job)) }
147
+
148
+ while workload.values.flatten.any?
149
+
150
+ candidates = resources_used.keys + Orchestrator.candidates(workload, rules)
151
+ raise "No candidates" if candidates.empty?
152
+
153
+ candidates.each do |job|
154
+ case
155
+ when (job.error? || job.aborted?)
156
+ if job.recoverable_error?
157
+ job.clean
158
+ raise TryAgain
159
+ else
160
+ next
161
+ end
162
+ release_resources(job)
163
+ when job.done?
164
+ Log.debug "Orchestrator done #{job.path}"
165
+ release_resources(job)
166
+ raise TryAgain
167
+
168
+ when job.running?
169
+ next
170
+
171
+ else
172
+ check_resources(rules, job) do
173
+ run_with_rules(rules, job)
174
+ end
175
+ end
176
+ end
177
+
178
+ new_workload = {}
179
+ workload.each do |k,v|
180
+ next if k.done?
181
+ new_workload[k] = v.reject{|d| d.done? || (d.error? && ! d.recoverable_error?)}
182
+ end
183
+ sleep timer
184
+ end
185
+ rescue TryAgain
186
+ retry
187
+ end
188
+ end
189
+ end
190
+ end
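The test added later in this diff shows the intended usage: rules are a nested hash (usually loaded from YAML) of per-workflow, per-task resource requirements plus defaults, and process drives a set of jobs against a pool of available resources, polling every timer seconds. Condensed from that test (TestWF and the numbers are the test's own; the hash literal stands in for its YAML):

    rules = {
      "defaults"          => { "log" => 4 },
      "default_resources" => { "IO" => 1 },
      "TestWF"            => { "a" => { "resources" => { "cpus" => 7 } } }
    }
    jobs = [TestWF.job(:a, "example")]

    # 30 cpus and 4 IO slots available; look for runnable candidates every 5 seconds
    orchestrator = Workflow::Orchestrator.new(5, "cpus" => 30, "IO" => 4, "size" => 10)
    orchestrator.process(rules, jobs)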
@@ -0,0 +1,182 @@
1
+ require 'rbbt/util/R'
2
+
3
+ module Workflow
4
+ def self.trace(seed_jobs, options = {})
5
+
6
+ jobs = []
7
+ seed_jobs.each{|j| jobs << j; jobs += j.rec_dependencies}
8
+
9
+ data = TSV.setup({}, "Job~Workflow,Task,Start,End#:type=:list")
10
+ min_start = nil
11
+ max_done = nil
12
+ jobs.each do |job|
13
+ next unless job.info[:done]
14
+ started = job.info[:started]
15
+ ddone = job.info[:done]
16
+
17
+ code = [job.workflow, job.task_name].compact.collect{|s| s.to_s} * "."
18
+ code = code + '.' + job.name
19
+
20
+ data[code] = [job.workflow.to_s, job.task_name, started, ddone]
21
+ if min_start.nil?
22
+ min_start = started
23
+ else
24
+ min_start = started if started < min_start
25
+ end
26
+
27
+ if max_done.nil?
28
+ max_done = ddone
29
+ else
30
+ max_done = ddone if ddone > max_done
31
+ end
32
+ end
33
+
34
+ data.add_field "Start.second" do |k,value|
35
+ value["Start"] - min_start
36
+ end
37
+
38
+ data.add_field "End.second" do |k,value|
39
+ value["End"] - min_start
40
+ end
41
+
42
+ if options[:fix_gap]
43
+ ranges = []
44
+ data.through do |k,values|
45
+ start, eend = values.values_at "Start.second", "End.second"
46
+
47
+ ranges << (start..eend)
48
+ end
49
+
50
+ gaps = {}
51
+ last = nil
52
+ Misc.collapse_ranges(ranges).each do |range|
53
+ start = range.begin
54
+ eend = range.end
55
+ if last
56
+ gaps[last] = start - last
57
+ end
58
+ last = eend
59
+ end
60
+
61
+ data.process "End.second" do |value,k,values|
62
+ gap = Misc.sum(gaps.select{|pos,size| pos < values["Start.second"]}.collect{|pos,size| size})
63
+ value - gap
64
+ end
65
+
66
+ data.process "Start.second" do |value,k,values|
67
+ gap = Misc.sum(gaps.select{|pos,size| pos < values["Start.second"]}.collect{|pos,size| size})
68
+ value - gap
69
+ end
70
+ end
71
+
72
+ tasks_info = {}
73
+
74
+ jobs.each do |dep|
75
+ next unless dep.info[:done]
76
+ task = [dep.workflow, dep.task_name].compact.collect{|s| s.to_s} * "#"
77
+ info = tasks_info[task] ||= {}
78
+
79
+ time = dep.info[:done] - dep.info[:started]
80
+ info[:time] ||= []
81
+ info[:time] << time
82
+
83
+ cpus = nil
84
+ spark = false
85
+ shard = false
86
+ dep.info[:config_keys].select do |kinfo|
87
+ key, value, tokens = kinfo
88
+ key = key.to_s
89
+ cpus = value if key.include? 'cpu'
90
+ spark = value if key == 'spark'
91
+ shard = value if key == 'shard'
92
+ end
93
+
94
+ info[:cpus] = cpus || 1
95
+ info[:spark] = spark
96
+ info[:shard] = shard
97
+ end
98
+
99
+ stats = TSV.setup({}, "Task~Calls,Avg. Time,Total Time,Cpus,Spark,Shard#:type=:list")
100
+
101
+ tasks_info.each do |task, info|
102
+ time_lists, cpus, spark, shard = info.values_at :time, :cpus, :spark, :shard
103
+ avg_time = Misc.mean(time_lists)
104
+ total_time = Misc.sum(time_lists)
105
+ calls = time_lists.length
106
+ stats[task] = [calls, avg_time, total_time, cpus, spark, shard]
107
+ end
108
+
109
+ raise "No jobs to process" if data.size == 0
110
+
111
+ start = data.column("Start.second").values.flatten.collect{|v| v.to_f}.min
112
+ eend = data.column("End.second").values.flatten.collect{|v| v.to_f}.max
113
+ total = eend - start
114
+ Log.info "Total time elapsed: #{total} seconds"
115
+
116
+ if options[:fix_gap]
117
+ total_gaps = Misc.sum(gaps.collect{|k,v| v})
118
+ Log.info "Total gaps: #{total_gaps} seconds"
119
+ end
120
+
121
+ plot, width, height = options.values_at :plot, :width, :height
122
+ if plot
123
+ data.R <<-EOF, [:svg]
124
+ rbbt.require('tidyverse')
125
+ rbbt.require('ggplot2')
126
+
127
+ names(data) <- make.names(names(data))
128
+ data$id = rownames(data)
129
+ data$content = data$Task
130
+ data$start = data$Start
131
+ data$end = data$End
132
+ data$Project = data$Workflow
133
+
134
+ tasks = data
135
+
136
+ #theme_gantt <- function(base_size=11, base_family="Source Sans Pro Light") {
137
+ theme_gantt <- function(base_size=11, base_family="Sans Serif") {
138
+ ret <- theme_bw(base_size, base_family) %+replace%
139
+ theme(panel.background = element_rect(fill="#ffffff", colour=NA),
140
+ axis.title.x=element_text(vjust=-0.2), axis.title.y=element_text(vjust=1.5),
141
+ title=element_text(vjust=1.2, family="Source Sans Pro Semibold"),
142
+ panel.border = element_blank(), axis.line=element_blank(),
143
+ panel.grid.minor=element_blank(),
144
+ panel.grid.major.y = element_blank(),
145
+ panel.grid.major.x = element_line(size=0.5, colour="grey80"),
146
+ axis.ticks=element_blank(),
147
+ legend.position="bottom",
148
+ axis.title=element_text(size=rel(1.2), family="Source Sans Pro Semibold"),
149
+ strip.text=element_text(size=rel(1.5), family="Source Sans Pro Semibold"),
150
+ strip.background=element_rect(fill="#ffffff", colour=NA),
151
+ panel.spacing.y=unit(1.5, "lines"),
152
+ legend.key = element_blank())
153
+
154
+ ret
155
+ }
156
+
157
+ tasks.long <- tasks %>%
158
+ gather(date.type, task.date, -c(Project, Task, id, Start.second, End.second)) %>%
159
+ arrange(date.type, task.date) %>%
160
+ mutate(id = factor(id, levels=rev(unique(id)), ordered=TRUE))
161
+
162
+ x.breaks <- seq(length(tasks$Task) + 0.5 - 3, 0, by=-3)
163
+
164
+ timeline <- ggplot(tasks.long, aes(y=id, yend=id, x=Start.second, xend=End.second, colour=Task)) +
165
+ geom_segment() +
166
+ geom_vline(xintercept=x.breaks, colour="grey80", linetype="dotted") +
167
+ guides(colour=guide_legend(title=NULL)) +
168
+ labs(x=NULL, y=NULL) +
169
+ theme_gantt() + theme(axis.text.x=element_text(angle=45, hjust=1))
170
+
171
+ rbbt.png_plot('#{plot}', 'plot(timeline)', width=#{width}, height=#{height}, pointsize=6)
172
+ EOF
173
+ end
174
+
175
+ if options[:plot_data]
176
+ data
177
+ else
178
+ stats
179
+ end
180
+
181
+ end
182
+ end
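The new orchestrator tests use this to reconstruct a timeline after a run: Workflow.trace returns per-task aggregate statistics by default, the raw per-job timing table with :plot_data => true, and writes a Gantt-style timeline image when :plot is given (the file name and dimensions below are illustrative):

    stats = Workflow.trace jobs                      # Task~Calls,Avg. Time,Total Time,Cpus,Spark,Shard
    data  = Workflow.trace jobs, :plot_data => true  # Job~Workflow,Task,Start,End table
    Workflow.trace jobs, :plot => 'timeline.png', :width => 800, :height => 600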
@@ -3,7 +3,6 @@
3
3
  require 'rbbt-util'
4
4
  require 'rbbt/util/simpleopt'
5
5
  require 'rbbt/workflow'
6
- require 'rbbt/workflow/remote/ssh/get'
7
6
 
8
7
  $0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
9
8
 
@@ -14,8 +13,7 @@ Remove a job and its dependencies
14
13
  $ rbbt purge [options] <job_path>
15
14
 
16
15
  -h--help Print this help
17
- -t--test Do a verbose dry run
18
- -r--relocate Include relocated dependencies
16
+ -r--recursive Remove recursively
19
17
 
20
18
  EOF
21
19
  if options[:help]
@@ -31,4 +29,4 @@ path, search_path, _sep, *other = ARGV
31
29
 
32
30
  raise ParameterException, "No path given" if path.nil?
33
31
 
34
- Step.purge(path, options[:relocate])
32
+ Step.purge(path, options[:recursive])
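The old --test and --relocate flags are gone; purge now takes a single recursive switch that is forwarded to Step.purge:

    $ rbbt purge --recursive <job_path>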
@@ -201,7 +201,7 @@ workflows.sort.each do |workflow,tasks|
201
201
  Step::INFO_SERIALIZER.load(f)
202
202
  end
203
203
  rescue
204
- Log.exception $!
204
+ #Log.exception $!
205
205
  {:status => :noinfo}
206
206
  end
207
207
  IndiferentHash.setup(info)
@@ -392,6 +392,20 @@ class TestTSVParallelThrough < Test::Unit::TestCase
392
392
  assert_equal size, stream.read.split("\n").length
393
393
  end
394
394
 
395
+ def test_traverse_into_path
396
+ size = 100
397
+ array = (1..size).to_a.collect{|n| n.to_s}
398
+ TmpFile.with_file do |tmpfile|
399
+ Path.setup(tmpfile)
400
+ io = TSV.traverse array, :into => tmpfile do |e|
401
+ e
402
+ end
403
+ io.join
404
+ assert_equal size, Open.read(tmpfile).split("\n").length
405
+ end
406
+ end
407
+
408
+
395
409
  def test_traverse_progress
396
410
  size = 1000
397
411
  array = (1..size).to_a.collect{|n| n.to_s}
@@ -117,6 +117,26 @@ row2 A B C
117
117
  end
118
118
  end
119
119
 
120
+ def test_slice_empty
121
+ content =<<-EOF
122
+ #ID ValueA ValueB Comment
123
+ row1 a b c
124
+ row2 A B C
125
+ EOF
126
+
127
+ TmpFile.with_file(content) do |filename|
128
+ tsv = TSV.open(File.open(filename), :type => :list, :sep => /\s/)
129
+ tsv = tsv.slice []
130
+ assert tsv.fields.empty?
131
+ TmpFile.with_file do |tmpfile|
132
+ iii tsv.to_s
133
+ Open.write(tmpfile, tsv.to_s)
134
+ tsv = TSV.open tmpfile
135
+ assert tsv.fields.empty?
136
+ end
137
+ end
138
+ end
139
+
120
140
  def test_select
121
141
  content =<<-EOF
122
142
  #Id ValueA ValueB OtherID
@@ -0,0 +1,136 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
+ require 'rbbt/workflow/util/orchestrator'
3
+ require 'rbbt/workflow/util/trace'
4
+ require 'rbbt-util'
5
+ require 'rbbt/workflow'
6
+
7
+ module TestWF
8
+ extend Workflow
9
+
10
+ MULT = 0.1
11
+ task :a => :text do
12
+ sleep(TestWF::MULT * (rand(10) + 2))
13
+ end
14
+
15
+ dep :a
16
+ task :b => :text do
17
+ sleep(TestWF::MULT * (rand(10) + 2))
18
+ end
19
+
20
+ dep :b
21
+ task :c => :text do
22
+ sleep(TestWF::MULT * (rand(10) + 2))
23
+ end
24
+
25
+ dep :c
26
+ task :d => :text do
27
+ sleep(TestWF::MULT * (rand(10) + 2))
28
+ end
29
+ end
30
+
31
+ class TestClass < Test::Unit::TestCase
32
+ def _test_orchestrate
33
+
34
+ jobs =[]
35
+
36
+ num = 10
37
+ num.times do |i|
38
+ jobs.concat %w(test1 _test2).collect{|name| TestWF.job(:d, name + " #{i}") }
39
+ end
40
+ jobs.each do |j| j.recursive_clean end
41
+
42
+ rules = YAML.load <<-EOF
43
+ defaults:
44
+ log: 4
45
+ default_resources:
46
+ IO: 1
47
+ TestWF:
48
+ a:
49
+ resources:
50
+ cpus: 7
51
+ b:
52
+ resources:
53
+ cpus: 2
54
+ c:
55
+ resources:
56
+ cpus: 10
57
+ d:
58
+ resources:
59
+ cpus: 15
60
+ EOF
61
+
62
+ orchestrator = Workflow::Orchestrator.new(TestWF::MULT, "cpus" => 30, "IO" => 4, "size" => 10 )
63
+ Log.with_severity 0 do
64
+ orchestrator.process(rules, jobs)
65
+ end
66
+
67
+ data = Workflow.trace jobs, :plot_data => true
68
+ eend = data.column("End.second").values.collect{|v| v.to_f}.max
69
+ second_cpus = TSV.setup({}, "Second~CPUS#:type=:single#:cast=:to_f")
70
+ (0..eend.to_i).each do |second|
71
+ tasks = data.select("Start.second"){|s| s <= second}.select("End.second"){|s| s > second}
72
+ cpus = 0
73
+ tasks.through :key, ["Workflow", "Task"] do |k, values|
74
+ workflow, task = values
75
+ cpus += rules[workflow][task.to_s]["resources"]["cpus"]
76
+ end
77
+ second_cpus[second] = cpus
78
+ end
79
+
80
+ assert Misc.mean(second_cpus.values) > 15
81
+ assert Misc.mean(second_cpus.values) < 30
82
+ end
83
+
84
+ def test_orchestrate_size
85
+
86
+ jobs =[]
87
+
88
+ num = 10
89
+ num.times do |i|
90
+ jobs.concat %w(test1 _test2).collect{|name| TestWF.job(:d, name + " #{i}") }
91
+ end
92
+ jobs.each do |j| j.recursive_clean end
93
+
94
+ rules = YAML.load <<-EOF
95
+ defaults:
96
+ log: 4
97
+ default_resources:
98
+ IO: 1
99
+ TestWF:
100
+ a:
101
+ resources:
102
+ cpus: 7
103
+ b:
104
+ resources:
105
+ cpus: 2
106
+ c:
107
+ resources:
108
+ cpus: 10
109
+ d:
110
+ resources:
111
+ cpus: 15
112
+ EOF
113
+
114
+ orchestrator = Workflow::Orchestrator.new(TestWF::MULT, "cpus" => 30, "IO" => 4, "size" => 10 )
115
+ Log.with_severity 0 do
116
+ orchestrator.process(rules, jobs)
117
+ end
118
+
119
+ data = Workflow.trace jobs, :plot_data => true
120
+ eend = data.column("End.second").values.collect{|v| v.to_f}.max
121
+ second_cpus = TSV.setup({}, "Second~CPUS#:type=:single#:cast=:to_f")
122
+ (0..eend.to_i).each do |second|
123
+ tasks = data.select("Start.second"){|s| s <= second}.select("End.second"){|s| s > second}
124
+ cpus = 0
125
+ tasks.through :key, ["Workflow", "Task"] do |k, values|
126
+ workflow, task = values
127
+ cpus += rules[workflow][task.to_s]["resources"]["cpus"]
128
+ end
129
+ second_cpus[second] = cpus
130
+ end
131
+
132
+ assert Misc.mean(second_cpus.values) > 15
133
+ assert Misc.mean(second_cpus.values) < 30
134
+ end
135
+ end
136
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.28.6
4
+ version: 5.28.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-15 00:00:00.000000000 Z
11
+ date: 2020-11-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -327,7 +327,6 @@ files:
327
327
  - lib/rbbt/workflow/remote_workflow/remote_step.rb
328
328
  - lib/rbbt/workflow/remote_workflow/remote_step/rest.rb
329
329
  - lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb
330
- - lib/rbbt/workflow/schedule.rb
331
330
  - lib/rbbt/workflow/soap.rb
332
331
  - lib/rbbt/workflow/step.rb
333
332
  - lib/rbbt/workflow/step/accessor.rb
@@ -337,7 +336,9 @@ files:
337
336
  - lib/rbbt/workflow/task.rb
338
337
  - lib/rbbt/workflow/usage.rb
339
338
  - lib/rbbt/workflow/util/archive.rb
339
+ - lib/rbbt/workflow/util/orchestrator.rb
340
340
  - lib/rbbt/workflow/util/provenance.rb
341
+ - lib/rbbt/workflow/util/trace.rb
341
342
  - share/Rlib/plot.R
342
343
  - share/Rlib/svg.R
343
344
  - share/Rlib/util.R
@@ -516,8 +517,10 @@ files:
516
517
  - test/rbbt/workflow/step/test_dependencies.rb
517
518
  - test/rbbt/workflow/test_doc.rb
518
519
  - test/rbbt/workflow/test_remote_workflow.rb
520
+ - test/rbbt/workflow/test_schedule.rb
519
521
  - test/rbbt/workflow/test_step.rb
520
522
  - test/rbbt/workflow/test_task.rb
523
+ - test/rbbt/workflow/util/test_orchestrator.rb
521
524
  - test/test_helper.rb
522
525
  homepage: http://github.com/mikisvaz/rbbt-util
523
526
  licenses:
@@ -545,7 +548,9 @@ summary: Utilities for the Ruby Bioinformatics Toolkit (rbbt)
545
548
  test_files:
546
549
  - test/rbbt/test_entity.rb
547
550
  - test/rbbt/workflow/test_remote_workflow.rb
551
+ - test/rbbt/workflow/util/test_orchestrator.rb
548
552
  - test/rbbt/workflow/test_doc.rb
553
+ - test/rbbt/workflow/test_schedule.rb
549
554
  - test/rbbt/workflow/test_step.rb
550
555
  - test/rbbt/workflow/remote/test_client.rb
551
556
  - test/rbbt/workflow/step/test_dependencies.rb
@@ -1,238 +0,0 @@
1
- class Step
2
- class ResourceManager
3
- class NotEnoughResources < Exception
4
- end
5
-
6
- attr_accessor :cpus, :memory
7
- def initialize(cpus = nil, memory = nil)
8
- @cpus = cpus
9
- @memory = memory
10
- @sem_file = "ResourceManager-" + rand(10000).to_s
11
- @semaphore = RbbtSemaphore.create_semaphore(@sem_file, 1)
12
- end
13
-
14
- def allocate(cpus = nil, memory = nil, &block)
15
- RbbtSemaphore.synchronize(@semaphore) do
16
- if (@cpus && cpus && @cpus < cups) ||
17
- (@memory && memory && @memory < memory)
18
- raise NotEnoughResources
19
- end
20
- begin
21
- @cpus -= cpus
22
- @memory -= memory
23
- yield
24
- rescue
25
- @cpus += cpus
26
- @memory += memory
27
- end
28
- end
29
- end
30
-
31
- def finalize(manager)
32
- RbbtSemaphore.delete_semaphore(@sem_file)
33
- end
34
-
35
- def self.finalize(manager)
36
- proc { manager.finalize }
37
- end
38
- end
39
-
40
- class Scheduler
41
- attr_accessor :jobs, :cpus, :dep_jobs, :job_deps, :jobps
42
- def initialize(jobs, cpus)
43
- @jobs = jobs
44
- @cpus = cpus
45
-
46
- @job_deps = {}
47
-
48
- with_deps = jobs.dup
49
- @dep_jobs = {}
50
- @job_deps = {}
51
- @jobps = {}
52
- @missing = Set.new
53
- while with_deps.any?
54
- job = with_deps.pop
55
- @jobps[job.path] = job
56
- @missing << job.path unless job.done?
57
-
58
- jdeps = job.dependencies
59
- jdeps += job.inputs.flatten.select{|i| Step === i}
60
-
61
- jdeps.reject!{|dep| dep.done? }
62
- @job_deps[job.path] = []
63
- jdeps.each do |dep|
64
- #next if dep.done?
65
- @dep_jobs[dep.path] ||= []
66
- @job_deps[job.path] << dep.path
67
- @dep_jobs[dep.path] << job.path
68
- with_deps << dep unless @job_deps.include? dep.path
69
- end
70
- end
71
-
72
- def ready
73
- @job_deps.select do |jobp,deps|
74
- (@missing & deps).empty?
75
- end.collect{|jobp,deps| jobp}
76
- end
77
-
78
- def used
79
- iii @dep_jobs
80
- @dep_jobs.select do |dep,jobs|
81
- iif [dep, @missing.to_a, jobs]
82
- (@missing & jobs).empty?
83
- end.keys
84
- end
85
-
86
- def next
87
- priorities = {}
88
- @jobs.each do |job|
89
- priorities[job.path] = 1
90
- end
91
-
92
- @missing.each do |jobp|
93
- end
94
-
95
- ready.first
96
- end
97
- end
98
- end
99
-
100
- #def self._priorities(jobs)
101
- # job_level = {}
102
- # jobs.each do |job|
103
- # job_level[job.path] = 1.0
104
- # end
105
-
106
- # with_deps = jobs.dup
107
- # dep_jobs = {}
108
- # job_deps = {}
109
- # while with_deps.any?
110
- # job = with_deps.pop
111
- # level = job_level[job.path]
112
- # job_deps[job.path] = []
113
- # jdeps = job.dependencies
114
- # jdeps += job.inputs.flatten.select{|i| Step === i}
115
-
116
- # jdeps.reject!{|dep| dep.done? }
117
- # jdeps.each do |dep|
118
- # next if dep.done?
119
- # dep_jobs[dep.path] ||= []
120
- # job_level[dep.path] = level / (10 * jdeps.length) if job_level[dep.path].nil? || job_level[dep.path] < level / (10 * jdeps.length)
121
- # job_deps[job.path] << dep.path
122
- # dep_jobs[dep.path] << job.path
123
- # with_deps << dep unless job_deps.include? dep.path
124
- # end
125
- # end
126
- # [job_level, job_deps, dep_jobs]
127
- #end
128
-
129
- #def self.produce_jobs(jobs, cpus, step_cpus = {})
130
- # require 'fc'
131
-
132
- # step_cpus = IndiferentHash.setup(step_cpus || {})
133
-
134
- # deps = []
135
-
136
- # jobs = [jobs] unless Array === jobs
137
-
138
- # job_level, job_deps, dep_jobs = self._priorities(jobs)
139
-
140
- # jobps = {}
141
- # (jobs + jobs.collect{|job| job.rec_dependencies}).flatten.uniq.each do |job|
142
- # jobps[job.path] = job
143
- # end
144
-
145
- # prio_queue = FastContainers::PriorityQueue.new :max
146
-
147
- # job_deps.each do |jobp,depps|
148
- # next if depps.any?
149
- # level = job_level[jobp]
150
-
151
- # prio_queue.push(jobp, level)
152
- # end
153
-
154
- # queue = RbbtProcessQueue.new cpus
155
-
156
- # missing = job_deps.keys
157
- # queue.callback do |jobp|
158
- # Log.info "Done: #{jobp}"
159
- # missing -= [jobp]
160
-
161
- # job_level, job_deps, dep_jobs = self._priorities(jobs)
162
-
163
- # parentsp = dep_jobs[jobp]
164
-
165
- # parentsp.each do |parentp|
166
- # next unless job_deps[parentp].include? jobp
167
- # job_deps[parentp] -= [jobp]
168
- # if job_deps[parentp].empty?
169
- # level = job_level[parentp]
170
- # prio_queue.push(parentp, level )
171
- # end
172
- # end if parentsp
173
- # prio_queue_new = FastContainers::PriorityQueue.new :max
174
- # while prio_queue.any?
175
- # elem = prio_queue.pop
176
- # prio_queue_new.push(elem, job_level[elem])
177
- # end
178
- # prio_queue = prio_queue_new
179
- # end
180
- #
181
- # queue.init do |jobp|
182
- # Log.info "Processing: #{jobp}"
183
- # job = jobps[jobp]
184
- # job_cpus = step_cpus[job.task_name] || 1
185
- # sleep 0.5
186
- # #job.produce
187
- # jobp
188
- # end
189
-
190
- # while missing.any?
191
- # while prio_queue.empty? && missing.any?
192
- # sleep 1
193
- # end
194
- # break if missing.empty?
195
- # jobp = prio_queue.pop
196
- # queue.process jobp
197
- # end
198
-
199
- # queue.join
200
- #end
201
- end
202
-
203
-
204
- if __FILE__ == $0
205
- require 'rbbt/workflow'
206
-
207
- module TestWF
208
- extend Workflow
209
- input :num, :integer
210
- task :dep => :integer do |num|
211
- num
212
- end
213
- dep :dep, :num => 1
214
- dep :dep, :num => 2
215
- dep :dep, :num => 3
216
- task :test do
217
- dependencies.collect{|d| d.load.to_s} * ","
218
- end
219
- end
220
- Log.severity = 0
221
- job = TestWF.job(:test)
222
- job.recursive_clean
223
-
224
- Rbbt::Config.load_file Rbbt.etc.config_profile.HTS.find
225
- Workflow.require_workflow "Sample"
226
- Workflow.require_workflow "HTS"
227
-
228
- jobs = []
229
- # jobs << Sample.job(:mutect2, "QUINTANA-15")
230
- # jobs << Sample.job(:mutect2, "QUINTANA-25")
231
- # jobs << Sample.job(:mutect2, "QUINTANA-22")
232
- jobs << Sample.job(:mutect2, "small")
233
-
234
- sched = Step::Scheduler.new(jobs, 3)
235
- iii sched.ready
236
- iii sched.used
237
- iii sched.next
238
- end