rbbt-util 5.32.24 → 5.32.28

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/bin/rbbt_find.rb +74 -0
  3. data/lib/rbbt/annotations/annotated_array.rb +4 -0
  4. data/lib/rbbt/annotations/util.rb +29 -0
  5. data/lib/rbbt/entity.rb +3 -1
  6. data/lib/rbbt/hpc/orchestrate/batches.rb +140 -0
  7. data/lib/rbbt/hpc/orchestrate/chains.rb +173 -0
  8. data/lib/rbbt/hpc/orchestrate/rules.rb +70 -0
  9. data/lib/rbbt/hpc/orchestrate.old.rb +220 -0
  10. data/lib/rbbt/hpc/orchestrate.rb +24 -200
  11. data/lib/rbbt/hpc/slurm.rb +1 -0
  12. data/lib/rbbt/persist/tsv.rb +1 -1
  13. data/lib/rbbt/tsv/excel.rb +16 -8
  14. data/lib/rbbt/util/log.rb +6 -2
  15. data/lib/rbbt/util/migrate.rb +6 -1
  16. data/lib/rbbt/util/misc/inspect.rb +4 -1
  17. data/lib/rbbt/util/misc.rb +5 -0
  18. data/lib/rbbt/util/python.rb +1 -1
  19. data/lib/rbbt/workflow/definition.rb +1 -1
  20. data/lib/rbbt/workflow/examples.rb +0 -65
  21. data/lib/rbbt/workflow/integration/nextflow.rb +74 -14
  22. data/lib/rbbt/workflow/step/accessor.rb +0 -70
  23. data/lib/rbbt/workflow/step/dependencies.rb +8 -2
  24. data/lib/rbbt/workflow/step/run.rb +1 -1
  25. data/lib/rbbt/workflow/step/save_load_inputs.rb +162 -0
  26. data/lib/rbbt/workflow/step.rb +2 -1
  27. data/lib/rbbt/workflow/task.rb +2 -2
  28. data/lib/rbbt/workflow.rb +9 -2
  29. data/share/rbbt_commands/hpc/tail +0 -13
  30. data/share/rbbt_commands/lsf/tail +0 -13
  31. data/share/rbbt_commands/slurm/tail +0 -13
  32. data/share/rbbt_commands/tsv/keys +14 -15
  33. data/share/rbbt_commands/tsv/read_excel +2 -2
  34. data/share/rbbt_commands/workflow/task +11 -5
  35. data/test/rbbt/annotations/test_util.rb +11 -0
  36. data/test/rbbt/hpc/orchestrate/test_batches.rb +113 -0
  37. data/test/rbbt/hpc/orchestrate/test_chains.rb +139 -0
  38. data/test/rbbt/hpc/orchestrate/test_rules.rb +92 -0
  39. data/test/rbbt/hpc/test_orchestrate.rb +144 -0
  40. data/test/rbbt/tsv/test_excel.rb +38 -4
  41. data/test/rbbt/util/test_misc.rb +4 -0
  42. data/test/rbbt/workflow/step/test_dependencies.rb +14 -13
  43. data/test/rbbt/workflow/step/test_save_load_inputs.rb +46 -0
  44. metadata +19 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7a7eebab331a8908a290e4a3ad8db2ee3dc3c3dec07b93ec1c74f504f9f7ea29
4
- data.tar.gz: 0f9a2ce6019676118ab439dea57f16605555d9ef850c35998704b9215c477ffb
3
+ metadata.gz: a680ce3f6d91a1b0ff8060ee18b8e68efbd43df56f238869b30f6f88617d23d9
4
+ data.tar.gz: d6c8c724f15d4ce4c910ff9db6becbd0587c8dc04598abcc934884a8368dd93e
5
5
  SHA512:
6
- metadata.gz: 2d81532c2babaae631ca98784e7b7e36039fed68544b6bce9fc8fc2d74a155ffd98fe0752477c7ad245aaa0c65e82c47f330f65c5d70a7f5f20a77de4bc4dbd6
7
- data.tar.gz: 2914786ec23349cabccbb6e8a1c561336b60dd5d862e89c3a95802f040c66f8107d5a2135455f00d8bc7b9e06e390fd6462a861419b78ae25bf3a58c14a33350
6
+ metadata.gz: 9b90c640f51de96d469af4b442d9ba66aa89a592c7776196058da3b20e13b07ef50f67e07018435b12bed13492c3bf9d9abf4de565902094a3d58be4ab628fd5
7
+ data.tar.gz: 523d7234c59bc1781d47827daebb07870913b8dec9851110d5c2e0c92441903601965f9a7bb4b494b0271d64e09ad0f3b80b254176ee3495744c2bbf1f34f714
data/bin/rbbt_find.rb ADDED
@@ -0,0 +1,74 @@
1
#!/usr/bin/env ruby

# Command-line helper that resolves a Path (optionally relative to a workflow
# libdir or to another sub-path) and prints every location where it is found,
# optionally listing the contents of directories that are found.

require 'rbbt-util'
require 'rbbt/util/simpleopt'

$0 = "rbbt #{$previous_commands*" "} #{ File.basename(__FILE__) }" if $previous_commands

options = SOPT.setup <<EOF

Find a path

$ #{$0} [options] [<subpath>] <path>

Use - to read from STDIN

-h--help Print this help
-w--workflows Workflow to load
-s--search_path* Search path to use
-l--list List contents of resolved directories
-n--nocolor Don't color output
EOF
if options[:help]
  if defined? rbbt_usage
    rbbt_usage
  else
    puts SOPT.doc
  end
  exit 0
end

# With a single argument it is the path itself; with two, the first one is
# the sub-path (or workflow name) under which the path is looked up.
subpath, path = ARGV
path, subpath = subpath, nil if path.nil?

# A sub-path that looks like a constant name is treated as a workflow name and
# replaced by that workflow's libdir. Failures are logged and not fatal.
begin
  require 'rbbt/workflow'
  workflow = Workflow.require_workflow subpath
  subpath = workflow.libdir
rescue
  Log.exception $!
end if subpath && subpath =~ /^[A-Z][a-zA-Z]+$/

path = subpath ? Path.setup(subpath)[path] : Path.setup(path)

search_path = options[:search_path].to_sym if options.include? :search_path
nocolor = options[:nocolor]

found = if search_path
          [path.find(search_path)]
        else
          path.find_all
        end

found.each do |found_path|
  if options[:list] && File.directory?(found_path)
    # Directory header; honors --nocolor like every other line of output.
    puts(nocolor ? found_path : Log.color(:blue, found_path))
    found_path.glob("*").each do |entry|
      if nocolor
        puts entry
      else
        color = File.directory?(entry) ? :blue : nil
        puts " " << Log.color(color, entry)
      end
    end
  else
    if nocolor
      puts found_path
    else
      # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
      # Missing paths are shown in red, directories in blue.
      color = File.exist?(found_path) ? (File.directory?(found_path) ? :blue : nil) : :red
      puts Log.color color, found_path
    end
  end
end
74
+
@@ -94,6 +94,10 @@ module AnnotatedArray
94
94
  end
95
95
  end
96
96
 
97
# Concatenates +other+ through the inherited implementation of #+ and
# returns the outcome of passing that result to #annotate.
def +(other)
  concatenated = super(other)
  self.annotate(concatenated)
end
100
+
97
101
 
98
102
  def reject
99
103
  res = []
@@ -220,6 +220,35 @@ module Annotated
220
220
  def marshal_dump
221
221
  Annotated.purge(self).to_sym.to_s
222
222
  end
223
+
224
# Converts +e+ into a plain structure made of :literal / :info hashes.
#
# * An Array extended with AnnotatedArray yields a single hash whose
#   :literal is Annotated.purge(e) and whose :info is e.info.
# * Any other Array yields an Array with one {:literal, :info} hash per
#   element (each element must respond to #info).
# * Any other object yields a single hash for the object itself.
def self.to_hash(e)
  # The constant was previously misspelled, which raised NameError for
  # every Array argument.
  if Array === e && AnnotatedArray === e
    { :literal => Annotated.purge(e), :info => e.info }
  elsif Array === e
    e.collect { |item| { :literal => item, :info => item.info } }
  else
    { :literal => e, :info => e.info }
  end
end
242
+
243
# Rebuilds an annotated object from the structure produced by to_hash.
#
# +hash+ is either a single hash with :literal and :info keys, or an Array
# of such hashes (the form to_hash produces for a plain Array), in which
# case an Array of rebuilt objects is returned.
#
# Every entry of info[:annotation_types] is sent setup(literal, info);
# String entries are first resolved to constants.
#
# NOTE(review): Kernel.const_get resolves whatever constant name is stored
# in the data, so only load hashes that come from a trusted source.
def self.load_hash(hash)
  return hash.collect { |item| load_hash(item) } if Array === hash

  literal = hash[:literal]
  info = hash[:info]
  info[:annotation_types].each do |type|
    type = Kernel.const_get(type) if String === type
    type.setup(literal, info)
  end
  literal
end
223
252
  end
224
253
 
225
254
  class String
data/lib/rbbt/entity.rb CHANGED
@@ -164,7 +164,9 @@ module Entity
164
164
  define_method single_name, &block
165
165
  define_method name do |*args|
166
166
  if Array === self
167
- self.collect{|e| e.send(single_name, *args)}
167
+ res = self.collect{|e| e.send(single_name, *args)}
168
+ res.first.annotate(res) if Annotated === res.first && type == :single2array
169
+ res
168
170
  else
169
171
  self.send(single_name, *args)
170
172
  end
@@ -0,0 +1,140 @@
1
+ require 'rbbt/hpc/orchestrate/rules'
2
+ require 'rbbt/hpc/orchestrate/chains'
3
+
4
+ module HPC
5
+ module Orchestration
6
+
7
# Debug helper that prints a batch, or every batch of an Array of batches.
#
# A single batch is duplicated and its :deps are replaced by the :top_level
# of each dependency before being handed to iif. An Array is printed
# between :BATCHES and :END_BATCHES markers emitted through iii.
def self.pb(batch)
  unless Array === batch
    summary = batch.dup
    summary[:deps] = summary[:deps].collect { |dep| dep[:top_level] }
    return iif(summary)
  end

  iii :BATCHES
  batch.each { |single| pb single }
  iii :END_BATCHES
end
18
+
19
# Collects +job+ and, transitively, everything returned by job_dependencies
# for the jobs visited, skipping any job whose done? is true.
#
# Returns the visited jobs without duplicates, in visiting order (the most
# recently queued job is visited first).
def self.job_workload(job)
  pending = [job]
  visited = []

  while (current = pending.pop)
    next if current.done?

    visited << current
    pending.concat(job_dependencies(current))
    pending.uniq!
  end

  visited.uniq
end
31
+
32
+
33
# Groups the jobs of +workload+ into batches.
#
# rules    - orchestration rules, parsed with parse_chains to learn how
#            many tasks each chain declares.
# chains   - collection of [name, info] pairs, where info has :jobs and
#            :top_level. It is modified in place: used or exhausted chains
#            are removed and the remaining ones lose the jobs already
#            batched.
# workload - Array of jobs; jobs are popped from the caller's Array.
#
# A job that belongs to one or more chains is batched with the chain that
# has the most jobs; on a tie the chain that declares fewer tasks wins.
# Any other job becomes a batch of its own.
#
# Returns the Array of batches (hashes with :jobs, :top_level and, for
# chain batches, :chain).
def self.chain_batches(rules, chains, workload)
  chain_rules = parse_chains(rules)

  batches = []
  while job = workload.pop
    matches = chains.select{|name,info| info[:jobs].include? job }
    if matches.any?
      name, info = matches.sort_by do |name,info|
        num_jobs = info[:jobs].length
        total_tasks = chain_rules[name][:tasks].values.flatten.uniq.length
        # Float division: with integer division the tie-breaker was 0 for
        # every chain declaring more than one task.
        num_jobs.to_f + 1.0 / total_tasks
      end.last
      workload = workload - info[:jobs]
      info[:chain] = name
      batch = info
    else
      batch = {:jobs => [job], :top_level => job}
    end

    # Chains whose top-level job was just batched cannot be used anymore.
    chains.delete_if{|name,info| batch[:jobs].include? info[:top_level] }

    chains.each do |name,info|
      info[:jobs] = info[:jobs] - batch[:jobs]
    end

    # A chain needs at least two remaining jobs to be worth batching.
    chains.delete_if{|name,info| info[:jobs].length < 2 }

    batches << batch
  end

  batches
end
65
+
66
# Sets :deps on every batch to the other batches that contain at least one
# of the jobs returned by job_dependencies for the batch's own jobs.
#
# Modifies the batch hashes in place and returns +batches+.
def self.add_batch_deps(batches)
  batches.each do |current|
    required_jobs = current[:jobs].collect { |member| job_dependencies(member) }.flatten.uniq

    providers = required_jobs.collect do |required|
      (batches - [current]).select { |other| other[:jobs].include? required }
    end

    current[:deps] = providers.flatten.uniq
  end

  batches
end
79
+
80
# Attaches :rules to every batch and then folds batches flagged with :skip
# into one of their dependencies.
#
# Rules: the task-specific rules of every job in the batch are combined
# with accumulate_rules; for chain batches the chain's own rules are merged
# on top with merge_rules.
#
# Consolidation: a batch whose rules include :skip has that flag removed
# and, if it has a dependency that covers all its other dependencies, its
# jobs, dependencies, top-level job and rules are folded into that
# dependency and the batch is marked with :target. After handling one such
# batch the whole pass restarts, re-pointing :deps at the merge targets.
#
# Returns +batches+ with the merged (:target) batches removed.
def self.add_rules_and_consolidate(rules, batches)
  chain_rules = parse_chains(rules)

  batches.each do |batch|
    job_rules = batch[:jobs].inject(nil) do |acc, job|
      specific = task_specific_rules(rules, job.workflow, job.task_name)
      accumulate_rules(acc, specific.dup)
    end

    chain = batch[:chain]
    batch[:rules] = chain ? merge_rules(chain_rules[chain][:rules].dup, job_rules) : job_rules
  end

  loop do
    # Dependencies that were merged away are replaced by their targets.
    batches.each do |batch|
      next unless batch[:deps]
      batch[:deps] = batch[:deps].collect { |dep| dep[:target] || dep }
    end

    restart = false
    batches.each do |batch|
      next unless batch[:rules][:skip]
      batch[:rules].delete :skip
      next if batch[:deps].nil?

      if batch[:deps].any?
        # The target must account for every other dependency of the batch.
        target = batch[:deps].select do |candidate|
          (batch[:deps] - [candidate] - candidate[:deps]).empty?
        end.first
        next if target.nil?

        target[:jobs] = batch[:jobs] + target[:jobs]
        target[:deps] = (target[:deps] + batch[:deps]).uniq - [target]
        target[:top_level] = batch[:top_level]
        target[:rules] = accumulate_rules(target[:rules], batch[:rules])
        batch[:target] = target
      end

      restart = true
      break
    end

    break unless restart
  end

  batches.delete_if { |b| b[:target] }

  batches
end
129
+
130
# Builds the batches for +job+: computes its chains and workload, groups
# the workload with chain_batches, links the batches with add_batch_deps
# and finally applies add_rules_and_consolidate.
#
# Returns the value of add_rules_and_consolidate.
def self.job_batches(rules, job)
  chains = self.job_chains(rules, job)
  workload = job_workload(job)

  grouped = chain_batches(rules, chains, workload)
  linked = add_batch_deps(grouped)
  add_rules_and_consolidate(rules, linked)
end
139
+ end
140
+ end
@@ -0,0 +1,173 @@
1
+ module HPC
2
+ module Orchestration
3
# Returns the names of the chains whose :tasks list the task of +job+ under
# the job's workflow. Workflow and task name are compared as Strings.
def self.check_chains(chains, job)
  workflow_name = job.workflow.to_s
  task_name = job.task_name.to_s

  chains.each_with_object([]) do |(name, chain), matched|
    tasks = chain[:tasks]
    matched << name if tasks.include?(workflow_name) && tasks[workflow_name].include?(task_name)
  end
end
12
+
13
# Parses the "chains" section of +rules+.
#
# Each chain definition has a comma-separated :tasks list whose entries are
# either "Workflow#task" or just "task"; the latter is filed under the
# chain's :workflow value. The remaining keys of the definition become the
# chain's rules.
#
# Returns a hash of chain name => {:tasks => {workflow => [task, ...]},
# :rules => {...}}, or {} when there is no "chains" section.
def self.parse_chains(rules)
  definitions = rules["chains"]
  return {} if definitions.nil?

  parsed = IndiferentHash.setup({})
  definitions.each do |name, definition|
    chain_rules = IndiferentHash.setup(definition.dup)
    task_specs = chain_rules.delete(:tasks).split(/,\s*/)
    default_workflow = chain_rules.delete(:workflow)

    task_specs.each do |spec|
      spec_workflow, spec_task = spec.split("#")
      # No "#" in the spec: the only piece is the task name.
      if spec_task.nil?
        spec_task = spec_workflow
        spec_workflow = default_workflow
      end

      parsed[name] ||= IndiferentHash.setup({:tasks => {}, :rules => chain_rules })
      parsed[name][:tasks][spec_workflow] ||= []
      parsed[name][:tasks][spec_workflow] << spec_task
    end
  end

  parsed
end
34
+
35
# Returns the distinct dependencies and input dependencies of +job+ that
# are either not done or dirty.
def self.job_dependencies(job)
  candidates = (job.dependencies + job.input_dependencies).uniq
  candidates.reject { |dep| dep.done? && !dep.dirty? }
end
38
+
39
+ #def self.job_workload(job)
40
+ # workload = []
41
+ # heap = []
42
+ # heap << job
43
+ # while job = heap.pop
44
+ # next if job.done?
45
+ # workload << job
46
+ # heap.concat job_dependencies(job)
47
+ # heap.uniq!
48
+ # end
49
+ # workload.uniq
50
+ #end
51
+
52
+ #def self.top_level_job(jobs)
53
+ # top = jobs.select do |job|
54
+ # (jobs - job_workload(job)).empty? &&
55
+ # (job_workload(job) - jobs).select{|j| (job_workload(j) & jobs).any? }.empty?
56
+ # end
57
+ # return nil if top.length != 1
58
+ # top.first
59
+ #end
60
+
61
+ #def self.job_chains(rules, job)
62
+ # workload = job_workload(job)
63
+ # chains = parse_chains(rules)
64
+
65
+ # chain_jobs = {}
66
+ # workload.each do |job|
67
+ # check_chains(chains, job).each do |match|
68
+ # chain_jobs[match] ||= []
69
+ # chain_jobs[match] << job
70
+ # end
71
+ # end
72
+
73
+ # job_chains = []
74
+
75
+ # seen = []
76
+ # chain_jobs.sort_by{|name,jobs| jobs.length }.reverse.each do |name,jobs|
77
+ # remain = jobs - seen
78
+ # next unless remain.length > 1
79
+ # top_level_job = top_level_job(jobs)
80
+ # next if top_level_job.nil?
81
+ # job_chains << {:jobs => remain, :rules => chains[name][:rules], :top_level_job => top_level_job}
82
+ # seen.concat remain
83
+ # end
84
+
85
+ # job_chains
86
+ #end
87
+
88
+ #def self._job_chains(rules, job)
89
+ # workload = job_workload(job)
90
+ # chains = parse_chains(rules)
91
+
92
+ # matches = check_chains(chains, job)
93
+
94
+ # job_chains = {}
95
+ # job.dependencies.each do |dep|
96
+ # dep_chains = _job_chains(rules, dep)
97
+ # matches.each do |match|
98
+ # if dep_chains[match] && dep_chains[match].include?(dep)
99
+ # dep_chains[match].prepend job
100
+ # end
101
+ # end
102
+ # job_chains.merge!(dep_chains)
103
+ # end
104
+
105
+ # matches.each do |match|
106
+ # job_chains[match] ||= [job]
107
+ # end
108
+
109
+ # job_chains
110
+ #end
111
+
112
+ #def self.job_chains(rules, job)
113
+ # job_chains = self._job_chains(rules, job)
114
+ # iif job_chains
115
+ # chains = parse_chains(rules)
116
+
117
+ # seen = []
118
+ # job_chains.collect do |name,jobs|
119
+ # remain = jobs - seen
120
+ # next unless remain.length > 1
121
+ # top_level_job = top_level_job(jobs)
122
+ # next if top_level_job.nil?
123
+ # seen.concat remain
124
+ # {:jobs => remain, :rules => chains[name][:rules], :top_level_job => top_level_job}
125
+ # end.compact
126
+ #end
127
+
128
# Recursively computes the chains reachable from +job+.
#
# For every dependency (as given by job_dependencies) the dependency's own
# chains are computed first. A chain of the dependency that +job+ also
# matches is extended: its jobs are gathered under +job+, which becomes the
# :top_level. Chains that +job+ does not share are passed through as they
# are. A chain matched by both +job+ and the dependency but not present
# among the dependency's chains starts as [job, dep].
#
# Returns an Array of [chain_name, {:jobs => [...], :top_level => job}].
def self.job_chains(rules, job)
  chains = self.parse_chains(rules)
  own_matches = check_chains(chains, job)

  result = []
  extended = {}

  job_dependencies(job).each do |dep|
    shared = own_matches & check_chains(chains, dep)
    continued = []

    job_chains(rules, dep).each do |match, info|
      unless shared.include?(match)
        result << [match, info]
        next
      end

      continued << match
      merged = extended[match] ||= {}
      merged[:jobs] ||= []
      merged[:jobs].concat info[:jobs]
      merged[:top_level] = job
    end

    (shared - continued).each do |match|
      result << [match, {:jobs => [job, dep], :top_level => job}]
    end
  end

  # The job itself goes first in every chain it extends.
  extended.each do |match, info|
    info[:jobs].prepend job
    result << [match, info]
  end

  result
end
170
+
171
+ end
172
+ end
173
+
@@ -0,0 +1,70 @@
1
+ module HPC
2
+ module Orchestration
3
# Joins two comma-separated key lists, placing +new+ before +current+
# (+current+ may be nil). Whitespace after commas is dropped and, for
# repeated keys, only the last occurrence is kept.
#
# Returns the resulting comma-separated String.
def self.add_config_keys(current, new)
  combined = current.nil? ? new : new + ',' + current
  keys = combined.gsub(/,\s*/, ',').split(",")
  keys.reverse.uniq.reverse.join(",")
end
10
+
11
# Returns the "defaults" entry of the +workflow+ section of +rules+, passed
# through IndiferentHash.setup, or {} when the section or its defaults are
# missing.
def self.workflow_rules(rules, workflow)
  section = rules[workflow]
  return {} if section.nil?

  defaults = section["defaults"]
  return {} if defaults.nil?

  IndiferentHash.setup(defaults)
end
16
+
17
# Merges +new+ into a copy of +current+ without overriding existing keys.
#
# "config_keys" is the exception: both values are combined with
# add_config_keys. When one side is nil or empty a copy of the other is
# returned; when both are, an empty hash. The result is always passed
# through IndiferentHash.setup.
def self.merge_rules(current, new)
  new_blank = new.nil? || new.empty?
  current_blank = current.nil? || current.empty?

  return IndiferentHash.setup({}) if new_blank && current_blank
  return IndiferentHash.setup(current.dup) if new_blank
  return IndiferentHash.setup(new.dup) if current_blank

  merged = IndiferentHash.setup(current.dup)
  new.each do |key, value|
    if key.to_s == "config_keys"
      merged[key] = add_config_keys merged["config_keys"], value
    elsif !merged.include?(key)
      merged[key] = value
    end
  end
  merged
end
33
+
34
# Combines +new+ into a copy of +current+, aggregating known keys:
#
# * "config_keys": combined with add_config_keys.
# * "cpus": the value with the largest to_i is kept.
# * "time": both values are converted with Misc.timespan, added up and
#   formatted back with Misc.format_seconds.
# * "skip": removed unless both sides have a truthy value.
# * anything else: only set when not already present.
#
# When one side is nil or empty a copy of the other is returned; when both
# are, an empty hash. The result is always passed through
# IndiferentHash.setup.
def self.accumulate_rules(current, new)
  new_blank = new.nil? || new.empty?
  current_blank = current.nil? || current.empty?

  return IndiferentHash.setup({}) if new_blank && current_blank
  return IndiferentHash.setup(current.dup) if new_blank
  return IndiferentHash.setup(new.dup) if current_blank

  combined = IndiferentHash.setup(current.dup)
  new.each do |key, value|
    case key.to_s
    when "config_keys"
      combined[key] = add_config_keys combined["config_keys"], value
    when "cpus"
      candidates = [combined[key], value].compact
      combined[key] = candidates.sort_by { |cpus| cpus.to_i }.last
    when "time"
      spans = [combined[key], value].compact
      total = spans.inject(0) { |sum, span| sum + Misc.timespan(span) }
      combined[key] = Misc.format_seconds total
    when "skip"
      combined.delete key unless combined[key] && value
    else
      combined[key] = value unless combined.include?(key)
    end
  end
  combined
end
57
+
58
# Resolves the rules that apply to +task+ of +workflow+.
#
# Task rules take precedence over the workflow defaults, which in turn take
# precedence over the global :defaults (all combined with merge_rules).
# When +rules+ has no section for the workflow the global defaults are
# returned as they are.
def self.task_specific_rules(rules, workflow, task)
  defaults = rules[:defaults] || {}
  workflow_name = workflow.to_s
  task_name = task.to_s

  return defaults if rules[workflow_name].nil?

  base_rules = merge_rules(workflow_rules(rules, workflow_name), defaults)
  return IndiferentHash.setup(base_rules.dup) if rules[workflow_name][task_name].nil?

  merge_rules(rules[workflow_name][task_name], base_rules)
end
67
+
68
+
69
+ end
70
+ end