rbbt-util 5.40.0 → 5.40.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '088ac190b36a176e420d4bb1d7b8d28d777133568e45530219f6a0212a1a1144'
4
- data.tar.gz: 47bdc31d247e5fd5d944f493690e8181bcc236cee74c61fefd5ce25711edf7da
3
+ metadata.gz: 0c086b1d2a4bfd64cc80729a4962b2ba04268b5745ae6c79b5d954fc5b15a7a9
4
+ data.tar.gz: 8d0ee0a0942be2cf07259baed09d2c2feb98a67b314adc70bd0ac1e03ecba4f9
5
5
  SHA512:
6
- metadata.gz: 9afce115c221695ee896c1e1f884be9a1ba0f0ae98789a485a906e438252afe8292c5422f3eef57d77089bf55cc0b425988ab9b479b1f422eaa0abb7005cfdfa
7
- data.tar.gz: 4cf4d1e86ce3ab256609aff4c66a3412ea1a9c0c87db08fd5cdad1b98c95147d452cc69d96009c9791cf698ea43903873ed0e4cebbc861bda85097864bed68e8
6
+ metadata.gz: 8ca4b7b1b59eb4546b9e1484fd397c687003389b406de77461da5aa3c773679a87533050c640c90feacc48f9bdadf1c565bcfe9727e8661b2440938ed3c69dbc
7
+ data.tar.gz: 613f30647a8d317984e12a5618fe8d03ab94c3d4ca1acc8ee38460c8f67869c13e8dd83f3d1c4332e813a34802af55d6346f87d898863095ff12368c9f715ba6
data/bin/rbbt CHANGED
@@ -75,7 +75,7 @@ end
75
75
  Log.ignore_stderr do
76
76
  begin
77
77
  require "nokogiri"
78
- rescue
78
+ rescue Exception
79
79
  end
80
80
  end
81
81
 
data/lib/rbbt/hpc/pbs.rb CHANGED
@@ -43,6 +43,8 @@ cd ${PBS_O_WORKDIR}
43
43
  system = Misc.process_options options, :partition
44
44
  filesystems = Misc.process_options options, :filesystems
45
45
 
46
+ filesystems = "home" if filesystems.nil?
47
+
46
48
  filesystems = filesystems * "," if Array === filesystems
47
49
 
48
50
  # NOT USED
@@ -1,6 +1,6 @@
1
1
  module Misc
2
2
 
3
- def self.add_libdir(dir=nil)
3
+ def self.add_libdir(dir=nil)
4
4
  dir ||= File.join(Path.caller_lib_dir(caller.first), 'lib')
5
5
  $LOAD_PATH.unshift(dir) unless $LOAD_PATH.include? dir
6
6
  end
@@ -0,0 +1,41 @@
1
+ require 'rbbt/workflow/step'
2
+
3
# Helpers for running Python code from within a workflow Step.
class Step
  # Runs a Python script through CMD.cmd_log.
  #
  # file    - script handed to the :python command
  # options - Hash forwarded unchanged to CMD.cmd_log
  def python_file(file, options = {})
    CMD.cmd_log(:python, file, options)
  end

  # Hands the block to RbbtPython.run together with +options+.
  def python_block(options = {}, &block)
    RbbtPython.run options, &block
  end

  # Runs Python code, dispatching on what +python+ is:
  #
  # * a Path: the located file is run with #python_file
  # * a String naming an existing file: that file is run with #python_file
  # * any other String: it is taken as source code, written as the
  #   __init__.py of a package named "pkg<N>" inside the directory yielded by
  #   TmpFile.with_file; that directory is added to the Python path and the
  #   package name is yielded to the block from inside the directory
  # * anything else (nil included): passed to #python_block as its options
  #
  # Before dispatching, the workflow's python libdir is added to the Python
  # path; a failure to do so is only logged as a warning.
  def python(python = nil, options = {}, &block)
    begin
      RbbtPython.add_path self.workflow.libdir.python.find
    rescue
      Log.warn "Error loading libdir python for workflow '#{Misc.fingerprint self.workflow}'"
    end

    case python
    when Path
      python_file python.find, options
    when String
      if Open.exists?(python)
        # Forward the options here as well, like the Path branch does
        python_file python, options
      else
        TmpFile.with_file do |dir|
          pkg = "pkg#{rand(100)}"
          # The source code to write is the +python+ argument itself
          Open.write File.join(dir, "#{pkg}/__init__.py"), python

          RbbtPython.add_path dir

          Misc.in_dir dir do
            yield pkg
          end
        end
      end
    else
      python_block(python, &block)
    end
  end
end
41
+
@@ -49,5 +49,5 @@ module RbbtPython
49
49
  end
50
50
  hash
51
51
  end
52
-
53
52
  end
53
+
data/python/rbbt.py CHANGED
@@ -29,6 +29,7 @@ def tsv_preamble(line, comment_char="#"):
29
29
  entries = re.sub(f"^{comment_char}:", '', line)
30
30
  entries = re.sub(f"^{comment_char}:", '', line).split("#")
31
31
  for entry in entries:
32
+ entry = entry.strip()
32
33
  key, value = entry.split("=")
33
34
  key = re.sub("^:","",key)
34
35
  value = re.sub("^:","",value)
@@ -75,7 +76,13 @@ def tsv_pandas(filename, sep="\t", comment_char="#", index_col=0, **kwargs):
75
76
  header = tsv_header(filename, sep=sep, comment_char="#")
76
77
 
77
78
  if ("type" in header and header["type"] == "flat"):
78
- return None
79
+ if ("sep" in header):
80
+ sep=header["sep"]
81
+
82
+ tsv = pandas.read_table(filename, sep=sep, index_col=index_col, header=None, skiprows=[0,1], **kwargs)
83
+
84
+ if ("key_field" in header):
85
+ tsv.index.name = header["key_field"]
79
86
  else:
80
87
  if ("sep" in header):
81
88
  sep=header["sep"]
@@ -0,0 +1,212 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+ require 'rbbt/hpc'
6
+
7
+ #$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
8
+
9
# Erases the batch directories under ~/rbbt-batch of the selected jobs (by
# default those that ended in error or were aborted), printing a summary of
# each one first.

# Parse command line options; the heredoc doubles as the usage text
options = SOPT.setup <<EOF

Clean error or aborted jobs

$ rbbt slurm clean [options]

-h--help Print this help
-d--done Done jobs only
-e--error Error jobs only
-a--aborted SLURM aborted jobs
-q--queued Queued jobs only
-j--job* Job ids
-s--search* Regular expression
-t--tail* Show the last lines of the STDERR
-BP--batch_parameters show batch parameters
-dr--dry_run Do not erase anything
-bs--batch_system* Batch system to use: auto, lsf, slurm (default is auto-detect)
EOF

if options[:help]
  if defined? rbbt_usage
    rbbt_usage
  else
    puts SOPT.doc
  end
  exit 0
end

batch_system = options.delete :batch_system
batch_system ||= 'auto'

HPC::BATCH_MODULE = HPC.batch_system batch_system

raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?

# Use the name of the detected module so it can be compared with the system
# recorded in each job ('auto' would never match any of them)
batch_system = HPC::BATCH_MODULE.to_s.split("::").last.downcase

Log.severity = 4
done, error, aborted, queued, jobid, search, tail, batch_parameters, dry_run = options.values_at :done, :error, :aborted, :queued, :job, :search, :tail, :batch_parameters, :dry_run

# Unless only done jobs are requested, error and aborted jobs are selected by default
aborted = error = true if ! done && aborted.nil? && error.nil?

workdir = File.expand_path('~/rbbt-batch')
Path.setup(workdir)

# Ids of the jobs the batch system currently knows about
running_jobs = begin
                 squeue_txt = HPC::BATCH_MODULE.job_status
                 squeue_txt.split("\n").collect{|l| l.to_i.to_s}
               rescue
                 Log.warn "Cannot determine if jobs are running, they will seem to be all alive (Job ID in green)"
                 squeue_txt = nil
                 $norunningjobs = true
                 []
               end

# Map each job id (first column) to its nodes (last column, comma separated)
if squeue_txt
  job_nodes = {}
  squeue_txt.split("\n").each do |line|
    parts = line.strip.split(/\s+/)
    next if parts.empty?
    job_nodes[parts.first] = parts.last.split(",")
  end
else
  job_nodes = nil
end

count = 0
workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
  dir = File.dirname(fcmd)
  command_txt = Open.read(fcmd)

  # Values recorded in the batch script; nil when the line is missing
  cmd = command_txt[/#CMD: (.*)/, 1]
  exe = command_txt[/# Run command\n(.*?)\n/im, 1]
  container_home = command_txt[/^CONTAINER_DIR=(.*)/, 1]

  # Accept the variable with or without a leading 'export'
  job_batch_system = command_txt[/^(?:export )?BATCH_SYSTEM=(.*)/, 1]
  job_batch_system = job_batch_system.downcase if job_batch_system

  different_system = job_batch_system != batch_system

  if File.exist?(fid = File.join(dir, 'job.id'))
    id = Open.read(fid).chomp
  else
    id = nil
  end

  if File.exist?(fstatus = File.join(dir, 'exit.status'))
    exit_status = Open.read(fstatus).to_i
  else
    exit_status = nil
  end

  # Nodes come from the last line of job.status (6th field for LSF, last
  # field otherwise) or, failing that, from the batch system listing
  if File.exist?(fstatus = File.join(dir, 'job.status'))
    begin
      fields = Open.read(fstatus).split("\n").last.split(/\s+/)
      nodes = (job_batch_system == "lsf" ? fields[5] : fields.last).split(",")
    rescue
      nodes = []
    end
  elsif job_nodes && job_nodes[id]
    nodes = job_nodes[id]
  else
    nodes = []
  end

  # Time since the last write to the output files; nil when none exists yet
  output_times = %w(std.out std.err).collect{|f| File.join(dir, f) }.select{|f| File.exist?(f) }.collect{|f| File.mtime(f) }
  time_diff = output_times.any? ? Time.now - output_times.max : nil

  fdep = File.join(dir, 'dependencies.list')
  deps = Open.read(fdep).split("\n") if File.exist?(fdep)

  fcadep = File.join(dir, 'canfail_dependencies.list')
  cadeps = Open.read(fcadep).split("\n") if File.exist?(fcadep)

  if done || error || aborted || queued || jobid
    select = false
    select = true if done && exit_status == 0
    select = true if error && exit_status && exit_status != 0
    # When the running jobs could not be listed nothing can be told apart
    # from a live job, so no job is considered aborted
    select = true if aborted && exit_status.nil? && ! $norunningjobs && ! running_jobs.include?(id)
    is_running = exit_status.nil? && ( (running_jobs.include?(id) && (!deps || (running_jobs & deps).empty?)) || different_system )
    select = true if queued && deps && (running_jobs & deps).any? || queued && is_running && nodes.empty?
    select = true if jobid && jobid.split(",").include?(id)
    select = select && cmd && cmd.match(/#{search}/) if search
    next unless select
  elsif search
    next unless cmd && cmd.match(/#{search}/)
  end

  puts Log.color(:yellow, "**ERASING**")
  puts Log.color :blue, dir
  puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.batch')).to_s
  puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
  puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
  puts Log.color(:magenta, "CMD: ") << (cmd ? Log.color(:yellow, cmd) : "Missing")
  puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home
  puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) ))
  puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
  puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
  puts Log.color(:magenta, "Nodes: ") << nodes * ", "
  puts Log.color(:magenta, "Output: ") << File.exist?(File.join(dir, 'std.out')).to_s << (id.nil? || time_diff.nil? ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")

  if batch_parameters
    puts Log.color(:magenta, "BATCH parameters: ")
    case job_batch_system
    when 'slurm'
      puts Log.color :blue, CMD.cmd('grep "^#SBATCH" |tail -n +6', :in => command_txt).read.strip
    when 'lsf'
      puts Log.color :blue, CMD.cmd('grep "^#BSUB" |tail -n +6', :in => command_txt).read.strip
    end
  end

  if tail && File.exist?(File.join(dir, 'std.err'))
    if exit_status && exit_status != 0
      puts Log.color(:magenta, "First error or exception found: ")
      puts CMD.cmd("grep -i -w 'error\\|[a-z]*exception' #{File.join(dir, 'std.err')} -A #{tail.to_i} |head -n #{tail.to_i}", :no_fail => true).read
    elsif exit_status
      puts Log.color(:magenta, "Completed jobs: ")
      puts CMD.cmd("grep -i -w 'Completed step' #{File.join(dir, 'std.err')} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
    else
      puts Log.color(:magenta, "Log tail: ")
      puts CMD.cmd("tail -n #{tail.to_i} #{File.join(dir, 'std.err')}").read
    end
  end

  count += 1

  Open.rm_rf dir unless dry_run
end

puts
puts "Found #{count} jobs"
212
+
@@ -0,0 +1,362 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+ require 'rbbt/hpc'
6
+
7
+ #$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
8
+
9
# Lists the batch jobs found under ~/rbbt-batch, optionally filtered by
# status, job id or a regular expression, and prints a report for each one.

# Parse command line options; the heredoc doubles as the usage text
options = SOPT.setup <<EOF

List batch jobs

$ rbbt slurm list [options]

-h--help Print this help
-d--done Done jobs only
-e--error Error jobs only
-a--aborted SLURM aborted jobs
-r--running Running jobs only
-q--queued Queued jobs only
-j--job* Job ids
-s--search* Regular expression
-t--tail* Show the last lines of the STDERR
-l--long Show more entries
-c--compressed Show compressed information about entries
-p--progress Report progress of job and the dependencies
-BP--batch_parameters show batch parameters
-BPP--batch_procpath show Procpath performance summary
-sacct--sacct_peformance show sacct performance summary
-bs--batch_system* Batch system to use: auto, lsf, slurm (default is auto-detect)
EOF

if options[:help]
  if defined? rbbt_usage
    rbbt_usage
  else
    puts SOPT.doc
  end
  exit 0
end

batch_system = options.delete :batch_system
batch_system ||= 'auto'

long = options.delete :long

HPC::BATCH_MODULE = HPC.batch_system batch_system

raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?

# Name of the detected module, to compare with the system recorded in each job
batch_system = HPC::BATCH_MODULE.to_s.split("::").last.downcase

done, error, running, queued, aborted, jobid, search, tail, progress = options.values_at :done, :error, :running, :queued, :aborted, :job, :search, :tail, :progress

workdir = File.expand_path('~/rbbt-batch')
Path.setup(workdir)

# Ids of the jobs the batch system currently knows about
running_jobs = begin
                 squeue_txt = HPC::BATCH_MODULE.job_status
                 squeue_txt.split("\n").collect{|l| l.to_i.to_s}
               rescue
                 Log.warn "Cannot determine if jobs are running, they will seem to be all alive (Job ID in green)"
                 squeue_txt = nil
                 $norunningjobs = true
                 []
               end

# Map each job id (first column) to its nodes (last column, comma separated)
if squeue_txt
  job_nodes = {}
  squeue_txt.split("\n").each do |line|
    parts = line.strip.split(/\s+/)
    next if parts.empty?
    job_nodes[parts.first] = parts.last.split(",")
  end
else
  job_nodes = nil
end

count = 0
workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
  dir = File.dirname(fcmd)
  command_txt = Open.read(fcmd)

  # Values recorded in the batch script; nil when the line is missing
  cmd = command_txt[/#CMD: (.*)/, 1]

  job_batch_system = command_txt[/^export BATCH_SYSTEM=(.*)/, 1]
  job_batch_system = job_batch_system.downcase if job_batch_system

  different_system = job_batch_system != batch_system

  manifest = command_txt[/#MANIFEST: (.*)/, 1]
  step_path = command_txt[/#STEP_PATH: (.*)/, 1]
  exe = command_txt[/#EXEC_CMD: (.*)/, 1]
  container_home = command_txt[/^CONTAINER_DIR=(.*)/, 1]

  if File.exist?(fid = File.join(dir, 'job.id'))
    id = Open.read(fid).chomp
  else
    id = nil
  end

  if File.exist?(fstatus = File.join(dir, 'exit.status'))
    exit_status = Open.read(fstatus).to_i
  else
    exit_status = nil
  end

  # Nodes come from the last line of job.status (6th field for LSF, last
  # field otherwise) or, failing that, from the batch system listing
  if File.exist?(fstatus = File.join(dir, 'job.status'))
    begin
      fields = Open.read(fstatus).split("\n").last.split(/\s+/)
      nodes = (job_batch_system == "lsf" ? fields[5] : fields.last).split(",")
    rescue
      nodes = []
    end
  elsif job_nodes && job_nodes[id]
    nodes = job_nodes[id].reject{|n| n.include? "("}
  else
    nodes = []
  end

  # Times are measured up to the moment the job finished, or up to now if it has not
  if File.exist?(File.join(dir, 'exit.status'))
    now = File.ctime(File.join(dir, 'exit.status'))
  else
    now = Time.now
  end

  # Both stay nil while the job has produced no output files
  time_diff = time_elapsed = nil
  output_files = %w(std.out std.err).collect{|f| File.join(dir, f) }.select{|f| File.exist?(f) }
  if output_files.any?
    time_diff = now - output_files.collect{|f| File.mtime(f) }.max
    time_elapsed = now - output_files.collect{|f| File.ctime(f) }.min
  end

  fdep = File.join(dir, 'dependencies.list')
  deps = Open.read(fdep).split("\n") if File.exist?(fdep)

  fcadep = File.join(dir, 'canfail_dependencies.list')
  cadeps = Open.read(fcadep).split("\n") if File.exist?(fcadep)

  is_running = exit_status.nil? && ( (running_jobs.include?(id) && (deps.nil? || (running_jobs & deps).empty?)) || different_system )
  if done || error || aborted || running || queued || jobid
    select = false
    select = true if done && exit_status == 0
    select = true if error && exit_status && exit_status != 0
    select = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
    select = true if queued && deps && (running_jobs & deps).any? || queued && is_running && nodes.empty?
    select = true if running && nodes.any? && (exit_status.nil? && running_jobs.include?(id)) && (!deps || (running_jobs & deps).empty?)
    select = true if jobid && jobid.split(",").include?(id)
    select = select && step_path && step_path.match(/#{search}/) if search
    next unless select
  elsif search
    next unless cmd && cmd.match(/#{search}/)
  end

  count += 1

  # Compressed mode: a single line per job
  if options[:compressed]
    if different_system
      status = exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : Log.color(:green, id)
    else
      status = exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" :
        (running_jobs.include?(id) || $norunningjobs ?
         (is_running ? Log.color(:cyan, id) : Log.color(:yellow, id) ) :
         Log.color(:red, id) )
    end

    prog_rep = []
    if progress
      step_line = command_txt.split("\n").select{|line| line =~ /^#STEP_PATH:/}.first
      if step_line
        require 'rbbt/workflow'
        step_path = step_line.split(": ").last.strip
        step = Step.new step_path
        step.load_dependencies_from_info
        if ! step.done? && step.file(:progress).exists?
          bar = Log::ProgressBar.new
          bar.load(step.file(:progress).yaml)
          rep = bar.report_msg.split("·")[1]
          prog_rep << rep.sub(/.*?(\d+%)/, Log.color(:blue,'\1')).sub(/\-.*/,'') if rep
        end
      end
    end

    # Fall back to the batch directory when the script records no step path
    workflow, task, name = (step_path || dir).split("/")[-3..-1]
    job_str = [Log.color(:yellow, workflow), Log.color(:magenta, task), name] * "/"
    puts [job_str, status, prog_rep ].flatten * " "
    next
  end

  puts Log.color :blue, dir
  puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.batch')).to_s if long
  puts Log.color(:magenta, "Started: ") << File.ctime(File.join(dir, 'std.err')).to_s if File.exist?(File.join(dir, 'std.err')) && long
  puts Log.color(:magenta, "Manifest: ") << Log.color(:yellow, manifest) if manifest && long
  puts Log.color(:magenta, "Step path: ") << Log.color(:yellow, step_path) if step_path
  puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
  puts Log.color(:magenta, "Exec: ") << (exe || "Missing") if long
  puts Log.color(:magenta, "CMD: ") << (cmd ? Log.color(:yellow, cmd) : "Missing") if long
  puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home && long
  if different_system
    puts Log.color(:magenta, "Job ID (#{Log.color(:red, job_batch_system)}): ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : Log.color(:green, id) )
  else
    puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) ))
  end
  puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
  puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
  puts Log.color(:magenta, "Nodes: ") << nodes * ", " if long
  puts Log.color(:magenta, "Time elapsed: ") << Misc.format_seconds(time_elapsed) if time_elapsed
  puts Log.color(:magenta, "Output: ") << File.exist?(File.join(dir, 'std.out')).to_s << (id.nil? || time_diff.nil? || File.exist?(File.join(dir, 'exit.status')) ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)") if long

  if options[:batch_parameters]
    puts Log.color(:magenta, "BATCH parameters: ")
    case job_batch_system
    when 'slurm'
      text = CMD.cmd('grep "^#SBATCH" ', :in => command_txt).read.strip
    when 'lsf'
      text = CMD.cmd('grep "^#BSUB" ', :in => command_txt).read.strip
    when 'pbs'
      text = CMD.cmd('grep "^#PBS" ', :in => command_txt).read.strip
    else
      text = ""
    end
    lines = text.split("\n").collect{|line| header, _sep, value = line.partition(/\s+/); Log.color(:yellow, header + ": ") + value}
    puts Log.color :yellow, lines * "\n"
  end

  fprocpath = File.join(dir, 'procpath.sqlite3')
  if options[:batch_procpath] && Open.exists?(fprocpath)
    puts Log.color(:magenta, "Procpath summary: ")
    require 'rbbt/tsv/csv'
    meta = TSV.csv(CMD.cmd("sqlite3 -header -csv #{fprocpath} 'select * from meta;' "))
    perf = TSV.csv(CMD.cmd("sqlite3 -header -csv #{fprocpath} 'select * from record;' "))

    page_size = meta["page_size"].first.to_f
    clock_ticks = meta["clock_ticks"].first.to_f

    # CPU values are grouped by process and timestamp, RSS values by timestamp
    cpu_average = {}
    rss_average = {}
    perf.through :key, ["ts", 'stat_pid', "stat_utime", "stat_stime", "stat_cutime", "stat_cstime", "stat_rss"] do |k, values|
      time, stat_pid, ucpu, scpu, ccpu, cscpu, rss = values
      time = time.to_f

      cpu = Misc.sum([ucpu, scpu].collect{|v| v.to_f})
      cpu_average[stat_pid] ||= {}
      cpu_average[stat_pid][time] ||= []
      cpu_average[stat_pid][time] << cpu.to_f
      rss_average[time] ||= []
      rss_average[time] << rss.to_f * page_size
    end

    # Ticks used by each process between its first and last record
    ticks = 0
    cpu_average.each do |stat_pid, cpu_average_pid|
      start = cpu_average_pid.keys.sort.first
      eend = cpu_average_pid.keys.sort.last
      ticks += Misc.sum(cpu_average_pid[eend]) - Misc.sum(cpu_average_pid[start])
    end
    start = rss_average.keys.sort.first
    eend = rss_average.keys.sort.last
    time_elapsed = eend - start
    # Avoid reporting or dividing by zero
    ticks = 1 if ticks == 0
    time_elapsed = 1 if time_elapsed == 0
    puts Log.color(:yellow, "CPU average: ") + "%.2f" % ( ticks / clock_ticks / time_elapsed * 100).to_s
    puts Log.color(:yellow, "RSS average: ") + "%.2f GB" % Misc.mean(rss_average.collect{|t,l| Misc.sum(l) / (1024 * 1024 * 1024)}).to_s
    puts Log.color(:yellow, "Time: ") + Misc.format_seconds((eend - start))

  end

  if options[:sacct_peformance]
    begin
      raise "sacct is only supported for SLURM" unless batch_system == 'slurm'
      tsv = TSV.open(CMD.cmd("sacct -j #{id} -o 'jobid,AveRSS,MaxRSS,MaxDiskRead,MaxDiskWrite' -P|grep 'JobID\\|\.batch'"), :header_hash => '', :sep => "|", :type => :list)
      values = tsv[tsv.keys.first]
      if values.compact.any?
        puts Log.color(:magenta, "SACCT performance: ")
        puts values.zip(values.fields).collect{|v,t| Log.color(:yellow, t + ": ") + v.to_s } * "\n"
      end
    rescue
      Log.warn $!.message
    end
  end


  if tail && File.exist?(File.join(dir, 'std.err'))
    if exit_status && exit_status != 0
      puts Log.color(:magenta, "First error or exception found: ")
      puts CMD.cmd("grep -i -w 'error\\|[a-z]*exception' #{File.join(dir, 'std.err')} -A #{tail.to_i} |head -n #{tail.to_i}", :no_fail => true).read
    elsif exit_status
      puts Log.color(:magenta, "Completed jobs: ")
      puts CMD.cmd("grep -i -w 'Completed step' #{File.join(dir, 'std.err')} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
    else
      puts Log.color(:magenta, "Log tail: ")
      puts CMD.cmd(" cat #{File.join(dir, 'std.err')} | grep -v '^[^\\s:]*\\[3.m' | grep -v -e '^[[:space:]]*$' | grep -v \"\\(STDOUT\\|STDERR\\):[[:space:]]*$\" | tail -n #{tail.to_i} ").read
    end
  end

  # Progress bars of the step and its dependencies, or the step status if none is found
  if progress
    step_line = command_txt.split("\n").select{|line| line =~ /^#STEP_PATH:/}.first
    if step_line
      require 'rbbt/workflow'
      step_path = step_line.split(": ").last.strip
      step = Step.new step_path
      step.load_dependencies_from_info
      has_bar = false
      (step.rec_dependencies + [step]).reverse.each do |j|
        next if j.done?
        if j.file(:progress).exists?
          bar = Log::ProgressBar.new
          bar.load(j.file(:progress).yaml)
          puts Log.color(:magenta, "Progress: ") + bar.report_msg + " " + Log.color(:yellow, j.task_signature)
          has_bar = true
        end
      end
      step_status = step.status
      step_status = Log.color :red, step_status if step_status.to_s == 'cleaned'
      step_status = Log.color :green, step_status if step_status.to_s == 'done'
      puts Log.color(:magenta, "Progress: ") + Log.color(:yellow, step.task_signature) + " #{step_status}" unless has_bar
    end
  end

end

puts
puts Log.color :clear, "Found #{count} jobs"
+
@@ -0,0 +1,92 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt/util/simpleopt'
4
+ require 'rbbt/workflow'
5
+ require 'rbbt/workflow/usage'
6
+ require 'rbbt/workflow/remote_workflow'
7
+ require 'rbbt/hpc'
8
+ require 'rbbt/hpc/orchestrate'
9
+ require 'time'
10
+
11
+ rbbt_options = SOPT::GOT_OPTIONS
12
+
13
+ $slurm_options = SOPT.get <<EOF
14
+ -dr--dry_run Print only the template
15
+ -cj--clean_job Clean job
16
+ --drbbt* Use development version of rbbt
17
+ -sing--singularity Use Singularity
18
+ -si--singularity_img* Singularity image to use
19
+ -sm--singularity_mounts* Singularity image to use
20
+ -ug--user_group* Use alternative user group for group project directory
21
+ -c--contain* Contain in directory (using Singularity)
22
+ -s--sync* Contain in directory and sync jobs
23
+ -e--exclusive Make exclusive use of the node
24
+ -hm--highmem Make use of highmem cores
25
+ -wc--wipe_container* Wipe the jobs from the contain directory
26
+ -pd--purge_deps Purge job dependencies
27
+ -CS--contain_and_sync Contain and sync to default locations
28
+ -ci--copy_image When using a container directory, copy image there
29
+ -t--tail Tail the logs
30
+ -BPP--batch_procpath* Save Procpath performance for batch job; specify only options
31
+ -q--queue* Queue
32
+ -a--account* Account
33
+ -p--partition* Partition
34
+ -t--task_cpus* Tasks
35
+ -tm--time* Time
36
+ -m--mem* SLURM minimum memory
37
+ --gres* SLURM Generic resources
38
+ -mcpu--mem_per_cpu* SLURM minimum memory per CPU
39
+ -lin--licenses* SLURM licenses
40
+ -cons--constraint* SLURM constraint
41
+ -W--workflows* Additional workflows
42
+ -rmb--remove_batch_basedir Remove the SLURM working directory (command, STDIN, exit status, ...)
43
+ -bs--batch_system* Batch system to use: auto, lsf, slurm (default is auto-detect)
44
+ -lmod--lua_modules* Lua Modules to load
45
+ -co--conda* Conda environment to use
46
+ -OR--orchestration_rules* Orchestration rules
47
+ EOF
48
+
49
+ batch_system = $slurm_options.delete :batch_system
50
+ batch_system ||= 'auto'
51
+
52
+ $slurm_options[:config_keys] = SOPT::GOT_OPTIONS[:config_keys]
53
+
54
+ HPC::BATCH_MODULE = HPC.batch_system batch_system
55
+
56
+ raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
57
+
58
# Redefines how a Step is run and joined so that it is orchestrated as a job
# of the detected batch system (HPC::BATCH_MODULE).
class Step
  # Hands the batch job directory recorded by #run to
  # HPC::BATCH_MODULE.wait_for_job.
  def join
    HPC::BATCH_MODULE.wait_for_job(@batch_job_dir)
  end

  # Loads the result if the step is already done; otherwise orchestrates a
  # batch job for it using the global $slurm_options and remembers the job id
  # and job directory.
  #
  # no_load - when true (the default) the step itself is returned right after
  #           issuing the job; when false the job is joined and the result
  #           loaded
  #
  # Returns nil when HPC::BATCH_DRY_RUN is raised while issuing the job.
  def run(no_load = true)
    if done?
      self.load
    else
      begin
        # The job goes to whichever batch system was detected, not just SLURM
        Log.debug "Issuing BATCH job for #{self.path}"
        @batch_job_id, @batch_job_dir = HPC::BATCH_MODULE.orchestrate_job(self, $slurm_options)
        if no_load
          self
        else
          join
          self.load
        end
      rescue HPC::BATCH_DRY_RUN
      end
    end
  end
end
81
+
82
module RemoteStep::SSH

  # Orchestrates the step on the remote host through RemoteWorkflow::SSH,
  # addressing the task by its URL and passing the global $slurm_options.
  def _run
    task_url = File.join(base_url, task.to_s)
    RemoteWorkflow::SSH.orchestrate_slurm_job(task_url, @input_id, @base_name, $slurm_options)
  end

end
89
+
90
+ SOPT.current_options = rbbt_options
91
+
92
+ load Rbbt.share.rbbt_commands.workflow.task.find
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+ require 'rbbt/hpc'
6
+
7
+ #$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
8
+
9
# Follows the output of a batch job given its batch directory or its job id.

# Parse command line options; the heredoc doubles as the usage text
options = SOPT.setup <<EOF

Follow the log of a batch job

$ rbbt slurm tail <directory|jobid> [options]

-h--help Print this help
-bs--batch_system* Batch system to use: auto, lsf, slurm (default is auto-detect)
EOF

if options[:help]
  if defined? rbbt_usage
    rbbt_usage
  else
    puts SOPT.doc
  end
  exit 0
end

batch_system = options.delete :batch_system
batch_system ||= 'auto'

HPC::BATCH_MODULE = HPC.batch_system batch_system

raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?

directory = ARGV.shift

raise ParameterException, "No directory or job id specified" if directory.nil?

# A numeric argument, or one that is not an existing path, is taken as a job
# id and resolved to the batch directory whose job.id file contains it
if directory =~ /\A[0-9]+\z/ || ! File.exist?(directory)
  workdir = File.expand_path('~/rbbt-batch')
  Path.setup(workdir)

  id_file = workdir.glob("**/job.id").find{|file| directory == Open.read(file).strip }
  directory = File.dirname(id_file) if id_file
end

raise ParameterException, "Could not identify job #{directory}" unless File.exist?(directory)

require 'rbbt/hpc/slurm'

command_txt = Open.read(File.join(directory, 'command.batch'))
step_path = command_txt[/#STEP_PATH: (.*)/, 1]

puts Log.color(:magenta, "Directory: ") + directory
puts Log.color(:magenta, "Step path: ") + step_path if step_path

HPC::BATCH_MODULE.follow_job directory, true
@@ -0,0 +1,79 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt/util/simpleopt'
4
+ require 'rbbt/workflow'
5
+ require 'rbbt/workflow/usage'
6
+ require 'rbbt/workflow/remote_workflow'
7
+ require 'rbbt/hpc'
8
+ require 'time'
9
+
10
+ rbbt_options = SOPT::GOT_OPTIONS
11
+
12
+ $slurm_options = SOPT.get <<EOF
13
+ -dr--dry_run Print only the template
14
+ -cj--clean_job Clean job
15
+ --drbbt* Use development version of rbbt
16
+ -sing--singularity Use Singularity
17
+ -si--singularity_img* Singularity image to use
18
+ -sm--singularity_mounts* Singularity image to use
19
+ -ug--user_group* Use alternative user group for group project directory
20
+ -c--contain* Contain in directory (using Singularity)
21
+ -s--sync* Contain in directory and sync jobs
22
+ -e--exclusive Make exclusive use of the node
23
+ -hm--highmem Make use of highmem cores
24
+ -wc--wipe_container* Wipe the jobs from the contain directory
25
+ -pd--purge_deps Purge job dependencies
26
+ -CS--contain_and_sync Contain and sync to default locations
27
+ -ci--copy_image When using a container directory, copy image there
28
+ -t--tail Tail the logs
29
+ -BPP--batch_procpath* Save Procpath performance for batch job; specify only options
30
+ -q--queue* Queue
31
+ -a--account* Account
32
+ -p--partition* Partition
33
+ -t--task_cpus* Tasks
34
+ -tm--time* Time
35
+ -m--mem* minimum memory
36
+ --gres* Generic resources
37
+ -mcpu--mem_per_cpu* minimum memory per CPU
38
+ -lin--licenses* licenses
39
+ -cons--constraint* constraint
40
+ -W--workflows* Additional workflows
41
+ -rmb--remove_batch_dir Remove the batch working directory (command, STDIN, exit status, ...)
42
+ -bs--batch_system* Batch system to use: auto, lsf, slurm (default is auto-detect)
43
+ -lmod--lua_modules* Lua Modules to load
44
+ -co--conda* Conda environment to use
45
+ -OR--orchestration_rules* Orchestration rules
46
+ EOF
47
+
48
+ batch_system = $slurm_options.delete :batch_system
49
+ batch_system ||= 'auto'
50
+
51
+ HPC::BATCH_MODULE = HPC.batch_system batch_system
52
+
53
+ raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
54
+
55
# Redefines Step#run so that the step is issued as a job of the detected
# batch system (HPC::BATCH_MODULE).
class Step

  # Loads the result when the step is already done; otherwise runs it as a
  # batch job with the global $slurm_options. Any arguments are accepted and
  # ignored. Returns nil when HPC::BATCH_DRY_RUN is raised.
  def run(*args)
    return self.load if done?

    begin
      Log.debug "Issuing BATCH job for #{self.path}"
      HPC::BATCH_MODULE.run_job(self, $slurm_options)
    rescue HPC::BATCH_DRY_RUN
    end
  end
end
69
+
70
module RemoteStep::SSH

  # Runs the step on the remote host through RemoteWorkflow::SSH, addressing
  # the task by its URL and passing the global $slurm_options.
  def _run
    task_url = File.join(base_url, task.to_s)
    RemoteWorkflow::SSH.run_slurm_job(task_url, @input_id, @base_name, $slurm_options)
  end

end
77
+
78
+ SOPT.current_options = rbbt_options
79
+ load Rbbt.share.rbbt_commands.workflow.task.find
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.40.0
4
+ version: 5.40.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-10-12 00:00:00.000000000 Z
11
+ date: 2023-10-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -136,20 +136,6 @@ dependencies:
136
136
  - - ">="
137
137
  - !ruby/object:Gem::Version
138
138
  version: '0'
139
- - !ruby/object:Gem::Dependency
140
- name: nokogiri
141
- requirement: !ruby/object:Gem::Requirement
142
- requirements:
143
- - - ">="
144
- - !ruby/object:Gem::Version
145
- version: '0'
146
- type: :runtime
147
- prerelease: false
148
- version_requirements: !ruby/object:Gem::Requirement
149
- requirements:
150
- - - ">="
151
- - !ruby/object:Gem::Version
152
- version: '0'
153
139
  description: Utilities for handling tsv files, caches, etc
154
140
  email: miguel.vazquez.g@bsc.es
155
141
  executables:
@@ -302,6 +288,7 @@ files:
302
288
  - lib/rbbt/util/open.rb
303
289
  - lib/rbbt/util/procpath.rb
304
290
  - lib/rbbt/util/python.rb
291
+ - lib/rbbt/util/python/step.rb
305
292
  - lib/rbbt/util/python/util.rb
306
293
  - lib/rbbt/util/semaphore.rb
307
294
  - lib/rbbt/util/simpleDSL.rb
@@ -390,6 +377,11 @@ files:
390
377
  - share/rbbt_commands/lsf/task
391
378
  - share/rbbt_commands/migrate
392
379
  - share/rbbt_commands/migrate_job
380
+ - share/rbbt_commands/pbs/clean
381
+ - share/rbbt_commands/pbs/list
382
+ - share/rbbt_commands/pbs/orchestrate
383
+ - share/rbbt_commands/pbs/tail
384
+ - share/rbbt_commands/pbs/task
393
385
  - share/rbbt_commands/purge_job
394
386
  - share/rbbt_commands/resource/claims
395
387
  - share/rbbt_commands/resource/exists
@@ -592,7 +584,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
592
584
  - !ruby/object:Gem::Version
593
585
  version: '0'
594
586
  requirements: []
595
- rubygems_version: 3.5.0.dev
587
+ rubygems_version: 3.4.19
596
588
  signing_key:
597
589
  specification_version: 4
598
590
  summary: Utilities for the Ruby Bioinformatics Toolkit (rbbt)