rbbt-util 5.40.0 → 5.40.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '088ac190b36a176e420d4bb1d7b8d28d777133568e45530219f6a0212a1a1144'
4
- data.tar.gz: 47bdc31d247e5fd5d944f493690e8181bcc236cee74c61fefd5ce25711edf7da
3
+ metadata.gz: 0c086b1d2a4bfd64cc80729a4962b2ba04268b5745ae6c79b5d954fc5b15a7a9
4
+ data.tar.gz: 8d0ee0a0942be2cf07259baed09d2c2feb98a67b314adc70bd0ac1e03ecba4f9
5
5
  SHA512:
6
- metadata.gz: 9afce115c221695ee896c1e1f884be9a1ba0f0ae98789a485a906e438252afe8292c5422f3eef57d77089bf55cc0b425988ab9b479b1f422eaa0abb7005cfdfa
7
- data.tar.gz: 4cf4d1e86ce3ab256609aff4c66a3412ea1a9c0c87db08fd5cdad1b98c95147d452cc69d96009c9791cf698ea43903873ed0e4cebbc861bda85097864bed68e8
6
+ metadata.gz: 8ca4b7b1b59eb4546b9e1484fd397c687003389b406de77461da5aa3c773679a87533050c640c90feacc48f9bdadf1c565bcfe9727e8661b2440938ed3c69dbc
7
+ data.tar.gz: 613f30647a8d317984e12a5618fe8d03ab94c3d4ca1acc8ee38460c8f67869c13e8dd83f3d1c4332e813a34802af55d6346f87d898863095ff12368c9f715ba6
data/bin/rbbt CHANGED
@@ -75,7 +75,7 @@ end
75
75
  Log.ignore_stderr do
76
76
  begin
77
77
  require "nokogiri"
78
- rescue
78
+ rescue Exception
79
79
  end
80
80
  end
81
81
 
data/lib/rbbt/hpc/pbs.rb CHANGED
@@ -43,6 +43,8 @@ cd ${PBS_O_WORKDIR}
43
43
  system = Misc.process_options options, :partition
44
44
  filesystems = Misc.process_options options, :filesystems
45
45
 
46
+ filesystems = "home" if filesystems.nil?
47
+
46
48
  filesystems = filesystems * "," if Array === filesystems
47
49
 
48
50
  # NOT USED
@@ -1,6 +1,6 @@
1
1
  module Misc
2
2
 
3
- def self.add_libdir(dir=nil)
3
+ def self.add_libdir(dir=nil)
4
4
  dir ||= File.join(Path.caller_lib_dir(caller.first), 'lib')
5
5
  $LOAD_PATH.unshift(dir) unless $LOAD_PATH.include? dir
6
6
  end
@@ -0,0 +1,41 @@
1
+ require 'rbbt/workflow/step'
2
+
3
+ class Step
4
+ def python_file(file, options = {})
5
+ CMD.cmd_log(:python, file, options)
6
+ end
7
+
8
+ def python_block(options = {}, &block)
9
+ RbbtPython.run options, &block
10
+ end
11
+
12
+ def python(python = nil, options = {}, &block)
13
+ begin
14
+ RbbtPython.add_path self.workflow.libdir.python.find
15
+ rescue
16
+ Log.warn "Error loading libdir python for workflow '#{Misc.fingerprint self.workflow}'"
17
+ end
18
+ case python
19
+ when Path
20
+ python_file python.find, options
21
+ when String
22
+ if Open.exists?(python)
23
+ python_file python
24
+ else
25
+ TmpFile.with_file do |dir|
26
+ pkg = "pkg#{rand(100)}"
27
+ Open.write File.join(dir, "#{pkg}/__init__.py"), code
28
+
29
+ RbbtPython.add_path dir
30
+
31
+ Misc.in_dir dir do
32
+ yield pkg
33
+ end
34
+ end
35
+ end
36
+ else
37
+ python_block(python, &block)
38
+ end
39
+ end
40
+ end
41
+
@@ -49,5 +49,5 @@ module RbbtPython
49
49
  end
50
50
  hash
51
51
  end
52
-
53
52
  end
53
+
data/python/rbbt.py CHANGED
@@ -29,6 +29,7 @@ def tsv_preamble(line, comment_char="#"):
29
29
  entries = re.sub(f"^{comment_char}:", '', line)
30
30
  entries = re.sub(f"^{comment_char}:", '', line).split("#")
31
31
  for entry in entries:
32
+ entry = entry.strip()
32
33
  key, value = entry.split("=")
33
34
  key = re.sub("^:","",key)
34
35
  value = re.sub("^:","",value)
@@ -75,7 +76,13 @@ def tsv_pandas(filename, sep="\t", comment_char="#", index_col=0, **kwargs):
75
76
  header = tsv_header(filename, sep=sep, comment_char="#")
76
77
 
77
78
  if ("type" in header and header["type"] == "flat"):
78
- return None
79
+ if ("sep" in header):
80
+ sep=header["sep"]
81
+
82
+ tsv = pandas.read_table(filename, sep=sep, index_col=index_col, header=None, skiprows=[0,1], **kwargs)
83
+
84
+ if ("key_field" in header):
85
+ tsv.index.name = header["key_field"]
79
86
  else:
80
87
  if ("sep" in header):
81
88
  sep=header["sep"]
@@ -0,0 +1,212 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+ require 'rbbt/hpc'
6
+
7
+ #$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
8
+
9
+ options = SOPT.setup <<EOF
10
+
11
+ Clean error or aborted jobs
12
+
13
+ $ rbbt slurm clean [options]
14
+
15
+ -h--help Print this help
16
+ -d--done Done jobs only
17
+ -e--error Error jobs only
18
+ -a--aborted SLURM aboted jobs
19
+ -q--queued Queued jobs only
20
+ -j--job* Job ids
21
+ -s--search* Regular expression
22
+ -t--tail* Show the last lines of the STDERR
23
+ -BP--batch_parameters show batch parameters
24
+ -dr--dry_run Do not erase anything
25
+ EOF
26
+
27
+ if options[:help]
28
+ if defined? rbbt_usage
29
+ rbbt_usage
30
+ else
31
+ puts SOPT.doc
32
+ end
33
+ exit 0
34
+ end
35
+
36
+ batch_system = options.delete :batch_system
37
+ batch_system ||= 'auto'
38
+
39
+ HPC::BATCH_MODULE = HPC.batch_system batch_system
40
+
41
+ raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
42
+
43
+ Log.severity = 4
44
+ done, error, aborted, queued, jobid, search, tail, batch_parameters, dry_run = options.values_at :done, :error, :aborted, :queued, :job, :search, :tail, :batch_parameters, :dry_run
45
+
46
+ workdir = File.expand_path('~/rbbt-batch')
47
+ Path.setup(workdir)
48
+
49
+ running_jobs = begin
50
+ squeue_txt = HPC::BATCH_MODULE.job_status
51
+ squeue_txt.split("\n").collect{|l| l.to_i.to_s}
52
+ rescue
53
+ Log.warn "Cannot determine if jobs are running, they will seem to be all alive (Job ID in green)"
54
+ squeue_txt = nil
55
+ $norunningjobs = true
56
+ []
57
+ end
58
+
59
+ if squeue_txt
60
+ job_nodes = {}
61
+ squeue_txt.split("\n").each do |line|
62
+ parts = line.strip.split(/\s+/)
63
+ job_nodes[parts.first] = parts.last.split(",")
64
+ end
65
+ else
66
+ job_nodes = nil
67
+ end
68
+
69
+ count = 0
70
+ workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
71
+ dir = File.dirname(fcmd)
72
+ command_txt = Open.read(fcmd)
73
+
74
+ if m = command_txt.match(/#CMD: (.*)/)
75
+ cmd = m[1]
76
+ else
77
+ cmd = nil
78
+ end
79
+
80
+ if m = command_txt.match(/# Run command\n(.*?)\n/im)
81
+ exe = m[1]
82
+ else
83
+ exe = nil
84
+ end
85
+
86
+ if m = command_txt.match(/^CONTAINER_DIR=(.*)/)
87
+ container_home = m[1]
88
+ else
89
+ container_home = nil
90
+ end
91
+
92
+ if m = command_txt.match(/^BATCH_SYSTEM=(.*)/)
93
+ job_batch_system = m[1].downcase
94
+ else
95
+ job_batch_system = nil
96
+ end
97
+
98
+ different_system = job_batch_system != batch_system
99
+
100
+ if File.exist?(fid = File.join(dir, 'job.id'))
101
+ id = Open.read(fid).chomp
102
+ else
103
+ id = nil
104
+ end
105
+
106
+ if File.exist?(fstatus = File.join(dir, 'exit.status'))
107
+ exit_status = Open.read(fstatus).to_i
108
+ else
109
+ exit_status = nil
110
+ end
111
+
112
+ if File.exist?(fstatus = File.join(dir, 'job.status'))
113
+ fstatus_txt = Open.read(fstatus)
114
+ begin
115
+ if job_batch_system == "lsf"
116
+ nodes = Open.read(fstatus).split("\n").last.split(/\s+/)[5].split(",")
117
+ else
118
+ nodes = Open.read(fstatus).split("\n").last.split(/\s+/).last.split(",")
119
+ end
120
+ rescue
121
+ nodes = []
122
+ end
123
+ elsif job_nodes[id]
124
+ nodes = job_nodes[id]
125
+ else
126
+ nodes = []
127
+ end
128
+
129
+ if File.exist?(File.join(dir, 'std.out'))
130
+ outt = File.mtime File.join(dir, 'std.out')
131
+ errt = File.mtime File.join(dir, 'std.err')
132
+ time_diff = Time.now - [outt, errt].max
133
+ end
134
+
135
+ fdep = File.join(dir, 'dependencies.list')
136
+ deps = Open.read(fdep).split("\n") if File.exist?(fdep)
137
+
138
+ fcadep = File.join(dir, 'canfail_dependencies.list')
139
+ cadeps = Open.read(fcadep).split("\n") if File.exist?(fcadep)
140
+
141
+ aborted = error = true if ! done && aborted.nil? && error.nil?
142
+ #if done || error || aborted || running || queued || jobid || search
143
+ # select = false
144
+ # select = true if done && exit_status && exit_status.to_i == 0
145
+ # select = true if error && exit_status && exit_status.to_i != 0
146
+ # select = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
147
+ # select = select && jobid.split(",").include?(id) if jobid
148
+ # select = select && cmd.match(/#{search}/) if search
149
+ # next unless select
150
+ #end
151
+
152
+ if done || error || aborted || queued || jobid
153
+ select = false
154
+ select = true if done && exit_status == 0
155
+ select = true if error && exit_status && exit_status != 0
156
+ select = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
157
+ is_running = exit_status.nil? && ( (running_jobs.include?(id) && (!deps || (running_jobs & deps).empty?)) || different_system )
158
+ select = true if queued && deps && (running_jobs & deps).any? || queued && is_running && nodes.empty?
159
+ select = true if jobid && jobid.split(",").include?(id)
160
+ select = select && cmd.match(/#{search}/) if search
161
+ next unless select
162
+ elsif search
163
+ select = false
164
+ select = true if search && cmd.match(/#{search}/)
165
+ next unless select
166
+ end
167
+
168
+
169
+ puts Log.color(:yellow, "**ERASING**")
170
+ puts Log.color :blue, dir
171
+ puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.batch')).to_s
172
+ puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
173
+ puts Log.color(:magenta, "Exec: ") << (exe || "Missing")
174
+ puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing")
175
+ puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home
176
+ puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) ))
177
+ puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
178
+ puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
179
+ puts Log.color(:magenta, "Nodes: ") << nodes * ", "
180
+ puts Log.color(:magenta, "Output: ") << File.exist?(File.join(dir, 'std.out')).to_s << (id.nil? ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)")
181
+
182
+ if options[:batch_parameters]
183
+ puts Log.color(:magenta, "BATCH parameters: ")
184
+ case job_batch_system
185
+ when 'slurm'
186
+ puts Log.color :blue, CMD.cmd('grep "^#SBATCH" |tail -n +6', :in => Open.read(fcmd)).read.strip
187
+ when 'lsf'
188
+ puts Log.color :blue, CMD.cmd('grep "^#BSUB" |tail -n +6', :in => Open.read(fcmd)).read.strip
189
+ end
190
+ end
191
+
192
+ if tail && File.exist?(File.join(dir, 'std.err'))
193
+ if exit_status && exit_status != 0
194
+ puts Log.color(:magenta, "First error or exception found: ")
195
+ puts CMD.cmd("grep -i -w 'error\\|[a-z]*exception' #{File.join(dir, 'std.err')} -A #{tail.to_i} |head -n #{tail.to_i}", :no_fail => true).read
196
+ elsif exit_status
197
+ puts Log.color(:magenta, "Completed jobs: ")
198
+ puts CMD.cmd("grep -i -w 'Completed step' #{File.join(dir, 'std.err')} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
199
+ else
200
+ puts Log.color(:magenta, "Log tail: ")
201
+ puts CMD.cmd("tail -n #{tail.to_i} #{File.join(dir, 'std.err')}").read
202
+ end
203
+ end
204
+
205
+ count += 1
206
+
207
+ Open.rm_rf dir unless dry_run
208
+ end
209
+
210
+ puts
211
+ puts "Found #{count} jobs"
212
+
@@ -0,0 +1,362 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+ require 'rbbt/hpc'
6
+
7
+ #$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
8
+
9
+ options = SOPT.setup <<EOF
10
+
11
+ Queue a job in Marenostrum
12
+
13
+ $ rbbt slurm list [options]
14
+
15
+ -h--help Print this help
16
+ -d--done Done jobs only
17
+ -e--error Error jobs only
18
+ -a--aborted SLURM aboted jobs
19
+ -r--running Running jobs only
20
+ -q--queued Queued jobs only
21
+ -j--job* Job ids
22
+ -s--search* Regular expression
23
+ -t--tail* Show the last lines of the STDERR
24
+ -l--long Show more entries
25
+ -c--compressed Show compressed information about entries
26
+ -p--progress Report progress of job and the dependencies
27
+ -BP--batch_parameters show batch parameters
28
+ -BPP--batch_procpath show Procpath performance summary
29
+ -sacct--sacct_peformance show sacct performance summary
30
+ -bs--batch_system* Batch system to use: auto, lsf, slurm (default is auto-detect)
31
+ EOF
32
+
33
+ if options[:help]
34
+ if defined? rbbt_usage
35
+ rbbt_usage
36
+ else
37
+ puts SOPT.doc
38
+ end
39
+ exit 0
40
+ end
41
+
42
+ batch_system = options.delete :batch_system
43
+ batch_system ||= 'auto'
44
+
45
+ long = options.delete :long
46
+
47
+ HPC::BATCH_MODULE = HPC.batch_system batch_system
48
+
49
+ raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
50
+
51
+ batch_system = HPC::BATCH_MODULE.to_s.split("::").last.downcase
52
+
53
+ done, error, running, queued, aborted, jobid, search, tail, progress = options.values_at :done, :error, :running, :queued, :aborted, :job, :search, :tail, :progress
54
+
55
+ workdir = File.expand_path('~/rbbt-batch')
56
+ Path.setup(workdir)
57
+
58
+ running_jobs = begin
59
+ squeue_txt = HPC::BATCH_MODULE.job_status
60
+ squeue_txt.split("\n").collect{|l| l.to_i.to_s}
61
+ rescue
62
+ Log.warn "Cannot determine if jobs are running, they will seem to be all alive (Job ID in green)"
63
+ squeue_txt = nil
64
+ $norunningjobs = true
65
+ []
66
+ end
67
+
68
+ if squeue_txt
69
+ job_nodes = {}
70
+ squeue_txt.split("\n").each do |line|
71
+ parts = line.strip.split(/\s+/)
72
+ job_nodes[parts.first] = parts.last.split(",")
73
+ end
74
+ else
75
+ job_nodes = nil
76
+ end
77
+
78
+ count = 0
79
+ workdir.glob("**/command.batch").sort_by{|f| File.mtime(f)}.each do |fcmd|
80
+ dir = File.dirname(fcmd)
81
+ command_txt = Open.read(fcmd)
82
+
83
+ if m = command_txt.match(/#CMD: (.*)/)
84
+ cmd = m[1]
85
+ else
86
+ cmd = nil
87
+ end
88
+
89
+ if m = command_txt.match(/^export BATCH_SYSTEM=(.*)/)
90
+ job_batch_system = m[1].downcase
91
+ else
92
+ job_batch_system = nil
93
+ end
94
+
95
+ different_system = job_batch_system != batch_system
96
+
97
+ if m = command_txt.match(/#MANIFEST: (.*)/)
98
+ manifest = m[1]
99
+ else
100
+ manifest = nil
101
+ end
102
+
103
+ if m = command_txt.match(/#STEP_PATH: (.*)/)
104
+ step_path = m[1]
105
+ else
106
+ step_path = nil
107
+ end
108
+
109
+ if m = command_txt.match(/#EXEC_CMD: (.*)/)
110
+ exe = m[1]
111
+ else
112
+ exe = nil
113
+ end
114
+
115
+ if m = command_txt.match(/^CONTAINER_DIR=(.*)/)
116
+ container_home = m[1]
117
+ else
118
+ container_home = nil
119
+ end
120
+
121
+ if File.exist?(fid = File.join(dir, 'job.id'))
122
+ id = Open.read(fid).chomp
123
+ else
124
+ id = nil
125
+ end
126
+
127
+ if File.exist?(fstatus = File.join(dir, 'exit.status'))
128
+ exit_status = Open.read(fstatus).to_i
129
+ else
130
+ exit_status = nil
131
+ end
132
+
133
+ if File.exist?(fstatus = File.join(dir, 'job.status'))
134
+ fstatus_txt = Open.read(fstatus)
135
+ begin
136
+ if job_batch_system == "lsf"
137
+ nodes = Open.read(fstatus).split("\n").last.split(/\s+/)[5].split(",")
138
+ else
139
+ nodes = Open.read(fstatus).split("\n").last.split(/\s+/).last.split(",")
140
+ end
141
+ rescue
142
+ nodes = []
143
+ end
144
+ elsif job_nodes && job_nodes[id]
145
+ nodes = job_nodes[id].reject{|n| n.include? "("}
146
+ else
147
+ nodes = []
148
+ end
149
+
150
+ if File.exist?(File.join(dir, 'exit.status'))
151
+ now = File.ctime(File.join(dir, 'exit.status'))
152
+ else
153
+ now = Time.now
154
+ end
155
+
156
+ if File.exist?(File.join(dir, 'std.out'))
157
+ cerrt = File.ctime File.join(dir, 'std.err')
158
+ coutt = File.ctime File.join(dir, 'std.out')
159
+ outt = File.mtime File.join(dir, 'std.out')
160
+ errt = File.mtime File.join(dir, 'std.err')
161
+ time_diff = now - [outt, errt].max
162
+ time_elapsed = now - [cerrt, coutt].min
163
+ end
164
+
165
+ fdep = File.join(dir, 'dependencies.list')
166
+ deps = Open.read(fdep).split("\n") if File.exist?(fdep)
167
+
168
+ fcadep = File.join(dir, 'canfail_dependencies.list')
169
+ cadeps = Open.read(fcadep).split("\n") if File.exist?(fcadep)
170
+
171
+ is_running = exit_status.nil? && ( (running_jobs.include?(id) && (deps.nil? || (running_jobs & deps).empty?)) || different_system )
172
+ if done || error || aborted || running || queued || jobid
173
+ select = false
174
+ select = true if done && exit_status == 0
175
+ select = true if error && exit_status && exit_status != 0
176
+ select = true if aborted && (exit_status.nil? && ! running_jobs.include?(id))
177
+ select = true if queued && deps && (running_jobs & deps).any? || queued && is_running && nodes.empty?
178
+ select = true if running && nodes.any? && (exit_status.nil? && running_jobs.include?(id)) && (!deps || (running_jobs & deps).empty?)
179
+ select = true if jobid && jobid.split(",").include?(id)
180
+ select = select && step_path.match(/#{search}/) if search
181
+ next unless select
182
+ elsif search
183
+ select = false
184
+ select = true if search && cmd.match(/#{search}/)
185
+ next unless select
186
+ end
187
+
188
+
189
+ count += 1
190
+
191
+ if options[:compressed]
192
+ status = exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) : Log.color(:green, id)
193
+ if different_system
194
+ status = exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : Log.color(:green, id)
195
+ else
196
+ #status = exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) )
197
+ status = exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" :
198
+ (running_jobs.include?(id) || $norunningjobs ?
199
+ (is_running ? Log.color(:cyan, id) : Log.color(:yellow, id) ) :
200
+ Log.color(:red, id) )
201
+ end
202
+ prog_rep = []
203
+ if options[:progress]
204
+ step_line = Open.read(fcmd).split("\n").select{|line| line =~ /^#STEP_PATH:/}.first
205
+ if step_line
206
+ require 'rbbt/workflow'
207
+ step_path = step_line.split(": ").last.strip
208
+ step = Step.new step_path
209
+ step.load_dependencies_from_info
210
+ has_bar = false
211
+ [step].reverse.each do |j|
212
+ next if j.done?
213
+ if j.file(:progress).exists?
214
+ bar = Log::ProgressBar.new
215
+ bar.load(j.file(:progress).yaml)
216
+ rep = bar.report_msg.split("·")[1]
217
+ rep = rep.sub(/.*?(\d+%)/, Log.color(:blue,'\1')).sub(/\-.*/,'')
218
+ prog_rep << [rep]
219
+ end
220
+ end
221
+ end
222
+ end
223
+ workflow, task, name = step_path.split("/")[-3..-1]
224
+ job_str = [Log.color(:yellow, workflow), Log.color(:magenta, task), name] * "/"
225
+ puts [job_str, status, prog_rep ].flatten * " "
226
+ next
227
+ end
228
+
229
+ puts Log.color :blue, dir
230
+ puts Log.color(:magenta, "Creation: ") << File.mtime(File.join(dir, 'command.batch')).to_s if long
231
+ puts Log.color(:magenta, "Started: ") << File.ctime(File.join(dir, 'std.err')).to_s if File.exist?(File.join(dir, 'std.err')) && long
232
+ puts Log.color(:magenta, "Manifest: ") << Log.color(:yellow, manifest) if long
233
+ puts Log.color(:magenta, "Step path: ") << Log.color(:yellow, step_path)
234
+ puts Log.color(:magenta, "Done: ") << File.mtime(File.join(dir, 'exit.status')).to_s if File.exist?(File.join(dir, 'exit.status'))
235
+ puts Log.color(:magenta, "Exec: ") << (exe || "Missing") if long
236
+ puts Log.color(:magenta, "CMD: ") << (Log.color(:yellow, cmd) || "Missing") if long
237
+ puts Log.color(:magenta, "HOME: ") << Log.color(:yellow, container_home) if container_home && long
238
+ if different_system
239
+ puts Log.color(:magenta, "Job ID (#{Log.color(:red, job_batch_system)}): ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : Log.color(:green, id) )
240
+ else
241
+ puts Log.color(:magenta, "Job ID: ") << (exit_status ? (exit_status == 0 ? Log.color(:green, "Done") : Log.color(:red, "Error")) + " (#{ id })" : (running_jobs.include?(id) || $norunningjobs ? Log.color(:green, id) : Log.color(:red, id) ))
242
+ end
243
+ puts Log.color(:magenta, "Dependencies: ") << deps * ", " if deps
244
+ puts Log.color(:magenta, "Dependencies (can fail): ") << cadeps * ", " if cadeps
245
+ puts Log.color(:magenta, "Nodes: ") << nodes * ", " if long
246
+ puts Log.color(:magenta, "Time elapsed: ") << Misc.format_seconds(time_elapsed) if time_elapsed
247
+ puts Log.color(:magenta, "Output: ") << File.exist?(File.join(dir, 'std.out')).to_s << (id.nil? || File.exist?(File.join(dir, 'exit.status')) ? "" : " (last update " + Misc.format_seconds(time_diff) + " ago)") if long
248
+
249
+ if options[:batch_parameters]
250
+ puts Log.color(:magenta, "BATCH parameters: ")
251
+ case job_batch_system
252
+ when 'slurm'
253
+ text = CMD.cmd('grep "^#SBATCH" ', :in => Open.read(fcmd)).read.strip
254
+ when 'lsf'
255
+ text = CMD.cmd('grep "^#BSUB" ', :in => Open.read(fcmd)).read.strip
256
+ when 'pbs'
257
+ text = CMD.cmd('grep "^#PBS" ', :in => Open.read(fcmd)).read.strip
258
+ else
259
+ text = ""
260
+ end
261
+ lines = text.split("\n").collect{|line| header, _sep, value = line.partition(/\s+/); Log.color(:yellow, header + ": ") + value}
262
+ puts Log.color :yellow, lines * "\n"
263
+ end
264
+
265
+ fprocpath = File.join(dir, 'procpath.sqlite3')
266
+ if options[:batch_procpath] && Open.exists?(fprocpath)
267
+ puts Log.color(:magenta, "Procpath summary: ")
268
+ require 'rbbt/tsv/csv'
269
+ meta = TSV.csv(CMD.cmd("sqlite3 -header -csv #{fprocpath} 'select * from meta;' "))
270
+ perf = TSV.csv(CMD.cmd("sqlite3 -header -csv #{fprocpath} 'select * from record;' "))
271
+
272
+ page_size = meta["page_size"].first.to_f
273
+ clock_ticks = meta["clock_ticks"].first.to_f
274
+
275
+ cpu_average = {}
276
+ rss_average = {}
277
+ perf.through :key, ["ts", 'stat_pid', "stat_utime", "stat_stime", "stat_cutime", "stat_cstime", "stat_rss"] do |k, values|
278
+ time, stat_pid, ucpu, scpu, ccpu, cscpu, rss = values
279
+ time = time.to_f
280
+
281
+ cpu = Misc.sum([ucpu, scpu].collect{|v| v.to_f})
282
+ cpu_average[stat_pid] ||= {}
283
+ cpu_average[stat_pid][time] ||= []
284
+ cpu_average[stat_pid][time] << cpu.to_f
285
+ rss_average[time] ||= []
286
+ rss_average[time] << rss.to_f * page_size
287
+ end
288
+
289
+ ticks = 0
290
+ cpu_average.each do |stat_pid, cpu_average_pid|
291
+ start = cpu_average_pid.keys.sort.first
292
+ eend = cpu_average_pid.keys.sort.last
293
+ ticks += Misc.sum(cpu_average_pid[eend]) - Misc.sum(cpu_average_pid[start])
294
+ end
295
+ start = rss_average.keys.sort.first
296
+ eend = rss_average.keys.sort.last
297
+ time_elapsed = eend - start
298
+ ticks = 1 if ticks == 0
299
+ time_elapsed = 1 if time_elapsed == 0
300
+ puts Log.color(:yellow, "CPU average: ") + "%.2f" % ( ticks / clock_ticks / time_elapsed * 100).to_s
301
+ puts Log.color(:yellow, "RSS average: ") + "%.2f GB" % Misc.mean(rss_average.collect{|t,l| Misc.sum(l) / (1024 * 1024 * 1024)}).to_s
302
+ puts Log.color(:yellow, "Time: ") + Misc.format_seconds((eend - start))
303
+
304
+ end
305
+
306
+ if options[:sacct_peformance]
307
+ begin
308
+ raise "sacct not supported for LSF" unless batch_system == 'slurm'
309
+ tsv = TSV.open(CMD.cmd("sacct -j #{id} -o 'jobid,AveRSS,MaxRSS,MaxDiskRead,MaxDiskWrite' -P|grep 'JobID\\|\.batch'"), :header_hash => '', :sep => "|", :type => :list)
310
+ values = tsv[tsv.keys.first]
311
+ if values.compact.any?
312
+ puts Log.color(:magenta, "SACCT performance: ")
313
+ puts values.zip(values.fields).collect{|v,t| Log.color(:yellow, t + ": ") + v.to_s } * "\n"
314
+ end
315
+ rescue
316
+ Log.warn $!.message
317
+ end
318
+ end
319
+
320
+
321
+ if tail && File.exist?(File.join(dir, 'std.err'))
322
+ if exit_status && exit_status != 0
323
+ puts Log.color(:magenta, "First error or exception found: ")
324
+ puts CMD.cmd("grep -i -w 'error\\|[a-z]*exception' #{File.join(dir, 'std.err')} -A #{tail.to_i} |head -n #{tail.to_i}", :no_fail => true).read
325
+ elsif exit_status
326
+ puts Log.color(:magenta, "Completed jobs: ")
327
+ puts CMD.cmd("grep -i -w 'Completed step' #{File.join(dir, 'std.err')} | grep -v 'Retrying dep.' | tail -n #{tail.to_i}", :no_fail => true).read
328
+ else
329
+ puts Log.color(:magenta, "Log tail: ")
330
+ puts CMD.cmd(" cat #{File.join(dir, 'std.err')} | grep -v '^[^\\s:]*\\[3.m' | grep -v -e '^[[:space:]]*$' | grep -v \"\\(STDOUT\\|STDERR\\):[[:space:]]*$\" | tail -n #{tail.to_i} ").read
331
+ end
332
+ end
333
+
334
+ if options[:progress]
335
+ step_line = Open.read(fcmd).split("\n").select{|line| line =~ /^#STEP_PATH:/}.first
336
+ if step_line
337
+ require 'rbbt/workflow'
338
+ step_path = step_line.split(": ").last.strip
339
+ step = Step.new step_path
340
+ step.load_dependencies_from_info
341
+ has_bar = false
342
+ (step.rec_dependencies + [step]).reverse.each do |j|
343
+ next if j.done?
344
+ if j.file(:progress).exists?
345
+ bar = Log::ProgressBar.new
346
+ bar.load(j.file(:progress).yaml)
347
+ puts Log.color(:magenta, "Progress: ") + bar.report_msg + " " + Log.color(:yellow, j.task_signature)
348
+ has_bar = true
349
+ end
350
+ end
351
+ step_status = step.status
352
+ step_status = Log.color :red, step_status if step_status.to_s == 'cleaned'
353
+ step_status = Log.color :green, step_status if step_status.to_s == 'done'
354
+ puts Log.color(:magenta, "Progress: ") + Log.color(:yellow, step.task_signature) + " #{step_status}" unless has_bar
355
+ end
356
+ end
357
+
358
+ end
359
+
360
+ puts
361
+ puts Log.color :clear, "Found #{count} jobs"
362
+
@@ -0,0 +1,92 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt/util/simpleopt'
4
+ require 'rbbt/workflow'
5
+ require 'rbbt/workflow/usage'
6
+ require 'rbbt/workflow/remote_workflow'
7
+ require 'rbbt/hpc'
8
+ require 'rbbt/hpc/orchestrate'
9
+ require 'time'
10
+
11
+ rbbt_options = SOPT::GOT_OPTIONS
12
+
13
+ $slurm_options = SOPT.get <<EOF
14
+ -dr--dry_run Print only the template
15
+ -cj--clean_job Clean job
16
+ --drbbt* Use development version of rbbt
17
+ -sing--singularity Use Singularity
18
+ -si--singularity_img* Singularity image to use
19
+ -sm--singularity_mounts* Singularity image to use
20
+ -ug--user_group* Use alternative user group for group project directory
21
+ -c--contain* Contain in directory (using Singularity)
22
+ -s--sync* Contain in directory and sync jobs
23
+ -e--exclusive Make exclusive use of the node
24
+ -hm--highmem Make use of highmem cores
25
+ -wc--wipe_container* Wipe the jobs from the contain directory
26
+ -pd--purge_deps Purge job dependencies
27
+ -CS--contain_and_sync Contain and sync to default locations
28
+ -ci--copy_image When using a container directory, copy image there
29
+ -t--tail Tail the logs
30
+ -BPP--batch_procpath* Save Procpath performance for batch job; specify only options
31
+ -q--queue* Queue
32
+ -a--account* Account
33
+ -p--partition* Partition
34
+ -t--task_cpus* Tasks
35
+ -tm--time* Time
36
+ -m--mem* SLURM minimum memory
37
+ --gres* SLURM Generic resources
38
+ -mcpu--mem_per_cpu* SLURM minimum memory per CPU
39
+ -lin--licenses* SLURM licenses
40
+ -cons--constraint* SLURM constraint
41
+ -W--workflows* Additional workflows
42
+ -rmb--remove_batch_basedir Remove the SLURM working directory (command, STDIN, exit status, ...)
43
+ -bs--batch_system* Batch system to use: auto, lsf, slurm (default is auto-detect)
44
+ -lmod--lua_modules* Lua Modules to load
45
+ -co--conda* Conda environment to use
46
+ -OR--orchestration_rules* Orchestration rules
47
+ EOF
48
+
49
+ batch_system = $slurm_options.delete :batch_system
50
+ batch_system ||= 'auto'
51
+
52
+ $slurm_options[:config_keys] = SOPT::GOT_OPTIONS[:config_keys]
53
+
54
+ HPC::BATCH_MODULE = HPC.batch_system batch_system
55
+
56
+ raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
57
+
58
+ class Step
59
+ def join
60
+ HPC::BATCH_MODULE.wait_for_job(@batch_job_dir)
61
+ end
62
+
63
+ def run(no_load = true)
64
+ if done?
65
+ self.load
66
+ else
67
+ begin
68
+ Log.debug "Issuing SLURM job for #{self.path}"
69
+ @batch_job_id, @batch_job_dir = HPC::BATCH_MODULE.orchestrate_job(self, $slurm_options)
70
+ if no_load
71
+ self
72
+ else
73
+ join
74
+ self.load
75
+ end
76
+ rescue HPC::BATCH_DRY_RUN
77
+ end
78
+ end
79
+ end
80
+ end
81
+
82
+ module RemoteStep::SSH
83
+
84
+ def _run
85
+ RemoteWorkflow::SSH.orchestrate_slurm_job(File.join(base_url, task.to_s), @input_id, @base_name, $slurm_options)
86
+ end
87
+
88
+ end
89
+
90
+ SOPT.current_options = rbbt_options
91
+
92
+ load Rbbt.share.rbbt_commands.workflow.task.find
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+ require 'rbbt/hpc'
6
+
7
+ #$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
8
+
9
+ options = SOPT.setup <<EOF
10
+
11
+ Queue a job in Marenostrum
12
+
13
+ $ rbbt slurm tail <directory|jobid> [options]
14
+
15
+ -h--help Print this help
16
+ EOF
17
+
18
+ if options[:help]
19
+ if defined? rbbt_usage
20
+ rbbt_usage
21
+ else
22
+ puts SOPT.doc
23
+ end
24
+ exit 0
25
+ end
26
+
27
+ batch_system = options.delete :batch_system
28
+ batch_system ||= 'auto'
29
+
30
+ HPC::BATCH_MODULE = HPC.batch_system batch_system
31
+
32
+ raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
33
+
34
+ directory = ARGV.shift
35
+
36
+ raise ParameterException if directory.nil?
37
+
38
+ if directory =~ /^[0-9]*$/
39
+ workdir = File.expand_path('~/rbbt-batch')
40
+ Path.setup(workdir)
41
+
42
+ workdir.glob("**/job.id").each do |file|
43
+ next unless directory == Open.read(file).strip
44
+ directory = File.dirname(file)
45
+ break
46
+ end
47
+ end
48
+
49
+ raise ParameterException, "Could not identify job #{directory}" unless File.exist?(directory)
50
+
51
+ require 'rbbt/hpc/slurm'
52
+
53
+ command_txt = Open.read(File.join(directory, 'command.batch'))
54
+ if m = command_txt.match(/#STEP_PATH: (.*)/)
55
+ step_path = m[1]
56
+ else
57
+ step_path = nil
58
+ end
59
+
60
+ puts Log.color(:magenta, "Directory: ") + directory if directory
61
+ puts Log.color(:magenta, "Step path: ") + step_path if step_path
62
+
63
+ HPC::BATCH_MODULE.follow_job directory, true
@@ -0,0 +1,79 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt/util/simpleopt'
4
+ require 'rbbt/workflow'
5
+ require 'rbbt/workflow/usage'
6
+ require 'rbbt/workflow/remote_workflow'
7
+ require 'rbbt/hpc'
8
+ require 'time'
9
+
10
+ rbbt_options = SOPT::GOT_OPTIONS
11
+
12
+ $slurm_options = SOPT.get <<EOF
13
+ -dr--dry_run Print only the template
14
+ -cj--clean_job Clean job
15
+ --drbbt* Use development version of rbbt
16
+ -sing--singularity Use Singularity
17
+ -si--singularity_img* Singularity image to use
18
+ -sm--singularity_mounts* Singularity image to use
19
+ -ug--user_group* Use alternative user group for group project directory
20
+ -c--contain* Contain in directory (using Singularity)
21
+ -s--sync* Contain in directory and sync jobs
22
+ -e--exclusive Make exclusive use of the node
23
+ -hm--highmem Make use of highmem cores
24
+ -wc--wipe_container* Wipe the jobs from the contain directory
25
+ -pd--purge_deps Purge job dependencies
26
+ -CS--contain_and_sync Contain and sync to default locations
27
+ -ci--copy_image When using a container directory, copy image there
28
+ -t--tail Tail the logs
29
+ -BPP--batch_procpath* Save Procpath performance for batch job; specify only options
30
+ -q--queue* Queue
31
+ -a--account* Account
32
+ -p--partition* Partition
33
+ -t--task_cpus* Tasks
34
+ -tm--time* Time
35
+ -m--mem* minimum memory
36
+ --gres* Generic resources
37
+ -mcpu--mem_per_cpu* minimum memory per CPU
38
+ -lin--licenses* licenses
39
+ -cons--constraint* constraint
40
+ -W--workflows* Additional workflows
41
+ -rmb--remove_batch_dir Remove the batch working directory (command, STDIN, exit status, ...)
42
+ -bs--batch_system* Batch system to use: auto, lsf, slurm (default is auto-detect)
43
+ -lmod--lua_modules* Lua Modules to load
44
+ -co--conda* Conda environment to use
45
+ -OR--orchestration_rules* Orchestration rules
46
+ EOF
47
+
48
+ batch_system = $slurm_options.delete :batch_system
49
+ batch_system ||= 'auto'
50
+
51
+ HPC::BATCH_MODULE = HPC.batch_system batch_system
52
+
53
+ raise ParameterException.new("Could not detect batch_system: #{Misc.fingerprint batch_system}") if HPC::BATCH_MODULE.nil?
54
+
55
+ class Step
56
+
57
+ def run(*args)
58
+ if done?
59
+ self.load
60
+ else
61
+ begin
62
+ Log.debug "Issuing BATCH job for #{self.path}"
63
+ HPC::BATCH_MODULE.run_job(self, $slurm_options)
64
+ rescue HPC::BATCH_DRY_RUN
65
+ end
66
+ end
67
+ end
68
+ end
69
+
70
+ module RemoteStep::SSH
71
+
72
+ def _run
73
+ RemoteWorkflow::SSH.run_slurm_job(File.join(base_url, task.to_s), @input_id, @base_name, $slurm_options)
74
+ end
75
+
76
+ end
77
+
78
+ SOPT.current_options = rbbt_options
79
+ load Rbbt.share.rbbt_commands.workflow.task.find
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.40.0
4
+ version: 5.40.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-10-12 00:00:00.000000000 Z
11
+ date: 2023-10-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -136,20 +136,6 @@ dependencies:
136
136
  - - ">="
137
137
  - !ruby/object:Gem::Version
138
138
  version: '0'
139
- - !ruby/object:Gem::Dependency
140
- name: nokogiri
141
- requirement: !ruby/object:Gem::Requirement
142
- requirements:
143
- - - ">="
144
- - !ruby/object:Gem::Version
145
- version: '0'
146
- type: :runtime
147
- prerelease: false
148
- version_requirements: !ruby/object:Gem::Requirement
149
- requirements:
150
- - - ">="
151
- - !ruby/object:Gem::Version
152
- version: '0'
153
139
  description: Utilities for handling tsv files, caches, etc
154
140
  email: miguel.vazquez.g@bsc.es
155
141
  executables:
@@ -302,6 +288,7 @@ files:
302
288
  - lib/rbbt/util/open.rb
303
289
  - lib/rbbt/util/procpath.rb
304
290
  - lib/rbbt/util/python.rb
291
+ - lib/rbbt/util/python/step.rb
305
292
  - lib/rbbt/util/python/util.rb
306
293
  - lib/rbbt/util/semaphore.rb
307
294
  - lib/rbbt/util/simpleDSL.rb
@@ -390,6 +377,11 @@ files:
390
377
  - share/rbbt_commands/lsf/task
391
378
  - share/rbbt_commands/migrate
392
379
  - share/rbbt_commands/migrate_job
380
+ - share/rbbt_commands/pbs/clean
381
+ - share/rbbt_commands/pbs/list
382
+ - share/rbbt_commands/pbs/orchestrate
383
+ - share/rbbt_commands/pbs/tail
384
+ - share/rbbt_commands/pbs/task
393
385
  - share/rbbt_commands/purge_job
394
386
  - share/rbbt_commands/resource/claims
395
387
  - share/rbbt_commands/resource/exists
@@ -592,7 +584,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
592
584
  - !ruby/object:Gem::Version
593
585
  version: '0'
594
586
  requirements: []
595
- rubygems_version: 3.5.0.dev
587
+ rubygems_version: 3.4.19
596
588
  signing_key:
597
589
  specification_version: 4
598
590
  summary: Utilities for the Ruby Bioinformatics Toolkit (rbbt)