ood_core 0.10.0 → 0.11.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.travis.yml +2 -2
- data/CHANGELOG.md +26 -1
- data/lib/ood_core.rb +1 -0
- data/lib/ood_core/batch_connect/template.rb +44 -2
- data/lib/ood_core/cluster.rb +15 -0
- data/lib/ood_core/clusters.rb +19 -5
- data/lib/ood_core/invalid_cluster.rb +37 -0
- data/lib/ood_core/job/adapter.rb +35 -4
- data/lib/ood_core/job/adapters/drmaa.rb +1 -1
- data/lib/ood_core/job/adapters/linux_host.rb +4 -0
- data/lib/ood_core/job/adapters/linux_host/launcher.rb +10 -2
- data/lib/ood_core/job/adapters/lsf.rb +4 -0
- data/lib/ood_core/job/adapters/lsf/helper.rb +9 -3
- data/lib/ood_core/job/adapters/pbspro.rb +5 -0
- data/lib/ood_core/job/adapters/sge.rb +4 -0
- data/lib/ood_core/job/adapters/sge/batch.rb +1 -2
- data/lib/ood_core/job/adapters/sge/helper.rb +1 -0
- data/lib/ood_core/job/adapters/sge/qstat_xml_j_r_listener.rb +19 -4
- data/lib/ood_core/job/adapters/slurm.rb +9 -2
- data/lib/ood_core/job/adapters/torque.rb +5 -0
- data/lib/ood_core/job/array_ids.rb +18 -53
- data/lib/ood_core/job/script.rb +10 -2
- data/lib/ood_core/version.rb +1 -1
- data/ood_core.gemspec +2 -1
- metadata +20 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: bf5cfe29bd0770daa8404169e04fcc8fcdc9a89b88f83bbfbc8675040b119ccf
|
4
|
+
data.tar.gz: c6082f2c7b751c0b7f247dcc790ae35bc1a372830b6f7c529d1b5574eed714ee
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 30c82f37cf6c974c04a3d8c9bc9da21e47014b7aecfa03424575196e42aa0ebebb89aba6717e73073a1ed3d963d8391fa76da263a4e3a7e4dc4250a2cb32f830
|
7
|
+
data.tar.gz: 9c7be268d29f4dd6c9cec57ce783e7eb0777c76e3df7025062e77868985a61e410cd24ec7d39c2a4e91a039d85a85c41fad75e28a408c8e00940597e5a1fb1ff
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -6,6 +6,29 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
|
|
6
6
|
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
7
7
|
|
8
8
|
## [Unreleased]
|
9
|
+
|
10
|
+
## [0.11.1] - 2020-03-18
|
11
|
+
### Changed
|
12
|
+
- Only the version changed. Had to republish to rubygems.org
|
13
|
+
|
14
|
+
## [0.11.0] - 2020-03-18
|
15
|
+
### Added
|
16
|
+
- Added directive prefixes to each adapter (e.g. `#QSUB`) ([#161](https://github.com/OSC/ood_core/issues/161))
|
17
|
+
- LHA supports `submit_host` field in native ([#164](https://github.com/OSC/ood_core/issues/164))
|
18
|
+
- Cluster files can be yaml or yml extensions ([#171](https://github.com/OSC/ood_core/issues/171))
|
19
|
+
- Users can add a flag `OOD_JOB_NAME_ILLEGAL_CHARS` to sanitize job names ([#183](https://github.com/OSC/ood_core/issues/183))
|
20
|
+
|
21
|
+
### Changed
|
22
|
+
- Simplified job array parsing ([#144](https://github.com/OSC/ood_core/issues/144))
|
23
|
+
|
24
|
+
### Fixed
|
25
|
+
- Issue where environment variables were not properly exported to the job ([#158](https://github.com/OSC/ood_core/issues/158))
|
26
|
+
- Parsing bad cluster files ([#150](https://github.com/OSC/ood_core/issues/150) and [#178](https://github.com/OSC/ood_core/issues/178))
|
27
|
+
- netcat is no longer a hard dependency. Now lsof, python and bash can be used ([#153](https://github.com/OSC/ood_core/issues/153))
|
28
|
+
- GE crash when nil config file was given ([#175](https://github.com/OSC/ood_core/issues/175))
|
29
|
+
- GE sometimes reported incorrect core count ([#168](https://github.com/OSC/ood_core/issues/168))
|
30
|
+
|
31
|
+
|
9
32
|
## [0.10.0] - 2019-11-05
|
10
33
|
### Added
|
11
34
|
- Added an adapter for submitting work on Linux hosted systems without using a scheduler
|
@@ -196,7 +219,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
196
219
|
### Added
|
197
220
|
- Initial release!
|
198
221
|
|
199
|
-
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.
|
222
|
+
[Unreleased]: https://github.com/OSC/ood_core/compare/v0.11.1...HEAD
|
223
|
+
[0.11.1]: https://github.com/OSC/ood_core/compare/v0.11.0...v0.11.1
|
224
|
+
[0.11.0]: https://github.com/OSC/ood_core/compare/v0.10.0...v0.11.0
|
200
225
|
[0.10.0]: https://github.com/OSC/ood_core/compare/v0.9.3...v0.10.0
|
201
226
|
[0.9.3]: https://github.com/OSC/ood_core/compare/v0.9.2...v0.9.3
|
202
227
|
[0.9.2]: https://github.com/OSC/ood_core/compare/v0.9.1...v0.9.2
|
data/lib/ood_core.rb
CHANGED
@@ -117,11 +117,47 @@ module OodCore
|
|
117
117
|
}
|
118
118
|
export -f random_number
|
119
119
|
|
120
|
+
port_used_python() {
|
121
|
+
python -c "import socket; socket.socket().connect(('$1',$2))" >/dev/null 2>&1
|
122
|
+
}
|
123
|
+
|
124
|
+
port_used_python3() {
|
125
|
+
python3 -c "import socket; socket.socket().connect(('$1',$2))" >/dev/null 2>&1
|
126
|
+
}
|
127
|
+
|
128
|
+
port_used_nc(){
|
129
|
+
nc -w 2 "$1" "$2" < /dev/null > /dev/null 2>&1
|
130
|
+
}
|
131
|
+
|
132
|
+
port_used_lsof(){
|
133
|
+
lsof -i :"$2" >/dev/null 2>&1
|
134
|
+
}
|
135
|
+
|
136
|
+
port_used_bash(){
|
137
|
+
local bash_supported=$(strings /bin/bash 2>/dev/null | grep tcp)
|
138
|
+
if [ "$bash_supported" == "/dev/tcp/*/*" ]; then
|
139
|
+
(: < /dev/tcp/$1/$2) >/dev/null 2>&1
|
140
|
+
else
|
141
|
+
return 127
|
142
|
+
fi
|
143
|
+
}
|
144
|
+
|
120
145
|
# Check if port $1 is in use
|
121
146
|
port_used () {
|
122
147
|
local port="${1#*:}"
|
123
148
|
local host=$((expr "${1}" : '\\(.*\\):' || echo "localhost") | awk 'END{print $NF}')
|
124
|
-
|
149
|
+
local port_strategies=(port_used_nc port_used_lsof port_used_bash port_used_python port_used_python3)
|
150
|
+
|
151
|
+
for strategy in ${port_strategies[@]};
|
152
|
+
do
|
153
|
+
$strategy $host $port
|
154
|
+
status=$?
|
155
|
+
if [[ "$status" == "0" ]] || [[ "$status" == "1" ]]; then
|
156
|
+
return $status
|
157
|
+
fi
|
158
|
+
done
|
159
|
+
|
160
|
+
return 127
|
125
161
|
}
|
126
162
|
export -f port_used
|
127
163
|
|
@@ -143,8 +179,14 @@ module OodCore
|
|
143
179
|
local port="${1}"
|
144
180
|
local time="${2:-30}"
|
145
181
|
for ((i=1; i<=time*2; i++)); do
|
146
|
-
|
182
|
+
port_used "${port}"
|
183
|
+
port_status=$?
|
184
|
+
if [ "$port_status" == "0" ]; then
|
147
185
|
return 0
|
186
|
+
elif [ "$port_status" == "127" ]; then
|
187
|
+
echo "commands to find port were either not found or inaccessible."
|
188
|
+
echo "command options are lsof, nc, bash's /dev/tcp, or python (or python3) with socket lib."
|
189
|
+
return 127
|
148
190
|
fi
|
149
191
|
sleep 0.5
|
150
192
|
done
|
data/lib/ood_core/cluster.rb
CHANGED
@@ -28,6 +28,10 @@ module OodCore
|
|
28
28
|
# @return [Hash] the acls configuration
|
29
29
|
attr_reader :acls_config
|
30
30
|
|
31
|
+
# The errors encountered with configuring this cluster
|
32
|
+
# @return [Array<String>] the errors
|
33
|
+
attr_reader :errors
|
34
|
+
|
31
35
|
# @param cluster [#to_h] the cluster object
|
32
36
|
# @option cluster [#to_sym] :id The cluster id
|
33
37
|
# @option cluster [#to_h] :metadata ({}) The cluster's metadata
|
@@ -39,6 +43,8 @@ module OodCore
|
|
39
43
|
# against
|
40
44
|
# @option cluster [#to_h] :batch_connect ({}) Configuration for batch
|
41
45
|
# connect templates
|
46
|
+
# @option cluster [#to_a] :errors ([]) List of configuration errors
|
47
|
+
#
|
42
48
|
def initialize(cluster)
|
43
49
|
c = cluster.to_h.symbolize_keys
|
44
50
|
|
@@ -52,6 +58,9 @@ module OodCore
|
|
52
58
|
@custom_config = c.fetch(:custom, {}) .to_h.symbolize_keys
|
53
59
|
@acls_config = c.fetch(:acls, []) .map(&:to_h)
|
54
60
|
@batch_connect_config = c.fetch(:batch_connect, {}).to_h.symbolize_keys
|
61
|
+
|
62
|
+
# side effects from object creation and validation
|
63
|
+
@errors = c.fetch(:errors, []) .to_a
|
55
64
|
end
|
56
65
|
|
57
66
|
# Metadata that provides extra information about this cluster
|
@@ -159,6 +168,12 @@ module OodCore
|
|
159
168
|
}
|
160
169
|
end
|
161
170
|
|
171
|
+
# This cluster is always valid
|
172
|
+
# @return true
|
173
|
+
def valid?
|
174
|
+
return true
|
175
|
+
end
|
176
|
+
|
162
177
|
private
|
163
178
|
# Build acl adapter objects from array
|
164
179
|
def build_acls(ary)
|
data/lib/ood_core/clusters.rb
CHANGED
@@ -21,16 +21,30 @@ module OodCore
|
|
21
21
|
if config.file?
|
22
22
|
if config.readable?
|
23
23
|
CONFIG_VERSION.any? do |version|
|
24
|
-
|
25
|
-
|
24
|
+
begin
|
25
|
+
YAML.safe_load(config.read)&.fetch(version, {}).each do |k, v|
|
26
|
+
clusters << Cluster.new(send("parse_#{version}", id: k, cluster: v))
|
27
|
+
end
|
28
|
+
rescue Psych::SyntaxError => e
|
29
|
+
clusters << InvalidCluster.new(
|
30
|
+
id: config.basename(config.extname).to_s,
|
31
|
+
errors: [ e.message.to_s ]
|
32
|
+
)
|
26
33
|
end
|
27
34
|
end
|
28
35
|
end
|
29
36
|
elsif config.directory?
|
30
|
-
Pathname.glob(config.join("*.yml")).select(&:file?).select(&:readable?).each do |p|
|
37
|
+
Pathname.glob([config.join("*.yml"), config.join("*.yaml")]).select(&:file?).select(&:readable?).each do |p|
|
31
38
|
CONFIG_VERSION.any? do |version|
|
32
|
-
|
33
|
-
|
39
|
+
begin
|
40
|
+
if cluster = YAML.safe_load(p.read)&.fetch(version, nil)
|
41
|
+
clusters << Cluster.new(send("parse_#{version}", id: p.basename(p.extname()).to_s, cluster: cluster))
|
42
|
+
end
|
43
|
+
rescue Psych::SyntaxError => e
|
44
|
+
clusters << InvalidCluster.new(
|
45
|
+
id: p.basename(p.extname).to_s,
|
46
|
+
errors: [ e.message.to_s ]
|
47
|
+
)
|
34
48
|
end
|
35
49
|
end
|
36
50
|
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module OodCore
|
2
|
+
# A special case of an OodCore::Cluster where something went awry in the
|
3
|
+
# creation and it's invalid for some reason. Users should only be able
|
4
|
+
# to rely on id and metadata.error_msg. All *allow? related functions
|
5
|
+
# return false, meaning nothing is allowed.
|
6
|
+
class InvalidCluster < Cluster
|
7
|
+
# Logins are not allowed
|
8
|
+
# @return false
|
9
|
+
def login_allow?
|
10
|
+
false
|
11
|
+
end
|
12
|
+
|
13
|
+
# Jobs are not allowed
|
14
|
+
# @return false
|
15
|
+
def job_allow?
|
16
|
+
false
|
17
|
+
end
|
18
|
+
|
19
|
+
# Custom features are not allowed
|
20
|
+
# @return false
|
21
|
+
def custom_allow?(_)
|
22
|
+
false
|
23
|
+
end
|
24
|
+
|
25
|
+
# This cluster is not allowed to be used
|
26
|
+
# @return false
|
27
|
+
def allow?
|
28
|
+
false
|
29
|
+
end
|
30
|
+
|
31
|
+
# This cluster is never valid
|
32
|
+
# @return false
|
33
|
+
def valid?
|
34
|
+
return false
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
data/lib/ood_core/job/adapter.rb
CHANGED
@@ -36,7 +36,7 @@ module OodCore
|
|
36
36
|
# Retrieve info for all jobs from the resource manager
|
37
37
|
# @abstract Subclass is expected to implement {#info_all}
|
38
38
|
# @raise [NotImplementedError] if subclass did not define {#info_all}
|
39
|
-
# @param attrs [Array<symbol>] defaults to nil (and all attrs are provided)
|
39
|
+
# @param attrs [Array<symbol>] defaults to nil (and all attrs are provided)
|
40
40
|
# This array specifies only attrs you want, in addition to id and status.
|
41
41
|
# If an array, the Info object that is returned to you is not guaranteed
|
42
42
|
# to have a value for any attr besides the ones specified and id and status.
|
@@ -51,7 +51,7 @@ module OodCore
|
|
51
51
|
# Retrieve info for all jobs for a given owner or owners from the
|
52
52
|
# resource manager
|
53
53
|
# @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
|
54
|
-
# @param attrs [Array<symbol>] defaults to nil (and all attrs are provided)
|
54
|
+
# @param attrs [Array<symbol>] defaults to nil (and all attrs are provided)
|
55
55
|
# This array specifies only attrs you want, in addition to id and status.
|
56
56
|
# If an array, the Info object that is returned to you is not guaranteed
|
57
57
|
# to have a value for any attr besides the ones specified and id and status.
|
@@ -69,7 +69,7 @@ module OodCore
|
|
69
69
|
end
|
70
70
|
|
71
71
|
# Iterate over each job Info object
|
72
|
-
# @param attrs [Array<symbol>] defaults to nil (and all attrs are provided)
|
72
|
+
# @param attrs [Array<symbol>] defaults to nil (and all attrs are provided)
|
73
73
|
# This array specifies only attrs you want, in addition to id and status.
|
74
74
|
# If an array, the Info object that is returned to you is not guaranteed
|
75
75
|
# to have a value for any attr besides the ones specified and id and status.
|
@@ -88,7 +88,7 @@ module OodCore
|
|
88
88
|
|
89
89
|
# Iterate over each job Info object
|
90
90
|
# @param owner [#to_s, Array<#to_s>] the owner(s) of the jobs
|
91
|
-
# @param attrs [Array<symbol>] defaults to nil (and all attrs are provided)
|
91
|
+
# @param attrs [Array<symbol>] defaults to nil (and all attrs are provided)
|
92
92
|
# This array specifies only attrs you want, in addition to id and status.
|
93
93
|
# If an array, the Info object that is returned to you is not guaranteed
|
94
94
|
# to have a value for any attr besides the ones specified and id and status.
|
@@ -157,6 +157,37 @@ module OodCore
|
|
157
157
|
def delete(id)
|
158
158
|
raise NotImplementedError, "subclass did not define #delete"
|
159
159
|
end
|
160
|
+
|
161
|
+
# Return the scheduler-specific directive prefix
|
162
|
+
#
|
163
|
+
# Examples of directive prefixes include #QSUB, #BSUB and allow placing what would
|
164
|
+
# otherwise be command line options inside the job launch script.
|
165
|
+
#
|
166
|
+
# The method should return nil if the adapter does not support prefixes
|
167
|
+
#
|
168
|
+
# @abstract Subclass is expected to implement {#directive_prefix}
|
169
|
+
# @raise [NotImplementedError] if subclass did not define {#directive_prefix}
|
170
|
+
# @return [String]
|
171
|
+
def directive_prefix
|
172
|
+
raise NotImplementedError, "subclass did not define #directive_prefix"
|
173
|
+
end
|
174
|
+
|
175
|
+
# Replace illegal chars in job name with a dash
|
176
|
+
#
|
177
|
+
# @return [String] job name with dashes replacing illegal chars
|
178
|
+
def sanitize_job_name(job_name)
|
179
|
+
# escape ^ and omit -
|
180
|
+
chars = job_name_illegal_chars.to_s.gsub("^", "\\^").gsub("-", "")
|
181
|
+
job_name.tr(chars, "-")
|
182
|
+
end
|
183
|
+
|
184
|
+
# Illegal chars that should not be used in a job name
|
185
|
+
# A dash is assumed to be legal in job names in all batch schedulers
|
186
|
+
#
|
187
|
+
# @return [String] string of chars
|
188
|
+
def job_name_illegal_chars
|
189
|
+
ENV["OOD_JOB_NAME_ILLEGAL_CHARS"].to_s
|
190
|
+
end
|
160
191
|
end
|
161
192
|
end
|
162
193
|
end
|
@@ -13,7 +13,7 @@
|
|
13
13
|
# The contents of this file are subject to the Sun Industry Standards
|
14
14
|
# Source License Version 1.2 (the "License"); You may not use this file
|
15
15
|
# except in compliance with the License. You may obtain a copy of the
|
16
|
-
# License at http://
|
16
|
+
# License at http://gridscheduler.sourceforge.net/Gridengine_SISSL_license.html
|
17
17
|
#
|
18
18
|
# Software provided under this License is provided on an "AS IS" basis,
|
19
19
|
# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
|
@@ -11,7 +11,7 @@ require 'time'
|
|
11
11
|
class OodCore::Job::Adapters::LinuxHost::Launcher
|
12
12
|
attr_reader :contain, :debug, :site_timeout, :session_name_label, :singularity_bin,
|
13
13
|
:site_singularity_bindpath, :default_singularity_image, :ssh_hosts,
|
14
|
-
:strict_host_checking, :
|
14
|
+
:strict_host_checking, :tmux_bin, :username
|
15
15
|
# The root exception class that all LinuxHost adapter-specific exceptions inherit
|
16
16
|
# from
|
17
17
|
class Error < StandardError; end
|
@@ -57,7 +57,7 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
|
|
57
57
|
# @param hostname [#to_s] The hostname to submit the work to
|
58
58
|
# @param script [OodCore::Job::Script] The script object defining the work
|
59
59
|
def start_remote_session(script)
|
60
|
-
cmd = ssh_cmd(submit_host)
|
60
|
+
cmd = ssh_cmd(submit_host(script))
|
61
61
|
|
62
62
|
session_name = unique_session_name
|
63
63
|
output = call(*cmd, stdin: wrapped_script(script, session_name))
|
@@ -98,6 +98,14 @@ class OodCore::Job::Adapters::LinuxHost::Launcher
|
|
98
98
|
}
|
99
99
|
end
|
100
100
|
|
101
|
+
def submit_host(script = nil)
|
102
|
+
if script && script.native && script.native['submit_host_override']
|
103
|
+
script.native['submit_host_override']
|
104
|
+
else
|
105
|
+
@submit_host
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
101
109
|
private
|
102
110
|
|
103
111
|
# Call a forked Slurm command for a given cluster
|
@@ -92,6 +92,15 @@ class OodCore::Job::Adapters::Lsf::Helper
|
|
92
92
|
args += ["-W", (script.wall_time / 60).to_i] unless script.wall_time.nil?
|
93
93
|
args += ["-L", script.shell_path.to_s] unless script.shell_path.nil?
|
94
94
|
|
95
|
+
# environment
|
96
|
+
env = script.job_environment || {}
|
97
|
+
# To preserve pre-existing behavior we only act when true or false, when nil we do nothing
|
98
|
+
if script.copy_environment?
|
99
|
+
args += ["-env", (["all"] + env.keys).join(",")]
|
100
|
+
elsif script.copy_environment? == false
|
101
|
+
args += ["-env", (["none"] + env.keys).join(",")]
|
102
|
+
end
|
103
|
+
|
95
104
|
# input and output files
|
96
105
|
args += ["-i", script.input_path] unless script.input_path.nil?
|
97
106
|
args += ["-o", script.output_path] unless script.output_path.nil?
|
@@ -104,9 +113,6 @@ class OodCore::Job::Adapters::Lsf::Helper
|
|
104
113
|
|
105
114
|
args += script.native unless script.native.nil?
|
106
115
|
|
107
|
-
# environment
|
108
|
-
env = script.job_environment || {}
|
109
|
-
|
110
116
|
{args: args, env: env}
|
111
117
|
end
|
112
118
|
end
|
@@ -261,6 +261,7 @@ module OodCore
|
|
261
261
|
# Set environment variables
|
262
262
|
envvars = script.job_environment.to_h
|
263
263
|
args += ["-v", envvars.map{|k,v| "#{k}=#{v}"}.join(",")] unless envvars.empty?
|
264
|
+
args += ["-V"] if script.copy_environment?
|
264
265
|
|
265
266
|
# If error_path is not specified we join stdout & stderr (as this
|
266
267
|
# mimics what the other resource managers do)
|
@@ -397,6 +398,10 @@ module OodCore
|
|
397
398
|
raise JobAdapterError, e.message unless /Unknown Job Id/ =~ e.message || /Job has finished/ =~ e.message
|
398
399
|
end
|
399
400
|
|
401
|
+
def directive_prefix
|
402
|
+
'#PBS'
|
403
|
+
end
|
404
|
+
|
400
405
|
private
|
401
406
|
# Convert duration to seconds
|
402
407
|
def duration_in_seconds(time)
|
@@ -33,8 +33,7 @@ class OodCore::Job::Adapters::Sge::Batch
|
|
33
33
|
# @see Factory.build_sge
|
34
34
|
def initialize(config)
|
35
35
|
@cluster = config.fetch(:cluster, nil)
|
36
|
-
@
|
37
|
-
@bin = Pathname.new(config.fetch(:bin, nil))
|
36
|
+
@bin = Pathname.new(config.fetch(:bin, nil).to_s)
|
38
37
|
@sge_root = Pathname.new(config[:sge_root] || ENV['SGE_ROOT'] || "/var/lib/gridengine")
|
39
38
|
@bin_overrides = config.fetch(:bin_overrides, {})
|
40
39
|
|
@@ -20,6 +20,7 @@ class OodCore::Job::Adapters::Sge::Helper
|
|
20
20
|
args += ['-h'] if script.submit_as_hold
|
21
21
|
args += ['-r', 'yes'] if script.rerunnable
|
22
22
|
script.job_environment.each_pair {|k, v| args += ['-v', "#{k.to_s}=#{v.to_s}"]} unless script.job_environment.nil?
|
23
|
+
args += ["-V"] if script.copy_environment?
|
23
24
|
|
24
25
|
if script.workdir
|
25
26
|
args += ['-wd', script.workdir]
|
@@ -27,13 +27,15 @@ class QstatXmlJRListener
|
|
27
27
|
@parsed_job = {
|
28
28
|
:tasks => [],
|
29
29
|
:status => :queued,
|
30
|
-
:procs => 1,
|
30
|
+
:procs => 1,
|
31
31
|
:native => {} # TODO: improve native attribute reporting
|
32
32
|
}
|
33
33
|
@current_text = nil
|
34
34
|
@current_request = nil
|
35
35
|
|
36
36
|
@processing_job_array_spec = false
|
37
|
+
@adding_slots = false
|
38
|
+
|
37
39
|
@job_array_spec = {
|
38
40
|
start: nil,
|
39
41
|
stop: nil,
|
@@ -46,6 +48,8 @@ class QstatXmlJRListener
|
|
46
48
|
case name
|
47
49
|
when 'task_id_range'
|
48
50
|
toggle_processing_array_spec
|
51
|
+
when 'JB_pe_range'
|
52
|
+
toggle_adding_slots
|
49
53
|
end
|
50
54
|
end
|
51
55
|
|
@@ -78,13 +82,16 @@ class QstatXmlJRListener
|
|
78
82
|
when 'djob_info'
|
79
83
|
finalize_parsed_job
|
80
84
|
when 'RN_min'
|
81
|
-
set_job_array_piece(:start)
|
85
|
+
set_job_array_piece(:start) if @processing_job_array_spec
|
86
|
+
set_slots if @adding_slots
|
82
87
|
when 'RN_max'
|
83
|
-
set_job_array_piece(:stop)
|
88
|
+
set_job_array_piece(:stop) if @processing_job_array_spec
|
84
89
|
when 'RN_step'
|
85
|
-
set_job_array_piece(:step)
|
90
|
+
set_job_array_piece(:step) if @processing_job_array_spec
|
86
91
|
when 'task_id_range'
|
87
92
|
toggle_processing_array_spec
|
93
|
+
when 'JB_pe_range'
|
94
|
+
toggle_adding_slots
|
88
95
|
end
|
89
96
|
end
|
90
97
|
|
@@ -186,5 +193,13 @@ class QstatXmlJRListener
|
|
186
193
|
def toggle_processing_array_spec
|
187
194
|
@processing_job_array_spec = ! @processing_job_array_spec
|
188
195
|
end
|
196
|
+
|
197
|
+
def toggle_adding_slots
|
198
|
+
@adding_slots = ! @adding_slots
|
199
|
+
end
|
200
|
+
|
201
|
+
def set_slots
|
202
|
+
@parsed_job[:procs] = @current_text.to_i
|
203
|
+
end
|
189
204
|
end
|
190
205
|
|
@@ -192,7 +192,7 @@ module OodCore
|
|
192
192
|
# @return [String] the id of the job that was created
|
193
193
|
def submit_string(str, args: [], env: {})
|
194
194
|
args = args.map(&:to_s) + ["--parsable"]
|
195
|
-
env =
|
195
|
+
env = env.to_h.each_with_object({}) { |(k, v), h| h[k.to_s] = v.to_s }
|
196
196
|
call("sbatch", *args, env: env, stdin: str.to_s).strip.split(";").first
|
197
197
|
end
|
198
198
|
|
@@ -394,7 +394,10 @@ module OodCore
|
|
394
394
|
|
395
395
|
# Set environment variables
|
396
396
|
env = script.job_environment || {}
|
397
|
-
|
397
|
+
unless (script.job_environment.nil? || script.job_environment.empty?)
|
398
|
+
prefix = script.copy_environment? ? "ALL," : "NONE," # NONE if false or nil
|
399
|
+
args += ["--export", prefix + script.job_environment.keys.join(",")]
|
400
|
+
end
|
398
401
|
|
399
402
|
# Set native options
|
400
403
|
args += script.native if script.native
|
@@ -530,6 +533,10 @@ module OodCore
|
|
530
533
|
raise JobAdapterError, e.message unless /Invalid job id specified/ =~ e.message
|
531
534
|
end
|
532
535
|
|
536
|
+
def directive_prefix
|
537
|
+
'#SBATCH'
|
538
|
+
end
|
539
|
+
|
533
540
|
private
|
534
541
|
# Convert duration to seconds
|
535
542
|
def duration_in_seconds(time)
|
@@ -155,6 +155,7 @@ module OodCore
|
|
155
155
|
# Set environment variables
|
156
156
|
env = script.job_environment.to_h
|
157
157
|
args += ["-v", env.keys.join(",")] unless env.empty?
|
158
|
+
args += ["-V"] if script.copy_environment?
|
158
159
|
|
159
160
|
# If error_path is not specified we join stdout & stderr (as this
|
160
161
|
# mimics what the other resource managers do)
|
@@ -288,6 +289,10 @@ module OodCore
|
|
288
289
|
raise JobAdapterError, e.message
|
289
290
|
end
|
290
291
|
|
292
|
+
def directive_prefix
|
293
|
+
'#QSUB'
|
294
|
+
end
|
295
|
+
|
291
296
|
private
|
292
297
|
# Convert duration to seconds
|
293
298
|
def duration_in_seconds(time)
|
@@ -10,68 +10,33 @@
|
|
10
10
|
module OodCore
|
11
11
|
module Job
|
12
12
|
class ArrayIds
|
13
|
-
|
13
|
+
attr_reader :spec_string
|
14
14
|
|
15
|
-
attr_reader :ids
|
16
15
|
def initialize(spec_string)
|
17
|
-
@
|
18
|
-
begin
|
19
|
-
parse_spec_string(spec_string) if spec_string
|
20
|
-
rescue Error
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
protected
|
25
|
-
def parse_spec_string(spec_string)
|
26
|
-
@ids = get_components(spec_string).map{
|
27
|
-
|component| process_component(component)
|
28
|
-
}.reduce(:+).sort
|
16
|
+
@spec_string = spec_string
|
29
17
|
end
|
30
18
|
|
31
|
-
def
|
32
|
-
|
33
|
-
raise Error unless base
|
34
|
-
base.split(',')
|
19
|
+
def ids
|
20
|
+
@ids ||= parse_spec_string(spec_string)
|
35
21
|
end
|
36
22
|
|
37
|
-
|
38
|
-
# simultaneous tasks. The percent is expected to come at the end.
|
39
|
-
def discard_percent_modifier(spec_string)
|
40
|
-
spec_string.split('%').first
|
41
|
-
end
|
23
|
+
protected
|
42
24
|
|
43
|
-
def
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
25
|
+
def parse_spec_string(spec_string)
|
26
|
+
return [] unless spec_string
|
27
|
+
|
28
|
+
rx = /^(\d+)-?(\d+)?:?(\d+)?%?\d*$/
|
29
|
+
spec_string.split(',').reduce([]) do |ids, spec|
|
30
|
+
if rx =~ spec
|
31
|
+
start = ($1 || 1).to_i
|
32
|
+
finish = ($2 || start).to_i
|
33
|
+
step = ($3 || 1).to_i
|
34
|
+
ids.concat (start..finish).step(step).to_a
|
35
|
+
end
|
36
|
+
|
37
|
+
ids
|
50
38
|
end
|
51
39
|
end
|
52
|
-
|
53
|
-
def get_range(component)
|
54
|
-
raw_range, raw_step = component.split(':')
|
55
|
-
start, stop = raw_range.split('-')
|
56
|
-
raise Error unless numbers_valid?(
|
57
|
-
# Only include Step if it is not nil
|
58
|
-
[start, stop].tap { |a| a << raw_step if raw_step }
|
59
|
-
)
|
60
|
-
range = Range.new(start.to_i, stop.to_i)
|
61
|
-
step = raw_step.to_i
|
62
|
-
step = 1 if step == 0
|
63
|
-
|
64
|
-
range.step(step).to_a
|
65
|
-
end
|
66
|
-
|
67
|
-
def is_range?(component)
|
68
|
-
component.include?('-')
|
69
|
-
end
|
70
|
-
|
71
|
-
# Protect against Ruby's String#to_i returning 0 for arbitrary strings
|
72
|
-
def numbers_valid?(numbers)
|
73
|
-
numbers.all? { |str| /^[0-9]+$/ =~ str }
|
74
|
-
end
|
75
40
|
end
|
76
41
|
end
|
77
42
|
end
|
data/lib/ood_core/job/script.rb
CHANGED
@@ -104,6 +104,11 @@ module OodCore
|
|
104
104
|
# @return [Object, nil] native specifications
|
105
105
|
attr_reader :native
|
106
106
|
|
107
|
+
# Flag whether the job should contain a copy of its calling environment
|
108
|
+
# @return [Boolean] copy environment
|
109
|
+
attr_reader :copy_environment
|
110
|
+
alias_method :copy_environment?, :copy_environment
|
111
|
+
|
107
112
|
# @param content [#to_s] the script content
|
108
113
|
# @param args [Array<#to_s>, nil] arguments supplied to script
|
109
114
|
# @param submit_as_hold [Boolean, nil] whether job is held after submit
|
@@ -132,7 +137,8 @@ module OodCore
|
|
132
137
|
job_name: nil, shell_path: nil, input_path: nil,
|
133
138
|
output_path: nil, error_path: nil, reservation_id: nil,
|
134
139
|
queue_name: nil, priority: nil, start_time: nil,
|
135
|
-
wall_time: nil, accounting_id: nil, job_array_request: nil,
|
140
|
+
wall_time: nil, accounting_id: nil, job_array_request: nil,
|
141
|
+
native: nil, copy_environment: nil, **_)
|
136
142
|
@content = content.to_s
|
137
143
|
|
138
144
|
@submit_as_hold = submit_as_hold
|
@@ -157,6 +163,7 @@ module OodCore
|
|
157
163
|
@accounting_id = accounting_id && accounting_id.to_s
|
158
164
|
@job_array_request = job_array_request && job_array_request.to_s
|
159
165
|
@native = native
|
166
|
+
@copy_environment = (copy_environment.nil?) ? nil : !! copy_environment
|
160
167
|
end
|
161
168
|
|
162
169
|
# Convert object to hash
|
@@ -184,7 +191,8 @@ module OodCore
|
|
184
191
|
wall_time: wall_time,
|
185
192
|
accounting_id: accounting_id,
|
186
193
|
job_array_request: job_array_request,
|
187
|
-
native: native
|
194
|
+
native: native,
|
195
|
+
copy_environment: copy_environment
|
188
196
|
}
|
189
197
|
end
|
190
198
|
|
data/lib/ood_core/version.rb
CHANGED
data/ood_core.gemspec
CHANGED
@@ -25,8 +25,9 @@ Gem::Specification.new do |spec|
|
|
25
25
|
spec.add_runtime_dependency "ood_support", "~> 0.0.2"
|
26
26
|
spec.add_runtime_dependency "ffi", "~> 1.9", ">= 1.9.6"
|
27
27
|
spec.add_development_dependency "bundler", "~> 1.7"
|
28
|
-
spec.add_development_dependency "rake", "~>
|
28
|
+
spec.add_development_dependency "rake", "~> 13.0.1"
|
29
29
|
spec.add_development_dependency "rspec", "~> 3.0"
|
30
30
|
spec.add_development_dependency "pry", "~> 0.10"
|
31
31
|
spec.add_development_dependency "timecop", "~> 0.8"
|
32
|
+
spec.add_development_dependency "climate_control", "~> 0.2.0"
|
32
33
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ood_core
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.11.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric Franz
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: exe
|
12
12
|
cert_chain: []
|
13
|
-
date:
|
13
|
+
date: 2020-03-18 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: ood_support
|
@@ -66,14 +66,14 @@ dependencies:
|
|
66
66
|
requirements:
|
67
67
|
- - "~>"
|
68
68
|
- !ruby/object:Gem::Version
|
69
|
-
version:
|
69
|
+
version: 13.0.1
|
70
70
|
type: :development
|
71
71
|
prerelease: false
|
72
72
|
version_requirements: !ruby/object:Gem::Requirement
|
73
73
|
requirements:
|
74
74
|
- - "~>"
|
75
75
|
- !ruby/object:Gem::Version
|
76
|
-
version:
|
76
|
+
version: 13.0.1
|
77
77
|
- !ruby/object:Gem::Dependency
|
78
78
|
name: rspec
|
79
79
|
requirement: !ruby/object:Gem::Requirement
|
@@ -116,6 +116,20 @@ dependencies:
|
|
116
116
|
- - "~>"
|
117
117
|
- !ruby/object:Gem::Version
|
118
118
|
version: '0.8'
|
119
|
+
- !ruby/object:Gem::Dependency
|
120
|
+
name: climate_control
|
121
|
+
requirement: !ruby/object:Gem::Requirement
|
122
|
+
requirements:
|
123
|
+
- - "~>"
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: 0.2.0
|
126
|
+
type: :development
|
127
|
+
prerelease: false
|
128
|
+
version_requirements: !ruby/object:Gem::Requirement
|
129
|
+
requirements:
|
130
|
+
- - "~>"
|
131
|
+
- !ruby/object:Gem::Version
|
132
|
+
version: 0.2.0
|
119
133
|
description: Open OnDemand core library that provides support for an HPC Center to
|
120
134
|
globally define HPC services that web applications can then take advantage of.
|
121
135
|
email:
|
@@ -147,6 +161,7 @@ files:
|
|
147
161
|
- lib/ood_core/cluster.rb
|
148
162
|
- lib/ood_core/clusters.rb
|
149
163
|
- lib/ood_core/errors.rb
|
164
|
+
- lib/ood_core/invalid_cluster.rb
|
150
165
|
- lib/ood_core/job/adapter.rb
|
151
166
|
- lib/ood_core/job/adapters/drmaa.rb
|
152
167
|
- lib/ood_core/job/adapters/helper.rb
|
@@ -201,7 +216,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
201
216
|
version: '0'
|
202
217
|
requirements: []
|
203
218
|
rubyforge_project:
|
204
|
-
rubygems_version: 2.6.
|
219
|
+
rubygems_version: 2.7.6.2
|
205
220
|
signing_key:
|
206
221
|
specification_version: 4
|
207
222
|
summary: Open OnDemand core library
|