ood_core 0.16.1 → 0.17.4

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 19665b6db28d01da39093dc90d4a5023ca12264f07b932aebc8ec8c443bafa25
- data.tar.gz: d9c8c6d8f30851ea9138c8325aafd750823534a51f36601a20366265ac4feec2
+ metadata.gz: 4c94c6fbf110564ec2cff9d885d2799566a70e1e98759f44fec00fde6eb0cdec
+ data.tar.gz: 7ed0326c52582dbd8b15a272706e8aa58e36a2be0555a78c65037bb74517d0a6
  SHA512:
- metadata.gz: 1ed1eaa873366ad5e825ed29c7401dd3bca4a424ab7a689a19479f297ec20d7e019cd53609006b0919a365dd0002eb0c1e9c0cabcc9f69579cf7ae81b33b3ae7
- data.tar.gz: 90a4cfa3ee8b1f76ef7e1f28df6d8e64725d1eaff005b4bd4ff7fc8f88e5bfda8a15e706636c18e7b5ac74451071eaea4e6814945ea25e95f6c7ed2de8fd2fec
+ metadata.gz: b6af308bf4acb767e6c3128ce753714ebcee4f33a17b5114a1196d73ec7df63be5d5007ad985c752329463e2533ed1bbfa8951426a2a035ef08ce9b3704b5984
+ data.tar.gz: 76b07812da52479c3d5c834c51dcb6c5af328721436474197a77bd0423f5061361d333d233c235da7a699e31ac772104eb65902e74ee25afb4640c6e5adc4add
data/CHANGELOG.md CHANGED
@@ -6,6 +6,75 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
  and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).

  ## [Unreleased]
+
+ ## [0.17.4] - 7-29-2021
+
+ Functionally the same as [0.17.3] but with some CI updates.
+
+ ## [0.17.3] - 7-29-2021
+
+ ### Fixed
+
+ - Fixed handling of pods in a startup phase in [303](https://github.com/OSC/ood_core/pull/303).
+
+ ### Added
+
+ - Enable automatic population of supplemental groups in [305](https://github.com/OSC/ood_core/pull/305).
+
+ ## [0.17.2] - 7-14-2021
+
+ ### Fixed
+
+ - Fixed k8s adapter to only show Running pods as running in [300](https://github.com/OSC/ood_core/pull/300).
+
+ ## [0.17.1] - 6-14-2021
+
+ ### Fixed
+
+ - Fixed [278](https://github.com/OSC/ood_core/pull/278) where unschedulable pods will now show up as
+ queued_held status.
+
+ ### Changed
+
+ - KUBECONFIG now defaults to /dev/null in the kubernetes adapter in [292](https://github.com/OSC/ood_core/pull/292).
+
+ ### Added
+
+ - Sites can now set `batch_connect.ssh_allow` on the cluster to disable the buttons to start
+ a shell session to compute nodes in [289](https://github.com/OSC/ood_core/pull/289).
+ - `POD_PORT` is now available to jobs in the kubernetes adapter in [290](https://github.com/OSC/ood_core/pull/290).
+ - Kubernetes pods now support a startProbe in [291](https://github.com/OSC/ood_core/pull/291).
+
+ ## [0.17.0] - 5-26-2021
+
+ ### Fixed
+
+ - All Kubernetes resources now have the same labels in [280](https://github.com/OSC/ood_core/pull/280).
+ - Kubernetes does not crash when no configmap is defined in [282](https://github.com/OSC/ood_core/pull/282).
+ - Kubernetes will not specify init containers if there are none in
+ [284](https://github.com/OSC/ood_core/pull/284).
+
+ ### Added
+
+ - Kubernetes, Slurm and Torque now support the script option `gpus_per_node` in
+ [266](https://github.com/OSC/ood_core/pull/266).
+ - Kubernetes will now save the pod.yml into the staged root in
+ [277](https://github.com/OSC/ood_core/pull/277).
+ - Kubernetes now allows for node selector in [264](https://github.com/OSC/ood_core/pull/264).
+ - Kubernetes pods now have access the environment variable POD_NAMESPACE in
+ [275](https://github.com/OSC/ood_core/pull/275).
+ - Kubernetes pods can now specify the image pull policy in [272](https://github.com/OSC/ood_core/pull/272).
+ - Cluster config's batch_connect now support `ssh_allow` to disable sshing to compute
+ nodes per cluster in [286](https://github.com/OSC/ood_core/pull/286).
+ - Kubernetes will now add the templated script content to a configmap in
+ [273](https://github.com/OSC/ood_core/pull/273).
+
+ ### Changed
+
+ - Kubernetes username prefix no longer appends a - in [271](https://github.com/OSC/ood_core/pull/271).
+
+
+
  ## [0.16.1] - 2021-04-23
  ### Fixed
  - memorized some allow? variables to have better support around ACLS in
@@ -305,7 +374,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
  ### Added
  - Initial release!

- [Unreleased]: https://github.com/OSC/ood_core/compare/v0.16.1...HEAD
+ [Unreleased]: https://github.com/OSC/ood_core/compare/v0.17.4...HEAD
+ [0.17.4]: https://github.com/OSC/ood_core/compare/v0.17.3...v0.17.4
+ [0.17.3]: https://github.com/OSC/ood_core/compare/v0.17.2...v0.17.3
+ [0.17.2]: https://github.com/OSC/ood_core/compare/v0.17.1...v0.17.2
+ [0.17.1]: https://github.com/OSC/ood_core/compare/v0.17.0...v0.17.1
+ [0.17.0]: https://github.com/OSC/ood_core/compare/v0.16.1...v0.17.0
  [0.16.1]: https://github.com/OSC/ood_core/compare/v0.16.0...v0.16.1
  [0.16.0]: https://github.com/OSC/ood_core/compare/v0.15.1...v0.16.0
  [0.15.1]: https://github.com/OSC/ood_core/compare/v0.15.0...v0.15.1
data/lib/ood_core/cluster.rb CHANGED
@@ -147,6 +147,15 @@ module OodCore
  @allow = acls.all?(&:allow?)
  end

+ # Whether this cluster supports SSH to batch connect nodes
+ # @return [Boolean, nil] whether cluster supports SSH to batch connect node
+ def batch_connect_ssh_allow?
+ return @batch_connect_ssh_allow if defined?(@batch_connect_ssh_allow)
+ return @batch_connect_ssh_allow = nil if batch_connect_config.nil?
+
+ @batch_connect_ssh_allow = batch_connect_config.fetch(:ssh_allow, nil)
+ end
+
  # The comparison operator
  # @param other [#to_sym] object to compare against
  # @return [Boolean] whether objects are equivalent
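The new `batch_connect_ssh_allow?` reader above surfaces the cluster-level `batch_connect.ssh_allow` setting mentioned in the 0.17.x changelog entries (PRs 286/289). A minimal sketch of how it behaves, assuming the cluster hash keys mirror the YAML cluster config; the `id` value is a placeholder:

```ruby
require 'ood_core'

# Hypothetical cluster definition; only :batch_connect matters for this sketch.
cluster = OodCore::Cluster.new(
  id: 'k8s_example',
  batch_connect: { ssh_allow: false } # hide the "shell to compute node" buttons
)

cluster.batch_connect_ssh_allow? # => false
# With no :batch_connect block (or no :ssh_allow key) the method returns nil,
# leaving the decision to the caller's default.
```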
data/lib/ood_core/job/adapters/kubernetes/batch.rb CHANGED
@@ -14,6 +14,7 @@ class OodCore::Job::Adapters::Kubernetes::Batch
  attr_reader :config_file, :bin, :cluster, :mounts
  attr_reader :all_namespaces, :using_context, :helper
  attr_reader :username_prefix, :namespace_prefix
+ attr_reader :auto_supplemental_groups

  def initialize(options = {})
  options = options.to_h.symbolize_keys
@@ -23,8 +24,9 @@ class OodCore::Job::Adapters::Kubernetes::Batch
  @cluster = options.fetch(:cluster, 'open-ondemand')
  @mounts = options.fetch(:mounts, []).map { |m| m.to_h.symbolize_keys }
  @all_namespaces = options.fetch(:all_namespaces, false)
- @username_prefix = options.fetch(:username_prefix, nil)
+ @username_prefix = options.fetch(:username_prefix, '')
  @namespace_prefix = options.fetch(:namespace_prefix, '')
+ @auto_supplemental_groups = options.fetch(:auto_supplemental_groups, false)

  @using_context = false
  @helper = OodCore::Job::Adapters::Kubernetes::Helper.new
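A sketch of constructing the Kubernetes batch object with the options introduced in this hunk; the values are illustrative, and in practice they come from the cluster's job adapter config rather than hand-written Ruby:

```ruby
require 'ood_core'

batch = OodCore::Job::Adapters::Kubernetes::Batch.new(
  cluster: 'open-ondemand',
  # 0.17.0: the prefix is now used verbatim, so include the dash yourself if you want one.
  username_prefix: 'dev-',
  namespace_prefix: 'user-',
  # 0.17.3: automatically add the submitting user's groups (gid >= 1000) to the pod.
  auto_supplemental_groups: true
)

batch.auto_supplemental_groups # => true
```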
@@ -45,6 +47,9 @@ class OodCore::Job::Adapters::Kubernetes::Batch
  raise ArgumentError, 'Must specify the script' if script.nil?

  resource_yml, id = generate_id_yml(script)
+ if !script.workdir.nil? && Dir.exist?(script.workdir)
+ File.open(File.join(script.workdir, 'pod.yml'), 'w') { |f| f.write resource_yml }
+ end
  call("#{formatted_ns_cmd} create -f -", stdin: resource_yml)

  id
@@ -146,7 +151,7 @@ class OodCore::Job::Adapters::Kubernetes::Batch
  end

  def k8s_username
- username_prefix.nil? ? username : "#{username_prefix}-#{username}"
+ "#{username_prefix}#{username}"
  end

  def user
@@ -173,6 +178,19 @@ class OodCore::Job::Adapters::Kubernetes::Batch
  Etc.getgrgid(run_as_group).name
  end

+ def default_supplemental_groups
+ OodSupport::User.new.groups.sort_by(&:id).map(&:id).reject { |id| id < 1000 }
+ end
+
+ def supplemental_groups(groups = [])
+ sgroups = []
+ if auto_supplemental_groups
+ sgroups.concat(default_supplemental_groups)
+ end
+ sgroups.concat(groups.to_a)
+ sgroups.uniq.sort
+ end
+
  def default_env
  {
  USER: username,
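To make the merge semantics of `supplemental_groups` above concrete, here is a standalone mirror of the logic with made-up group IDs; it does not call `OodSupport`, and every value is a placeholder:

```ruby
# Standalone illustration of Batch#supplemental_groups / #default_supplemental_groups.
def merge_supplemental_groups(user_gids, requested_gids, auto:)
  sgroups = []
  sgroups.concat(user_gids.reject { |gid| gid < 1000 }) if auto # system gids are filtered out
  sgroups.concat(requested_gids.to_a)                           # explicit gids pass through as-is
  sgroups.uniq.sort
end

merge_supplemental_groups([100, 1001, 2000], [2000, 500], auto: true)
# => [500, 1001, 2000]  (500 survives because only the automatic list is filtered)
```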
@@ -180,6 +198,7 @@ class OodCore::Job::Adapters::Kubernetes::Batch
  HOME: home_dir,
  GROUP: group,
  GID: run_as_group,
+ KUBECONFIG: '/dev/null',
  }
  end

@@ -187,12 +206,15 @@ class OodCore::Job::Adapters::Kubernetes::Batch
  # create an id.
  def generate_id_yml(script)
  native_data = script.native
+ native_data[:container][:supplemental_groups] = supplemental_groups(native_data[:container][:supplemental_groups])
  container = helper.container_from_native(native_data[:container], default_env)
  id = generate_id(container.name)
- configmap = helper.configmap_from_native(native_data, id)
+ configmap = helper.configmap_from_native(native_data, id, script.content)
  init_containers = helper.init_ctrs_from_native(native_data[:init_containers], container.env)
  spec = OodCore::Job::Adapters::Kubernetes::Resources::PodSpec.new(container, init_containers: init_containers)
  all_mounts = native_data[:mounts].nil? ? mounts : mounts + native_data[:mounts]
+ node_selector = native_data[:node_selector].nil? ? {} : native_data[:node_selector]
+ gpu_type = native_data[:gpu_type].nil? ? "nvidia.com/gpu" : native_data[:gpu_type]

  template = ERB.new(File.read(resource_file), nil, '-')

data/lib/ood_core/job/adapters/kubernetes/helper.rb CHANGED
@@ -53,7 +53,10 @@ class OodCore::Job::Adapters::Kubernetes::Helper
  cpu: container[:cpu],
  working_dir: container[:working_dir],
  restart_policy: container[:restart_policy],
- image_pull_secret: container[:image_pull_secret]
+ image_pull_policy: container[:image_pull_policy],
+ image_pull_secret: container[:image_pull_secret],
+ supplemental_groups: container[:supplemental_groups],
+ startup_probe: container[:startup_probe],
  )
  end

@@ -80,10 +83,18 @@ class OodCore::Job::Adapters::Kubernetes::Helper
  # the input configmap hash
  # @param id [#to_s]
  # the id to use for giving the configmap a name
+ # @param script_content [#to_s]
+ # the batch script content
  # @return [OodCore::Job::Adapters::Kubernetes::Resources::ConfigMap]
- def configmap_from_native(native, id)
- configmap = native.fetch(:configmap, nil)
- return nil if configmap.nil?
+ def configmap_from_native(native, id, script_content)
+ configmap = native.fetch(:configmap, {})
+ configmap[:files] ||= []
+ configmap[:files] << {
+ filename: 'script.sh',
+ data: script_content,
+ mount_path: '/ood/script.sh',
+ sub_path: 'script.sh',
+ } unless configmap[:files].any? { |f| f[:filename] == 'script.sh' }

  OodCore::Job::Adapters::Kubernetes::Resources::ConfigMap.new(
  configmap_name(id),
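Pulling the batch and helper hunks together, a sketch of the `script.native` payload the Kubernetes adapter consumes; only keys visible in this diff are shown, and every value is a placeholder:

```ruby
# Hypothetical native data for a Kubernetes job (keys taken from this diff).
native = {
  container: {
    name: 'rstudio',                            # placeholder
    image: 'ghcr.io/example/rstudio:latest',    # placeholder
    port: 8080,                                 # also exported to the pod as POD_PORT
    image_pull_policy: 'Always',                # 0.17.0; falls back to "IfNotPresent"
    supplemental_groups: [1001],                # merged with the automatic groups
    startup_probe: { initial_delay_seconds: 5 } # 0.17.1; a TCP probe against :port
  },
  node_selector: { 'nodetype' => 'ondemand' },  # 0.17.0
  gpu_type: 'nvidia.com/gpu',                   # resource name paired with gpus_per_node
  mounts: [],
  init_containers: [],
  configmap: { files: [] }                      # script.sh is appended here automatically
}
```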
@@ -140,7 +151,7 @@ class OodCore::Job::Adapters::Kubernetes::Helper
  {
  id: json_data.dig(:metadata, :name).to_s,
  job_name: name_from_metadata(json_data.dig(:metadata)),
- status: pod_status_from_json(json_data),
+ status: OodCore::Job::Status.new(state: pod_status_from_json(json_data)),
  job_owner: job_owner_from_json(json_data, ns_prefix),
  submission_time: submission_time(json_data),
  dispatch_time: dispatch_time(json_data),
@@ -230,15 +241,21 @@ class OodCore::Job::Adapters::Kubernetes::Helper
  def submission_time(json_data)
  status = json_data.dig(:status)
  start = status.dig(:startTime)
+ creation = json_data.dig(:metadata, :creationTimestamp)

- if start.nil?
+ if !creation.nil?
+ str = creation
+ elsif !start.nil?
+ str = start
+ else
  # the pod is in some pending state limbo
  conditions = status.dig(:conditions)
+ return nil if conditions.nil?
+ return nil if conditions.size == 0
  # best guess to start time is just the first condition's
  # transition time
  str = conditions[0].dig(:lastTransitionTime)
- else
- str = start
+ return nil if str.nil?
  end

  DateTime.parse(str).to_time.to_i
@@ -246,11 +263,21 @@ class OodCore::Job::Adapters::Kubernetes::Helper

  def pod_status_from_json(json_data)
  phase = json_data.dig(:status, :phase)
+ conditions = json_data.dig(:status, :conditions)
+ container_statuses = json_data.dig(:status, :containerStatuses)
+ unschedulable = conditions.to_a.any? { |c| c.dig(:reason) == "Unschedulable" }
+ ready = !container_statuses.to_a.empty? && container_statuses.to_a.all? { |s| s.dig(:ready) == true }
+ started = !container_statuses.to_a.empty? && container_statuses.to_a.any? { |s| s.fetch(:state, {}).key?(:running) }
+ return "running" if ready
+ return "queued" if phase == "Running" && started
+
  state = case phase
- when "Running"
- "running"
  when "Pending"
- "queued"
+ if unschedulable
+ "queued_held"
+ else
+ "queued"
+ end
  when "Failed"
  "suspended"
  when "Succeeded"
@@ -260,8 +287,6 @@ class OodCore::Job::Adapters::Kubernetes::Helper
  else
  "undetermined"
  end
-
- OodCore::Job::Status.new(state: state)
  end

  def terminated_state(status)
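The reworked status logic reads roughly as follows; this is a condensed mirror of only the branches visible in this diff, and the `Succeeded` mapping to `completed` is assumed because that branch falls between the two hunks:

```ruby
# Condensed sketch of pod_status_from_json's decision order, for illustration only.
def sketch_pod_state(phase, ready:, started:, unschedulable:)
  return 'running' if ready                         # every container reports ready
  return 'queued'  if phase == 'Running' && started # Running phase, but not ready yet

  case phase
  when 'Pending'   then unschedulable ? 'queued_held' : 'queued'
  when 'Failed'    then 'suspended'
  when 'Succeeded' then 'completed'                 # assumed; branch not shown in this hunk
  else                  'undetermined'
  end
end
```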
data/lib/ood_core/job/adapters/kubernetes/resources.rb CHANGED
@@ -33,13 +33,36 @@ module OodCore::Job::Adapters::Kubernetes::Resources
  end
  end

+ class TCPProbe
+ attr_accessor :port, :initial_delay_seconds, :failure_threshold, :period_seconds
+
+ def initialize(port, data)
+ data ||= {}
+ @port = port
+ @initial_delay_seconds = data[:initial_delay_seconds] || 2
+ @failure_threshold = data[:failure_threshold] || 5
+ @period_seconds = data[:period_seconds] || 5
+ end
+
+ def to_h
+ {
+ port: port,
+ initial_delay_seconds: initial_delay_seconds,
+ failure_threshold: failure_threshold,
+ period_seconds: period_seconds,
+ }
+ end
+ end
+
  class Container
  attr_accessor :name, :image, :command, :port, :env, :memory, :cpu, :working_dir,
- :restart_policy, :image_pull_secret, :supplemental_groups
+ :restart_policy, :image_pull_policy, :image_pull_secret, :supplemental_groups,
+ :startup_probe

  def initialize(
  name, image, command: [], port: nil, env: {}, memory: "4Gi", cpu: "1",
- working_dir: "", restart_policy: "Never", image_pull_secret: nil, supplemental_groups: []
+ working_dir: "", restart_policy: "Never", image_pull_policy: nil, image_pull_secret: nil, supplemental_groups: [],
+ startup_probe: {}
  )
  raise ArgumentError, "containers need valid names and images" unless name && image

@@ -52,8 +75,10 @@ module OodCore::Job::Adapters::Kubernetes::Resources
  @cpu = cpu.nil? ? "1" : cpu
  @working_dir = working_dir.nil? ? "" : working_dir
  @restart_policy = restart_policy.nil? ? "Never" : restart_policy
+ @image_pull_policy = image_pull_policy.nil? ? "IfNotPresent" : image_pull_policy
  @image_pull_secret = image_pull_secret
  @supplemental_groups = supplemental_groups.nil? ? [] : supplemental_groups
+ @startup_probe = TCPProbe.new(@port, startup_probe)
  end

  def ==(other)
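Since `Container#initialize` above now builds a `TCPProbe` from the (possibly empty) `startup_probe` hash, a quick sketch of the defaults it fills in; the name and image are placeholders:

```ruby
require 'ood_core'

resources = OodCore::Job::Adapters::Kubernetes::Resources
container = resources::Container.new(
  'main', 'ghcr.io/example/app:latest',
  port: 8080,
  startup_probe: { failure_threshold: 10 } # unspecified fields fall back to the defaults above
)

container.image_pull_policy  # => "IfNotPresent"
container.startup_probe.to_h # => { port: 8080, initial_delay_seconds: 2,
                             #      failure_threshold: 10, period_seconds: 5 }
```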
@@ -66,8 +91,10 @@ module OodCore::Job::Adapters::Kubernetes::Resources
  cpu == other.cpu &&
  working_dir == other.working_dir &&
  restart_policy == other.restart_policy &&
+ image_pull_policy == other.image_pull_policy &&
  image_pull_secret == other.image_pull_secret &&
- supplemental_groups == other.supplemental_groups
+ supplemental_groups == other.supplemental_groups &&
+ startup_probe.to_h == other.startup_probe.to_h
  end
  end

data/lib/ood_core/job/adapters/kubernetes/templates/pod.yml.erb CHANGED
@@ -20,14 +20,7 @@ spec:
  runAsUser: <%= run_as_user %>
  runAsGroup: <%= run_as_group %>
  runAsNonRoot: true
- <%- if spec.container.supplemental_groups.empty? -%>
- supplementalGroups: []
- <%- else -%>
- supplementalGroups:
- <%- spec.container.supplemental_groups.each do |supplemental_group| -%>
- - "<%= supplemental_group %>"
- <%- end -%>
- <%- end -%>
+ supplementalGroups: <%= spec.container.supplemental_groups %>
  fsGroup: <%= fs_group %>
  hostNetwork: false
  hostIPC: false
@@ -39,7 +32,7 @@ spec:
  containers:
  - name: "<%= spec.container.name %>"
  image: <%= spec.container.image %>
- imagePullPolicy: IfNotPresent
+ imagePullPolicy: <%= spec.container.image_pull_policy %>
  <%- unless spec.container.working_dir.empty? -%>
  workingDir: "<%= spec.container.working_dir %>"
  <%- end -%>
@@ -48,6 +41,14 @@ spec:
  valueFrom:
  fieldRef:
  fieldPath: metadata.name
+ - name: POD_NAMESPACE
+ valueFrom:
+ fieldRef:
+ fieldPath: metadata.namespace
+ <%- unless spec.container.port.nil? -%>
+ - name: POD_PORT
+ value: "<%= spec.container.port %>"
+ <%- end -%>
  <%- spec.container.env.each_pair do |name, value| -%>
  - name: <%= name %>
  value: "<%= value %>"
@@ -61,9 +62,16 @@ spec:
  <%- unless spec.container.port.nil? -%>
  ports:
  - containerPort: <%= spec.container.port %>
+ startupProbe:
+ tcpSocket:
+ port: <%= spec.container.startup_probe.port %>
+ initialDelaySeconds: <%= spec.container.startup_probe.initial_delay_seconds %>
+ failureThreshold: <%= spec.container.startup_probe.failure_threshold %>
+ periodSeconds: <%= spec.container.startup_probe.period_seconds %>
  <%- end -%>
- <%- if configmap.mounts? || !all_mounts.empty? -%>
+ <%- if !all_mounts.empty? || (!configmap.nil? && configmap.mounts?) -%>
  volumeMounts:
+ <%- unless configmap.nil? -%>
  <%- configmap.files.each do |file| -%>
  <%- next if file.mount_path.nil? -%>
  - name: configmap-volume
@@ -72,6 +80,7 @@ spec:
  subPath: <%= file.sub_path %>
  <%- end # end unless file.sub_path.nil? -%>
  <%- end # end configmap.files.each -%>
+ <%- end # unless configmap.nil? -%>
  <%- all_mounts.each do |mount| -%>
  - name: <%= mount[:name] %>
  mountPath: <%= mount[:destination_path] %>
@@ -81,25 +90,36 @@ spec:
  limits:
  memory: "<%= spec.container.memory %>"
  cpu: "<%= spec.container.cpu %>"
+ <%- unless script.gpus_per_node.nil? -%>
+ <%= gpu_type %>: <%= script.gpus_per_node %>
+ <%- end -%>
  requests:
  memory: "<%= spec.container.memory %>"
  cpu: "<%= spec.container.cpu %>"
+ <%- unless script.gpus_per_node.nil? -%>
+ <%= gpu_type %>: <%= script.gpus_per_node %>
+ <%- end -%>
  securityContext:
  allowPrivilegeEscalation: false
  capabilities:
  drop:
  - all
  privileged: false
- <%- unless spec.init_containers.nil? -%>
+ <%- unless spec.init_containers.empty? -%>
  initContainers:
  <%- spec.init_containers.each do |ctr| -%>
  - name: "<%= ctr.name %>"
  image: "<%= ctr.image %>"
+ imagePullPolicy: <%= ctr.image_pull_policy %>
  env:
  - name: POD_NAME
  valueFrom:
  fieldRef:
  fieldPath: metadata.name
+ - name: POD_NAMESPACE
+ valueFrom:
+ fieldRef:
+ fieldPath: metadata.namespace
  <%- ctr.env.each_pair do |name, value| -%>
  - name: <%= name %>
  value: "<%= value %>"
@@ -108,8 +128,9 @@ spec:
  <%- ctr.command.each do |cmd| -%>
  - "<%= cmd %>"
  <%- end # command loop -%>
- <%- if configmap.init_mounts? || !all_mounts.empty? -%>
+ <%- if !all_mounts.empty? || (!configmap.nil? && configmap.init_mounts?) -%>
  volumeMounts:
+ <%- unless configmap.nil? -%>
  <%- configmap.files.each do |file| -%>
  <%- next if file.init_mount_path.nil? -%>
  - name: configmap-volume
@@ -118,6 +139,7 @@ spec:
  subPath: <%= file.init_sub_path %>
  <%- end # end unless file.sub_path.nil? -%>
  <%- end # end configmap.files.each -%>
+ <%- end # unless configmap.nil? -%>
  <%- all_mounts.each do |mount| -%>
  - name: <%= mount[:name] %>
  mountPath: <%= mount[:destination_path] %>
@@ -152,6 +174,12 @@ spec:
  <%- end # if mount is [host,nfs] -%>
  <%- end # for each mount -%>
  <%- end # (configmap.to_s.empty? || all_mounts.empty?) -%>
+ <%- unless node_selector.empty? -%>
+ nodeSelector:
+ <%- node_selector.each_pair do |key, value| -%>
+ <%= key %>: "<%= value %>"
+ <%- end # node_selector.each_pair -%>
+ <%- end #unless node_selector.empty? -%>
  ---
  <%- unless spec.container.port.nil? -%>
  apiVersion: v1
@@ -161,6 +189,8 @@ metadata:
  namespace: <%= namespace %>
  labels:
  job: <%= id %>
+ app.kubernetes.io/name: <%= container.name %>
+ app.kubernetes.io/managed-by: open-ondemand
  spec:
  selector:
  job: <%= id %>
@@ -170,8 +200,8 @@ spec:
  targetPort: <%= spec.container.port %>
  type: NodePort
  <%- end # end for service -%>
- ---
  <%- unless configmap.nil? -%>
+ ---
  apiVersion: v1
  kind: ConfigMap
  metadata:
@@ -179,6 +209,8 @@ metadata:
  namespace: <%= namespace %>
  labels:
  job: <%= id %>
+ app.kubernetes.io/name: <%= container.name %>
+ app.kubernetes.io/managed-by: open-ondemand
  data:
  <%- configmap.files.each do |file| -%>
  <%- next if file.data.nil? || file.filename.nil? -%>
data/lib/ood_core/job/adapters/slurm.rb CHANGED
@@ -423,6 +423,7 @@ module OodCore
  args.concat ["-t", seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
  args.concat ['-a', script.job_array_request] unless script.job_array_request.nil?
  args.concat ['--qos', script.qos] unless script.qos.nil?
+ args.concat ['--gpus-per-node', script.gpus_per_node] unless script.gpus_per_node.nil?
  # ignore nodes, don't know how to do this for slurm

  # Set dependencies
data/lib/ood_core/job/adapters/torque.rb CHANGED
@@ -159,6 +159,8 @@ module OodCore
  args.concat ["-l", "walltime=#{seconds_to_duration(script.wall_time)}"] unless script.wall_time.nil?
  args.concat ['-t', script.job_array_request] unless script.job_array_request.nil?
  args.concat ['-l', "qos=#{script.qos}"] unless script.qos.nil?
+ args.concat ['-l', "gpus=#{script.gpus_per_node}"] unless script.gpus_per_node.nil?
+
  # Set environment variables
  env = script.job_environment.to_h
  args.concat ["-v", env.keys.join(",")] unless env.empty?
data/lib/ood_core/job/script.rb CHANGED
@@ -103,6 +103,10 @@ module OodCore
  # @return [String, nil] qos
  attr_reader :qos

+ # The GPUs per node for the job
+ # @return [Integer, nil] gpus per node
+ attr_reader :gpus_per_node
+
  # Object detailing any native specifications that are implementation specific
  # @note Should not be used at all costs.
  # @return [Object, nil] native specifications
@@ -136,6 +140,7 @@ module OodCore
  # @param accounting_id [#to_s, nil] accounting id
  # @param job_array_request [#to_s, nil] job array request
  # @param qos [#to_s, nil] qos
+ # @param gpus_per_node [#to_i, nil] gpus per node
  # @param native [Object, nil] native specifications
  # @param copy_environment [Boolean, nil] copy the environment
  def initialize(content:, args: nil, submit_as_hold: nil, rerunnable: nil,
@@ -145,7 +150,7 @@ module OodCore
  output_path: nil, error_path: nil, reservation_id: nil,
  queue_name: nil, priority: nil, start_time: nil,
  wall_time: nil, accounting_id: nil, job_array_request: nil,
- qos: nil, native: nil, copy_environment: nil, **_)
+ qos: nil, gpus_per_node: nil, native: nil, copy_environment: nil, **_)
  @content = content.to_s

  @submit_as_hold = submit_as_hold
@@ -170,6 +175,7 @@ module OodCore
  @accounting_id = accounting_id && accounting_id.to_s
  @job_array_request = job_array_request && job_array_request.to_s
  @qos = qos && qos.to_s
+ @gpus_per_node = gpus_per_node && gpus_per_node.to_i
  @native = native
  @copy_environment = (copy_environment.nil?) ? nil : !! copy_environment
  end
@@ -200,6 +206,7 @@ module OodCore
  accounting_id: accounting_id,
  job_array_request: job_array_request,
  qos: qos,
+ gpus_per_node: gpus_per_node,
  native: native,
  copy_environment: copy_environment
  }
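A sketch of the new `gpus_per_node` script option end to end; per the adapter hunks above it becomes `--gpus-per-node N` for Slurm and `-l gpus=N` for Torque, and it feeds the `<%= gpu_type %>` resource limit in the Kubernetes pod template:

```ruby
require 'ood_core'

script = OodCore::Job::Script.new(
  content: "#!/bin/bash\nhostname",
  gpus_per_node: 2
)

script.gpus_per_node        # => 2 (coerced with #to_i)
script.to_h[:gpus_per_node] # => 2
```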
data/lib/ood_core/version.rb CHANGED
@@ -1,4 +1,4 @@
  module OodCore
  # The current version of {OodCore}
- VERSION = "0.16.1"
+ VERSION = "0.17.4"
  end
data/ood_core.gemspec CHANGED
@@ -15,7 +15,7 @@ Gem::Specification.new do |spec|
  spec.license = "MIT"

  spec.files = `git ls-files -z`.split("\x0").reject do |f|
- f.match(%r{^(test|spec|features)/})
+ f.match(%r{^(test|spec|features|.github)/})
  end
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: ood_core
  version: !ruby/object:Gem::Version
- version: 0.16.1
+ version: 0.17.4
  platform: ruby
  authors:
  - Eric Franz
@@ -10,7 +10,7 @@ authors:
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2021-04-23 00:00:00.000000000 Z
+ date: 2021-07-29 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: ood_support
@@ -160,7 +160,6 @@ executables: []
  extensions: []
  extra_rdoc_files: []
  files:
- - ".github/workflows/test.yml"
  - ".gitignore"
  - ".rspec"
  - CHANGELOG.md
data/.github/workflows/test.yml DELETED
@@ -1,30 +0,0 @@
- name: Unit Tests
-
- on:
- push:
- branches:
- - master
- pull_request:
- branches:
- - master
-
- jobs:
- tests:
- runs-on: ubuntu-latest
-
- steps:
- - name: checkout
- uses: actions/checkout@v2
-
- - name: Setup Ruby using Bundler
- uses: ruby/setup-ruby@v1
- with:
- ruby-version: "2.7.1"
- bundler-cache: true
- bundler: "2.1.4"
-
- - name: install gems
- run: bundle install
-
- - name: test
- run: bundle exec rake spec