ood_core 0.16.0 → 0.17.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 392bbf30609185792c59bc9b83bad0d2d6861885db4264a4da39356cb64624b6
4
- data.tar.gz: 76df75234a646147a8fed14367d6467ffabe289bfa8264ff02614b60f6463652
3
+ metadata.gz: dca336fb15a08ad68f556b8d33fb76887f5c0370a0eef63685a5770fbf073110
4
+ data.tar.gz: 410b08fee5e739b7444ca3054483a2758d43062af964168b3f32318489d19fa0
5
5
  SHA512:
6
- metadata.gz: fd127307d7048a220bdd5bed28302e90708bb0a32c9c6905136ddb3664b8f0c5035510c528f255161c4d40c98a772d6200fa60c938aff10ea4a157fdf56d8d1f
7
- data.tar.gz: f3c1ea0622da5b4387ee123843da2e0e955afd40d28e0f25e425fea010865896cdebf87171e2264ffbffbd743be8d1533971a25566251dcb8da5748191dcbeea
6
+ metadata.gz: e82540895495b9f09c92f413f8f39a894fb700122da195cd4224d68eb5eae30845f8692c6d440462bac2c4f45b0a3270e7bf5219ba4adecfc63baa3884b53d28
7
+ data.tar.gz: 39a441ede8e9b91e169b1aff0c1345a56c98aac1f25a1b07d873b20e66833ed46a534b5ef23f392f5d0213b7e5c80e942f62b0031f4fd2065c327116975fdf8b
@@ -0,0 +1,8 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: bundler
4
+ directory: "/"
5
+ schedule:
6
+ interval: daily
7
+ time: "03:30"
8
+ open-pull-requests-limit: 10
data/CHANGELOG.md CHANGED
@@ -6,6 +6,66 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
6
6
  and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
7
7
 
8
8
  ## [Unreleased]
9
+
10
+ ## [0.17.2] - 7-14-2021
11
+
12
+ ### Fixed
13
+
14
+ - Fixed k8s adapter to only show Running pods as running in [300](https://github.com/OSC/ood_core/pull/300).
15
+
16
+ ## [0.17.1] - 6-14-2021
17
+
18
+ ### Fixed
19
+
20
+ - Fixed [278](https://github.com/OSC/ood_core/pull/278) where unschedulable pods will now show up as
21
+ queued_held status.
22
+
23
+ ### Changed
24
+
25
+ - KUBECONFIG now defaults to /dev/null in the kubernetes adapter in [292](https://github.com/OSC/ood_core/pull/292).
26
+
27
+ ### Added
28
+
29
+ - Sites can now set `batch_connect.ssh_allow` on the cluster to disable the buttons to start
30
+ a shell session to compute nodes in [289](https://github.com/OSC/ood_core/pull/289).
31
+ - `POD_PORT` is now available to jobs in the kubernetes adapter in [290](https://github.com/OSC/ood_core/pull/290).
32
+ - Kubernetes pods now support a startupProbe in [291](https://github.com/OSC/ood_core/pull/291).
33
+
34
+ ## [0.17.0] - 5-26-2021
35
+
36
+ ### Fixed
37
+
38
+ - All Kubernetes resources now have the same labels in [280](https://github.com/OSC/ood_core/pull/280).
39
+ - Kubernetes does not crash when no configmap is defined in [282](https://github.com/OSC/ood_core/pull/282).
40
+ - Kubernetes will not specify init containers if there are none in
41
+ [284](https://github.com/OSC/ood_core/pull/284).
42
+
43
+ ### Added
44
+
45
+ - Kubernetes, Slurm and Torque now support the script option `gpus_per_node` in
46
+ [266](https://github.com/OSC/ood_core/pull/266).
47
+ - Kubernetes will now save the pod.yml into the staged root in
48
+ [277](https://github.com/OSC/ood_core/pull/277).
49
+ - Kubernetes now allows for node selector in [264](https://github.com/OSC/ood_core/pull/264).
50
+ - Kubernetes pods now have access to the environment variable POD_NAMESPACE in
51
+ [275](https://github.com/OSC/ood_core/pull/275).
52
+ - Kubernetes pods can now specify the image pull policy in [272](https://github.com/OSC/ood_core/pull/272).
53
+ - Cluster config's batch_connect now supports `ssh_allow` to disable sshing to compute
54
+ nodes per cluster in [286](https://github.com/OSC/ood_core/pull/286).
55
+ - Kubernetes will now add the templated script content to a configmap in
56
+ [273](https://github.com/OSC/ood_core/pull/273).
57
+
58
+ ### Changed
59
+
60
+ - Kubernetes username prefix no longer appends a - in [271](https://github.com/OSC/ood_core/pull/271).
61
+
62
+
63
+
64
+ ## [0.16.1] - 2021-04-23
65
+ ### Fixed
66
+ - memoized some allow? variables to have better support around ACLS in
67
+ [267](https://github.com/OSC/ood_core/pull/267)
68
+
9
69
  ## [0.16.0] - 2021-04-20
10
70
  ### Fixed
11
71
  - tmux 2.7+ bug in the linux host adapter in [258](https://github.com/OSC/ood_core/pull/258)
@@ -300,8 +360,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
300
360
  ### Added
301
361
  - Initial release!
302
362
 
303
- [Unreleased]: https://github.com/OSC/ood_core/compare/v0.16.0...HEAD
304
- [0.15.2]: https://github.com/OSC/ood_core/compare/v0.15.1...v0.16.0
363
+ [Unreleased]: https://github.com/OSC/ood_core/compare/v0.17.2...HEAD
364
+ [0.17.2]: https://github.com/OSC/ood_core/compare/v0.17.1...v0.17.2
365
+ [0.17.1]: https://github.com/OSC/ood_core/compare/v0.17.0...v0.17.1
366
+ [0.17.0]: https://github.com/OSC/ood_core/compare/v0.16.1...v0.17.0
367
+ [0.16.1]: https://github.com/OSC/ood_core/compare/v0.16.0...v0.16.1
368
+ [0.16.0]: https://github.com/OSC/ood_core/compare/v0.15.1...v0.16.0
305
369
  [0.15.1]: https://github.com/OSC/ood_core/compare/v0.15.0...v0.15.1
306
370
  [0.15.0]: https://github.com/OSC/ood_core/compare/v0.14.0...v0.15.0
307
371
  [0.14.0]: https://github.com/OSC/ood_core/compare/v0.13.0...v0.14.0
@@ -78,7 +78,9 @@ module OodCore
78
78
  # Whether the login feature is allowed
79
79
  # @return [Boolean] is login allowed
80
80
  def login_allow?
81
- allow? && !login_config.empty?
81
+ return @login_allow if defined?(@login_allow)
82
+
83
+ @login_allow = (allow? && !login_config.empty?)
82
84
  end
83
85
 
84
86
  # Build a job adapter from the job configuration
@@ -90,9 +92,11 @@ module OodCore
90
92
  # Whether the job feature is allowed based on the ACLs
91
93
  # @return [Boolean] is the job feature allowed
92
94
  def job_allow?
93
- allow? &&
94
- !job_config.empty? &&
95
- build_acls(job_config.fetch(:acls, []).map(&:to_h)).all?(&:allow?)
95
+ return @job_allow if defined?(@job_allow)
96
+
97
+ @job_allow = (allow? && ! job_config.empty? && build_acls(
98
+ job_config.fetch(:acls, []).map(&:to_h)
99
+ ).all?(&:allow?))
96
100
  end
97
101
 
98
102
  # The batch connect template configuration used for this cluster
@@ -138,7 +142,18 @@ module OodCore
138
142
  # Whether this cluster is allowed to be used
139
143
  # @return [Boolean] whether cluster is allowed
140
144
  def allow?
141
- acls.all?(&:allow?)
145
+ return @allow if defined?(@allow)
146
+
147
+ @allow = acls.all?(&:allow?)
148
+ end
149
+
150
+ # Whether this cluster supports SSH to batch connect nodes
151
+ # @return [Boolean, nil] whether cluster supports SSH to batch connect node
152
+ def batch_connect_ssh_allow?
153
+ return @batch_connect_ssh_allow if defined?(@batch_connect_ssh_allow)
154
+ return @batch_connect_ssh_allow = nil if batch_connect_config.nil?
155
+
156
+ @batch_connect_ssh_allow = batch_connect_config.fetch(:ssh_allow, nil)
142
157
  end
143
158
 
144
159
  # The comparison operator
@@ -23,7 +23,7 @@ class OodCore::Job::Adapters::Kubernetes::Batch
23
23
  @cluster = options.fetch(:cluster, 'open-ondemand')
24
24
  @mounts = options.fetch(:mounts, []).map { |m| m.to_h.symbolize_keys }
25
25
  @all_namespaces = options.fetch(:all_namespaces, false)
26
- @username_prefix = options.fetch(:username_prefix, nil)
26
+ @username_prefix = options.fetch(:username_prefix, '')
27
27
  @namespace_prefix = options.fetch(:namespace_prefix, '')
28
28
 
29
29
  @using_context = false
@@ -45,6 +45,9 @@ class OodCore::Job::Adapters::Kubernetes::Batch
45
45
  raise ArgumentError, 'Must specify the script' if script.nil?
46
46
 
47
47
  resource_yml, id = generate_id_yml(script)
48
+ if !script.workdir.nil? && Dir.exist?(script.workdir)
49
+ File.open(File.join(script.workdir, 'pod.yml'), 'w') { |f| f.write resource_yml }
50
+ end
48
51
  call("#{formatted_ns_cmd} create -f -", stdin: resource_yml)
49
52
 
50
53
  id
@@ -146,7 +149,7 @@ class OodCore::Job::Adapters::Kubernetes::Batch
146
149
  end
147
150
 
148
151
  def k8s_username
149
- username_prefix.nil? ? username : "#{username_prefix}-#{username}"
152
+ "#{username_prefix}#{username}"
150
153
  end
151
154
 
152
155
  def user
@@ -180,6 +183,7 @@ class OodCore::Job::Adapters::Kubernetes::Batch
180
183
  HOME: home_dir,
181
184
  GROUP: group,
182
185
  GID: run_as_group,
186
+ KUBECONFIG: '/dev/null',
183
187
  }
184
188
  end
185
189
 
@@ -189,10 +193,12 @@ class OodCore::Job::Adapters::Kubernetes::Batch
189
193
  native_data = script.native
190
194
  container = helper.container_from_native(native_data[:container], default_env)
191
195
  id = generate_id(container.name)
192
- configmap = helper.configmap_from_native(native_data, id)
196
+ configmap = helper.configmap_from_native(native_data, id, script.content)
193
197
  init_containers = helper.init_ctrs_from_native(native_data[:init_containers], container.env)
194
198
  spec = OodCore::Job::Adapters::Kubernetes::Resources::PodSpec.new(container, init_containers: init_containers)
195
199
  all_mounts = native_data[:mounts].nil? ? mounts : mounts + native_data[:mounts]
200
+ node_selector = native_data[:node_selector].nil? ? {} : native_data[:node_selector]
201
+ gpu_type = native_data[:gpu_type].nil? ? "nvidia.com/gpu" : native_data[:gpu_type]
196
202
 
197
203
  template = ERB.new(File.read(resource_file), nil, '-')
198
204
 
@@ -53,7 +53,9 @@ class OodCore::Job::Adapters::Kubernetes::Helper
53
53
  cpu: container[:cpu],
54
54
  working_dir: container[:working_dir],
55
55
  restart_policy: container[:restart_policy],
56
- image_pull_secret: container[:image_pull_secret]
56
+ image_pull_policy: container[:image_pull_policy],
57
+ image_pull_secret: container[:image_pull_secret],
58
+ startup_probe: container[:startup_probe],
57
59
  )
58
60
  end
59
61
 
@@ -80,10 +82,18 @@ class OodCore::Job::Adapters::Kubernetes::Helper
80
82
  # the input configmap hash
81
83
  # @param id [#to_s]
82
84
  # the id to use for giving the configmap a name
85
+ # @param script_content [#to_s]
86
+ # the batch script content
83
87
  # @return [OodCore::Job::Adapters::Kubernetes::Resources::ConfigMap]
84
- def configmap_from_native(native, id)
85
- configmap = native.fetch(:configmap, nil)
86
- return nil if configmap.nil?
88
+ def configmap_from_native(native, id, script_content)
89
+ configmap = native.fetch(:configmap, {})
90
+ configmap[:files] ||= []
91
+ configmap[:files] << {
92
+ filename: 'script.sh',
93
+ data: script_content,
94
+ mount_path: '/ood/script.sh',
95
+ sub_path: 'script.sh',
96
+ } unless configmap[:files].any? { |f| f[:filename] == 'script.sh' }
87
97
 
88
98
  OodCore::Job::Adapters::Kubernetes::Resources::ConfigMap.new(
89
99
  configmap_name(id),
@@ -140,7 +150,7 @@ class OodCore::Job::Adapters::Kubernetes::Helper
140
150
  {
141
151
  id: json_data.dig(:metadata, :name).to_s,
142
152
  job_name: name_from_metadata(json_data.dig(:metadata)),
143
- status: pod_status_from_json(json_data),
153
+ status: OodCore::Job::Status.new(state: pod_status_from_json(json_data)),
144
154
  job_owner: job_owner_from_json(json_data, ns_prefix),
145
155
  submission_time: submission_time(json_data),
146
156
  dispatch_time: dispatch_time(json_data),
@@ -230,15 +240,21 @@ class OodCore::Job::Adapters::Kubernetes::Helper
230
240
  def submission_time(json_data)
231
241
  status = json_data.dig(:status)
232
242
  start = status.dig(:startTime)
243
+ creation = json_data.dig(:metadata, :creationTimestamp)
233
244
 
234
- if start.nil?
245
+ if !creation.nil?
246
+ str = creation
247
+ elsif !start.nil?
248
+ str = start
249
+ else
235
250
  # the pod is in some pending state limbo
236
251
  conditions = status.dig(:conditions)
252
+ return nil if conditions.nil?
253
+ return nil if conditions.size == 0
237
254
  # best guess to start time is just the first condition's
238
255
  # transition time
239
256
  str = conditions[0].dig(:lastTransitionTime)
240
- else
241
- str = start
257
+ return nil if str.nil?
242
258
  end
243
259
 
244
260
  DateTime.parse(str).to_time.to_i
@@ -246,11 +262,19 @@ class OodCore::Job::Adapters::Kubernetes::Helper
246
262
 
247
263
  def pod_status_from_json(json_data)
248
264
  phase = json_data.dig(:status, :phase)
265
+ conditions = json_data.dig(:status, :conditions)
266
+ container_statuses = json_data.dig(:status, :containerStatuses)
267
+ unschedulable = conditions.to_a.any? { |c| c.dig(:reason) == "Unschedulable" }
268
+ ready = !container_statuses.to_a.empty? && container_statuses.to_a.all? { |s| s.dig(:ready) == true }
269
+ return "running" if ready
270
+
249
271
  state = case phase
250
- when "Running"
251
- "running"
252
272
  when "Pending"
253
- "queued"
273
+ if unschedulable
274
+ "queued_held"
275
+ else
276
+ "queued"
277
+ end
254
278
  when "Failed"
255
279
  "suspended"
256
280
  when "Succeeded"
@@ -260,8 +284,6 @@ class OodCore::Job::Adapters::Kubernetes::Helper
260
284
  else
261
285
  "undetermined"
262
286
  end
263
-
264
- OodCore::Job::Status.new(state: state)
265
287
  end
266
288
 
267
289
  def terminated_state(status)
@@ -33,13 +33,36 @@ module OodCore::Job::Adapters::Kubernetes::Resources
33
33
  end
34
34
  end
35
35
 
36
+ class TCPProbe
37
+ attr_accessor :port, :initial_delay_seconds, :failure_threshold, :period_seconds
38
+
39
+ def initialize(port, data)
40
+ data ||= {}
41
+ @port = port
42
+ @initial_delay_seconds = data[:initial_delay_seconds] || 2
43
+ @failure_threshold = data[:failure_threshold] || 5
44
+ @period_seconds = data[:period_seconds] || 5
45
+ end
46
+
47
+ def to_h
48
+ {
49
+ port: port,
50
+ initial_delay_seconds: initial_delay_seconds,
51
+ failure_threshold: failure_threshold,
52
+ period_seconds: period_seconds,
53
+ }
54
+ end
55
+ end
56
+
36
57
  class Container
37
58
  attr_accessor :name, :image, :command, :port, :env, :memory, :cpu, :working_dir,
38
- :restart_policy, :image_pull_secret, :supplemental_groups
59
+ :restart_policy, :image_pull_policy, :image_pull_secret, :supplemental_groups,
60
+ :startup_probe
39
61
 
40
62
  def initialize(
41
63
  name, image, command: [], port: nil, env: {}, memory: "4Gi", cpu: "1",
42
- working_dir: "", restart_policy: "Never", image_pull_secret: nil, supplemental_groups: []
64
+ working_dir: "", restart_policy: "Never", image_pull_policy: nil, image_pull_secret: nil, supplemental_groups: [],
65
+ startup_probe: {}
43
66
  )
44
67
  raise ArgumentError, "containers need valid names and images" unless name && image
45
68
 
@@ -52,8 +75,10 @@ module OodCore::Job::Adapters::Kubernetes::Resources
52
75
  @cpu = cpu.nil? ? "1" : cpu
53
76
  @working_dir = working_dir.nil? ? "" : working_dir
54
77
  @restart_policy = restart_policy.nil? ? "Never" : restart_policy
78
+ @image_pull_policy = image_pull_policy.nil? ? "IfNotPresent" : image_pull_policy
55
79
  @image_pull_secret = image_pull_secret
56
80
  @supplemental_groups = supplemental_groups.nil? ? [] : supplemental_groups
81
+ @startup_probe = TCPProbe.new(@port, startup_probe)
57
82
  end
58
83
 
59
84
  def ==(other)
@@ -66,8 +91,10 @@ module OodCore::Job::Adapters::Kubernetes::Resources
66
91
  cpu == other.cpu &&
67
92
  working_dir == other.working_dir &&
68
93
  restart_policy == other.restart_policy &&
94
+ image_pull_policy == other.image_pull_policy &&
69
95
  image_pull_secret == other.image_pull_secret &&
70
- supplemental_groups == other.supplemental_groups
96
+ supplemental_groups == other.supplemental_groups &&
97
+ startup_probe.to_h == other.startup_probe.to_h
71
98
  end
72
99
  end
73
100
 
@@ -39,7 +39,7 @@ spec:
39
39
  containers:
40
40
  - name: "<%= spec.container.name %>"
41
41
  image: <%= spec.container.image %>
42
- imagePullPolicy: IfNotPresent
42
+ imagePullPolicy: <%= spec.container.image_pull_policy %>
43
43
  <%- unless spec.container.working_dir.empty? -%>
44
44
  workingDir: "<%= spec.container.working_dir %>"
45
45
  <%- end -%>
@@ -48,6 +48,14 @@ spec:
48
48
  valueFrom:
49
49
  fieldRef:
50
50
  fieldPath: metadata.name
51
+ - name: POD_NAMESPACE
52
+ valueFrom:
53
+ fieldRef:
54
+ fieldPath: metadata.namespace
55
+ <%- unless spec.container.port.nil? -%>
56
+ - name: POD_PORT
57
+ value: "<%= spec.container.port %>"
58
+ <%- end -%>
51
59
  <%- spec.container.env.each_pair do |name, value| -%>
52
60
  - name: <%= name %>
53
61
  value: "<%= value %>"
@@ -61,9 +69,16 @@ spec:
61
69
  <%- unless spec.container.port.nil? -%>
62
70
  ports:
63
71
  - containerPort: <%= spec.container.port %>
72
+ startupProbe:
73
+ tcpSocket:
74
+ port: <%= spec.container.startup_probe.port %>
75
+ initialDelaySeconds: <%= spec.container.startup_probe.initial_delay_seconds %>
76
+ failureThreshold: <%= spec.container.startup_probe.failure_threshold %>
77
+ periodSeconds: <%= spec.container.startup_probe.period_seconds %>
64
78
  <%- end -%>
65
- <%- if configmap.mounts? || !all_mounts.empty? -%>
79
+ <%- if !all_mounts.empty? || (!configmap.nil? && configmap.mounts?) -%>
66
80
  volumeMounts:
81
+ <%- unless configmap.nil? -%>
67
82
  <%- configmap.files.each do |file| -%>
68
83
  <%- next if file.mount_path.nil? -%>
69
84
  - name: configmap-volume
@@ -72,6 +87,7 @@ spec:
72
87
  subPath: <%= file.sub_path %>
73
88
  <%- end # end unless file.sub_path.nil? -%>
74
89
  <%- end # end configmap.files.each -%>
90
+ <%- end # unless configmap.nil? -%>
75
91
  <%- all_mounts.each do |mount| -%>
76
92
  - name: <%= mount[:name] %>
77
93
  mountPath: <%= mount[:destination_path] %>
@@ -81,25 +97,36 @@ spec:
81
97
  limits:
82
98
  memory: "<%= spec.container.memory %>"
83
99
  cpu: "<%= spec.container.cpu %>"
100
+ <%- unless script.gpus_per_node.nil? -%>
101
+ <%= gpu_type %>: <%= script.gpus_per_node %>
102
+ <%- end -%>
84
103
  requests:
85
104
  memory: "<%= spec.container.memory %>"
86
105
  cpu: "<%= spec.container.cpu %>"
106
+ <%- unless script.gpus_per_node.nil? -%>
107
+ <%= gpu_type %>: <%= script.gpus_per_node %>
108
+ <%- end -%>
87
109
  securityContext:
88
110
  allowPrivilegeEscalation: false
89
111
  capabilities:
90
112
  drop:
91
113
  - all
92
114
  privileged: false
93
- <%- unless spec.init_containers.nil? -%>
115
+ <%- unless spec.init_containers.empty? -%>
94
116
  initContainers:
95
117
  <%- spec.init_containers.each do |ctr| -%>
96
118
  - name: "<%= ctr.name %>"
97
119
  image: "<%= ctr.image %>"
120
+ imagePullPolicy: <%= ctr.image_pull_policy %>
98
121
  env:
99
122
  - name: POD_NAME
100
123
  valueFrom:
101
124
  fieldRef:
102
125
  fieldPath: metadata.name
126
+ - name: POD_NAMESPACE
127
+ valueFrom:
128
+ fieldRef:
129
+ fieldPath: metadata.namespace
103
130
  <%- ctr.env.each_pair do |name, value| -%>
104
131
  - name: <%= name %>
105
132
  value: "<%= value %>"
@@ -108,8 +135,9 @@ spec:
108
135
  <%- ctr.command.each do |cmd| -%>
109
136
  - "<%= cmd %>"
110
137
  <%- end # command loop -%>
111
- <%- if configmap.init_mounts? || !all_mounts.empty? -%>
138
+ <%- if !all_mounts.empty? || (!configmap.nil? && configmap.init_mounts?) -%>
112
139
  volumeMounts:
140
+ <%- unless configmap.nil? -%>
113
141
  <%- configmap.files.each do |file| -%>
114
142
  <%- next if file.init_mount_path.nil? -%>
115
143
  - name: configmap-volume
@@ -118,6 +146,7 @@ spec:
118
146
  subPath: <%= file.init_sub_path %>
119
147
  <%- end # end unless file.sub_path.nil? -%>
120
148
  <%- end # end configmap.files.each -%>
149
+ <%- end # unless configmap.nil? -%>
121
150
  <%- all_mounts.each do |mount| -%>
122
151
  - name: <%= mount[:name] %>
123
152
  mountPath: <%= mount[:destination_path] %>
@@ -152,6 +181,12 @@ spec:
152
181
  <%- end # if mount is [host,nfs] -%>
153
182
  <%- end # for each mount -%>
154
183
  <%- end # (configmap.to_s.empty? || all_mounts.empty?) -%>
184
+ <%- unless node_selector.empty? -%>
185
+ nodeSelector:
186
+ <%- node_selector.each_pair do |key, value| -%>
187
+ <%= key %>: "<%= value %>"
188
+ <%- end # node_selector.each_pair -%>
189
+ <%- end #unless node_selector.empty? -%>
155
190
  ---
156
191
  <%- unless spec.container.port.nil? -%>
157
192
  apiVersion: v1
@@ -161,6 +196,8 @@ metadata:
161
196
  namespace: <%= namespace %>
162
197
  labels:
163
198
  job: <%= id %>
199
+ app.kubernetes.io/name: <%= container.name %>
200
+ app.kubernetes.io/managed-by: open-ondemand
164
201
  spec:
165
202
  selector:
166
203
  job: <%= id %>
@@ -170,8 +207,8 @@ spec:
170
207
  targetPort: <%= spec.container.port %>
171
208
  type: NodePort
172
209
  <%- end # end for service -%>
173
- ---
174
210
  <%- unless configmap.nil? -%>
211
+ ---
175
212
  apiVersion: v1
176
213
  kind: ConfigMap
177
214
  metadata:
@@ -179,6 +216,8 @@ metadata:
179
216
  namespace: <%= namespace %>
180
217
  labels:
181
218
  job: <%= id %>
219
+ app.kubernetes.io/name: <%= container.name %>
220
+ app.kubernetes.io/managed-by: open-ondemand
182
221
  data:
183
222
  <%- configmap.files.each do |file| -%>
184
223
  <%- next if file.data.nil? || file.filename.nil? -%>
@@ -423,6 +423,7 @@ module OodCore
423
423
  args.concat ["-t", seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
424
424
  args.concat ['-a', script.job_array_request] unless script.job_array_request.nil?
425
425
  args.concat ['--qos', script.qos] unless script.qos.nil?
426
+ args.concat ['--gpus-per-node', script.gpus_per_node] unless script.gpus_per_node.nil?
426
427
  # ignore nodes, don't know how to do this for slurm
427
428
 
428
429
  # Set dependencies
@@ -159,6 +159,8 @@ module OodCore
159
159
  args.concat ["-l", "walltime=#{seconds_to_duration(script.wall_time)}"] unless script.wall_time.nil?
160
160
  args.concat ['-t', script.job_array_request] unless script.job_array_request.nil?
161
161
  args.concat ['-l', "qos=#{script.qos}"] unless script.qos.nil?
162
+ args.concat ['-l', "gpus=#{script.gpus_per_node}"] unless script.gpus_per_node.nil?
163
+
162
164
  # Set environment variables
163
165
  env = script.job_environment.to_h
164
166
  args.concat ["-v", env.keys.join(",")] unless env.empty?
@@ -103,6 +103,10 @@ module OodCore
103
103
  # @return [String, nil] qos
104
104
  attr_reader :qos
105
105
 
106
+ # The GPUs per node for the job
107
+ # @return [Integer, nil] gpus per node
108
+ attr_reader :gpus_per_node
109
+
106
110
  # Object detailing any native specifications that are implementation specific
107
111
  # @note Should be avoided at all costs.
108
112
  # @return [Object, nil] native specifications
@@ -136,6 +140,7 @@ module OodCore
136
140
  # @param accounting_id [#to_s, nil] accounting id
137
141
  # @param job_array_request [#to_s, nil] job array request
138
142
  # @param qos [#to_s, nil] qos
143
+ # @param gpus_per_node [#to_i, nil] gpus per node
139
144
  # @param native [Object, nil] native specifications
140
145
  # @param copy_environment [Boolean, nil] copy the environment
141
146
  def initialize(content:, args: nil, submit_as_hold: nil, rerunnable: nil,
@@ -145,7 +150,7 @@ module OodCore
145
150
  output_path: nil, error_path: nil, reservation_id: nil,
146
151
  queue_name: nil, priority: nil, start_time: nil,
147
152
  wall_time: nil, accounting_id: nil, job_array_request: nil,
148
- qos: nil, native: nil, copy_environment: nil, **_)
153
+ qos: nil, gpus_per_node: nil, native: nil, copy_environment: nil, **_)
149
154
  @content = content.to_s
150
155
 
151
156
  @submit_as_hold = submit_as_hold
@@ -170,6 +175,7 @@ module OodCore
170
175
  @accounting_id = accounting_id && accounting_id.to_s
171
176
  @job_array_request = job_array_request && job_array_request.to_s
172
177
  @qos = qos && qos.to_s
178
+ @gpus_per_node = gpus_per_node && gpus_per_node.to_i
173
179
  @native = native
174
180
  @copy_environment = (copy_environment.nil?) ? nil : !! copy_environment
175
181
  end
@@ -200,6 +206,7 @@ module OodCore
200
206
  accounting_id: accounting_id,
201
207
  job_array_request: job_array_request,
202
208
  qos: qos,
209
+ gpus_per_node: gpus_per_node,
203
210
  native: native,
204
211
  copy_environment: copy_environment
205
212
  }
@@ -1,4 +1,4 @@
1
1
  module OodCore
2
2
  # The current version of {OodCore}
3
- VERSION = "0.16.0"
3
+ VERSION = "0.17.2"
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ood_core
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.16.0
4
+ version: 0.17.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric Franz
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2021-04-20 00:00:00.000000000 Z
13
+ date: 2021-07-16 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: ood_support
@@ -160,6 +160,7 @@ executables: []
160
160
  extensions: []
161
161
  extra_rdoc_files: []
162
162
  files:
163
+ - ".github/dependabot.yml"
163
164
  - ".github/workflows/test.yml"
164
165
  - ".gitignore"
165
166
  - ".rspec"