ood_core 0.16.1 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 19665b6db28d01da39093dc90d4a5023ca12264f07b932aebc8ec8c443bafa25
4
- data.tar.gz: d9c8c6d8f30851ea9138c8325aafd750823534a51f36601a20366265ac4feec2
3
+ metadata.gz: 266e24cd2e11c2c712b6128bdc3f82bf9781ae9885f8f0eb21439cb80c889c90
4
+ data.tar.gz: 01e682b6313468371076cdfd4ff2df2f3c06c661af9b4d7d7a65b7dcf3e2d836
5
5
  SHA512:
6
- metadata.gz: 1ed1eaa873366ad5e825ed29c7401dd3bca4a424ab7a689a19479f297ec20d7e019cd53609006b0919a365dd0002eb0c1e9c0cabcc9f69579cf7ae81b33b3ae7
7
- data.tar.gz: 90a4cfa3ee8b1f76ef7e1f28df6d8e64725d1eaff005b4bd4ff7fc8f88e5bfda8a15e706636c18e7b5ac74451071eaea4e6814945ea25e95f6c7ed2de8fd2fec
6
+ metadata.gz: 22721c9d368ec44533d93914f977576ee77786ad0926976fa24067c7f353104edd32baffac34723b730e7f711c5b7581cf4f72f6232d050c719edcfc1b3cb14f
7
+ data.tar.gz: 5d99a3c782aad5e420333653e51073cdb6535b7689053452da7ee7ef234292769e70d4010d751aacd4576daf531700136c2aa0ef7a2efc31edcb29e4c01c8be4
data/CHANGELOG.md CHANGED
@@ -6,6 +6,37 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
6
6
  and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
7
7
 
8
8
  ## [Unreleased]
9
+
10
+ ## [0.17.0] - 2021-05-26
11
+
12
+ ### Fixed
13
+
14
+ - All Kubernetes resources now have the same labels in [280](https://github.com/OSC/ood_core/pull/280).
15
+ - Kubernetes does not crash when no configmap is defined in [282](https://github.com/OSC/ood_core/pull/282).
16
+ - Kubernetes will not specify init containers if there are none in
17
+ [284](https://github.com/OSC/ood_core/pull/284).
18
+
19
+ ### Added
20
+
21
+ - Kubernetes, Slurm and Torque now support the script option `gpus_per_node` in
22
+ [266](https://github.com/OSC/ood_core/pull/266).
23
+ - Kubernetes will now save the pod.yml into the staged root in
24
+ [277](https://github.com/OSC/ood_core/pull/277).
25
+ - Kubernetes now allows for node selector in [264](https://github.com/OSC/ood_core/pull/264).
26
+ - Kubernetes pods now have access to the environment variable POD_NAMESPACE in
27
+ [275](https://github.com/OSC/ood_core/pull/275).
28
+ - Kubernetes pods can now specify the image pull policy in [272](https://github.com/OSC/ood_core/pull/272).
29
+ - Cluster config's batch_connect now supports `ssh_allow` to disable sshing to compute
30
+ nodes per cluster in [286](https://github.com/OSC/ood_core/pull/286).
31
+ - Kubernetes will now add the templated script content to a configmap in
32
+ [273](https://github.com/OSC/ood_core/pull/273).
33
+
34
+ ### Changed
35
+
36
+ - Kubernetes username prefix no longer appends a - in [271](https://github.com/OSC/ood_core/pull/271).
37
+
38
+
39
+
9
40
  ## [0.16.1] - 2021-04-23
10
41
  ### Fixed
11
42
  - memoized some allow? variables to have better support around ACLS in
@@ -305,7 +336,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
305
336
  ### Added
306
337
  - Initial release!
307
338
 
308
- [Unreleased]: https://github.com/OSC/ood_core/compare/v0.16.1...HEAD
339
+ [Unreleased]: https://github.com/OSC/ood_core/compare/v0.17.0...HEAD
340
+ [0.17.0]: https://github.com/OSC/ood_core/compare/v0.16.1...v0.17.0
309
341
  [0.16.1]: https://github.com/OSC/ood_core/compare/v0.16.0...v0.16.1
310
342
  [0.16.0]: https://github.com/OSC/ood_core/compare/v0.15.1...v0.16.0
311
343
  [0.15.1]: https://github.com/OSC/ood_core/compare/v0.15.0...v0.15.1
@@ -147,6 +147,15 @@ module OodCore
147
147
  @allow = acls.all?(&:allow?)
148
148
  end
149
149
 
150
+ # Whether this cluster supports SSH to batch connect nodes
151
+ # @return [Boolean] whether cluster supports SSH to batch connect node
152
+ def batch_connect_ssh_allow?
153
+ return @batch_connect_ssh_allow if defined?(@batch_connect_ssh_allow)
154
+ return @batch_connect_ssh_allow = true if batch_connect_config.nil?
155
+
156
+ @batch_connect_ssh_allow = batch_connect_config.fetch(:ssh_allow, true)
157
+ end
158
+
150
159
  # The comparison operator
151
160
  # @param other [#to_sym] object to compare against
152
161
  # @return [Boolean] whether objects are equivalent
@@ -23,7 +23,7 @@ class OodCore::Job::Adapters::Kubernetes::Batch
23
23
  @cluster = options.fetch(:cluster, 'open-ondemand')
24
24
  @mounts = options.fetch(:mounts, []).map { |m| m.to_h.symbolize_keys }
25
25
  @all_namespaces = options.fetch(:all_namespaces, false)
26
- @username_prefix = options.fetch(:username_prefix, nil)
26
+ @username_prefix = options.fetch(:username_prefix, '')
27
27
  @namespace_prefix = options.fetch(:namespace_prefix, '')
28
28
 
29
29
  @using_context = false
@@ -45,6 +45,9 @@ class OodCore::Job::Adapters::Kubernetes::Batch
45
45
  raise ArgumentError, 'Must specify the script' if script.nil?
46
46
 
47
47
  resource_yml, id = generate_id_yml(script)
48
+ if !script.workdir.nil? && Dir.exist?(script.workdir)
49
+ File.open(File.join(script.workdir, 'pod.yml'), 'w') { |f| f.write resource_yml }
50
+ end
48
51
  call("#{formatted_ns_cmd} create -f -", stdin: resource_yml)
49
52
 
50
53
  id
@@ -146,7 +149,7 @@ class OodCore::Job::Adapters::Kubernetes::Batch
146
149
  end
147
150
 
148
151
  def k8s_username
149
- username_prefix.nil? ? username : "#{username_prefix}-#{username}"
152
+ "#{username_prefix}#{username}"
150
153
  end
151
154
 
152
155
  def user
@@ -189,10 +192,12 @@ class OodCore::Job::Adapters::Kubernetes::Batch
189
192
  native_data = script.native
190
193
  container = helper.container_from_native(native_data[:container], default_env)
191
194
  id = generate_id(container.name)
192
- configmap = helper.configmap_from_native(native_data, id)
195
+ configmap = helper.configmap_from_native(native_data, id, script.content)
193
196
  init_containers = helper.init_ctrs_from_native(native_data[:init_containers], container.env)
194
197
  spec = OodCore::Job::Adapters::Kubernetes::Resources::PodSpec.new(container, init_containers: init_containers)
195
198
  all_mounts = native_data[:mounts].nil? ? mounts : mounts + native_data[:mounts]
199
+ node_selector = native_data[:node_selector].nil? ? {} : native_data[:node_selector]
200
+ gpu_type = native_data[:gpu_type].nil? ? "nvidia.com/gpu" : native_data[:gpu_type]
196
201
 
197
202
  template = ERB.new(File.read(resource_file), nil, '-')
198
203
 
@@ -53,6 +53,7 @@ class OodCore::Job::Adapters::Kubernetes::Helper
53
53
  cpu: container[:cpu],
54
54
  working_dir: container[:working_dir],
55
55
  restart_policy: container[:restart_policy],
56
+ image_pull_policy: container[:image_pull_policy],
56
57
  image_pull_secret: container[:image_pull_secret]
57
58
  )
58
59
  end
@@ -80,10 +81,18 @@ class OodCore::Job::Adapters::Kubernetes::Helper
80
81
  # the input configmap hash
81
82
  # @param id [#to_s]
82
83
  # the id to use for giving the configmap a name
84
+ # @param script_content [#to_s]
85
+ # the batch script content
83
86
  # @return [OodCore::Job::Adapters::Kubernetes::Resources::ConfigMap]
84
- def configmap_from_native(native, id)
85
- configmap = native.fetch(:configmap, nil)
86
- return nil if configmap.nil?
87
+ def configmap_from_native(native, id, script_content)
88
+ configmap = native.fetch(:configmap, {})
89
+ configmap[:files] ||= []
90
+ configmap[:files] << {
91
+ filename: 'script.sh',
92
+ data: script_content,
93
+ mount_path: '/ood/script.sh',
94
+ sub_path: 'script.sh',
95
+ } unless configmap[:files].any? { |f| f[:filename] == 'script.sh' }
87
96
 
88
97
  OodCore::Job::Adapters::Kubernetes::Resources::ConfigMap.new(
89
98
  configmap_name(id),
@@ -35,11 +35,11 @@ module OodCore::Job::Adapters::Kubernetes::Resources
35
35
 
36
36
  class Container
37
37
  attr_accessor :name, :image, :command, :port, :env, :memory, :cpu, :working_dir,
38
- :restart_policy, :image_pull_secret, :supplemental_groups
38
+ :restart_policy, :image_pull_policy, :image_pull_secret, :supplemental_groups
39
39
 
40
40
  def initialize(
41
41
  name, image, command: [], port: nil, env: {}, memory: "4Gi", cpu: "1",
42
- working_dir: "", restart_policy: "Never", image_pull_secret: nil, supplemental_groups: []
42
+ working_dir: "", restart_policy: "Never", image_pull_policy: nil, image_pull_secret: nil, supplemental_groups: []
43
43
  )
44
44
  raise ArgumentError, "containers need valid names and images" unless name && image
45
45
 
@@ -52,6 +52,7 @@ module OodCore::Job::Adapters::Kubernetes::Resources
52
52
  @cpu = cpu.nil? ? "1" : cpu
53
53
  @working_dir = working_dir.nil? ? "" : working_dir
54
54
  @restart_policy = restart_policy.nil? ? "Never" : restart_policy
55
+ @image_pull_policy = image_pull_policy.nil? ? "IfNotPresent" : image_pull_policy
55
56
  @image_pull_secret = image_pull_secret
56
57
  @supplemental_groups = supplemental_groups.nil? ? [] : supplemental_groups
57
58
  end
@@ -66,6 +67,7 @@ module OodCore::Job::Adapters::Kubernetes::Resources
66
67
  cpu == other.cpu &&
67
68
  working_dir == other.working_dir &&
68
69
  restart_policy == other.restart_policy &&
70
+ image_pull_policy == other.image_pull_policy &&
69
71
  image_pull_secret == other.image_pull_secret &&
70
72
  supplemental_groups == other.supplemental_groups
71
73
  end
@@ -39,7 +39,7 @@ spec:
39
39
  containers:
40
40
  - name: "<%= spec.container.name %>"
41
41
  image: <%= spec.container.image %>
42
- imagePullPolicy: IfNotPresent
42
+ imagePullPolicy: <%= spec.container.image_pull_policy %>
43
43
  <%- unless spec.container.working_dir.empty? -%>
44
44
  workingDir: "<%= spec.container.working_dir %>"
45
45
  <%- end -%>
@@ -48,6 +48,10 @@ spec:
48
48
  valueFrom:
49
49
  fieldRef:
50
50
  fieldPath: metadata.name
51
+ - name: POD_NAMESPACE
52
+ valueFrom:
53
+ fieldRef:
54
+ fieldPath: metadata.namespace
51
55
  <%- spec.container.env.each_pair do |name, value| -%>
52
56
  - name: <%= name %>
53
57
  value: "<%= value %>"
@@ -62,8 +66,9 @@ spec:
62
66
  ports:
63
67
  - containerPort: <%= spec.container.port %>
64
68
  <%- end -%>
65
- <%- if configmap.mounts? || !all_mounts.empty? -%>
69
+ <%- if !all_mounts.empty? || (!configmap.nil? && configmap.mounts?) -%>
66
70
  volumeMounts:
71
+ <%- unless configmap.nil? -%>
67
72
  <%- configmap.files.each do |file| -%>
68
73
  <%- next if file.mount_path.nil? -%>
69
74
  - name: configmap-volume
@@ -72,6 +77,7 @@ spec:
72
77
  subPath: <%= file.sub_path %>
73
78
  <%- end # end unless file.sub_path.nil? -%>
74
79
  <%- end # end configmap.files.each -%>
80
+ <%- end # unless configmap.nil? -%>
75
81
  <%- all_mounts.each do |mount| -%>
76
82
  - name: <%= mount[:name] %>
77
83
  mountPath: <%= mount[:destination_path] %>
@@ -81,25 +87,36 @@ spec:
81
87
  limits:
82
88
  memory: "<%= spec.container.memory %>"
83
89
  cpu: "<%= spec.container.cpu %>"
90
+ <%- unless script.gpus_per_node.nil? -%>
91
+ <%= gpu_type %>: <%= script.gpus_per_node %>
92
+ <%- end -%>
84
93
  requests:
85
94
  memory: "<%= spec.container.memory %>"
86
95
  cpu: "<%= spec.container.cpu %>"
96
+ <%- unless script.gpus_per_node.nil? -%>
97
+ <%= gpu_type %>: <%= script.gpus_per_node %>
98
+ <%- end -%>
87
99
  securityContext:
88
100
  allowPrivilegeEscalation: false
89
101
  capabilities:
90
102
  drop:
91
103
  - all
92
104
  privileged: false
93
- <%- unless spec.init_containers.nil? -%>
105
+ <%- unless spec.init_containers.empty? -%>
94
106
  initContainers:
95
107
  <%- spec.init_containers.each do |ctr| -%>
96
108
  - name: "<%= ctr.name %>"
97
109
  image: "<%= ctr.image %>"
110
+ imagePullPolicy: <%= ctr.image_pull_policy %>
98
111
  env:
99
112
  - name: POD_NAME
100
113
  valueFrom:
101
114
  fieldRef:
102
115
  fieldPath: metadata.name
116
+ - name: POD_NAMESPACE
117
+ valueFrom:
118
+ fieldRef:
119
+ fieldPath: metadata.namespace
103
120
  <%- ctr.env.each_pair do |name, value| -%>
104
121
  - name: <%= name %>
105
122
  value: "<%= value %>"
@@ -108,8 +125,9 @@ spec:
108
125
  <%- ctr.command.each do |cmd| -%>
109
126
  - "<%= cmd %>"
110
127
  <%- end # command loop -%>
111
- <%- if configmap.init_mounts? || !all_mounts.empty? -%>
128
+ <%- if !all_mounts.empty? || (!configmap.nil? && configmap.init_mounts?) -%>
112
129
  volumeMounts:
130
+ <%- unless configmap.nil? -%>
113
131
  <%- configmap.files.each do |file| -%>
114
132
  <%- next if file.init_mount_path.nil? -%>
115
133
  - name: configmap-volume
@@ -118,6 +136,7 @@ spec:
118
136
  subPath: <%= file.init_sub_path %>
119
137
  <%- end # end unless file.sub_path.nil? -%>
120
138
  <%- end # end configmap.files.each -%>
139
+ <%- end # unless configmap.nil? -%>
121
140
  <%- all_mounts.each do |mount| -%>
122
141
  - name: <%= mount[:name] %>
123
142
  mountPath: <%= mount[:destination_path] %>
@@ -152,6 +171,12 @@ spec:
152
171
  <%- end # if mount is [host,nfs] -%>
153
172
  <%- end # for each mount -%>
154
173
  <%- end # (configmap.to_s.empty? || all_mounts.empty?) -%>
174
+ <%- unless node_selector.empty? -%>
175
+ nodeSelector:
176
+ <%- node_selector.each_pair do |key, value| -%>
177
+ <%= key %>: "<%= value %>"
178
+ <%- end # node_selector.each_pair -%>
179
+ <%- end #unless node_selector.empty? -%>
155
180
  ---
156
181
  <%- unless spec.container.port.nil? -%>
157
182
  apiVersion: v1
@@ -161,6 +186,8 @@ metadata:
161
186
  namespace: <%= namespace %>
162
187
  labels:
163
188
  job: <%= id %>
189
+ app.kubernetes.io/name: <%= container.name %>
190
+ app.kubernetes.io/managed-by: open-ondemand
164
191
  spec:
165
192
  selector:
166
193
  job: <%= id %>
@@ -170,8 +197,8 @@ spec:
170
197
  targetPort: <%= spec.container.port %>
171
198
  type: NodePort
172
199
  <%- end # end for service -%>
173
- ---
174
200
  <%- unless configmap.nil? -%>
201
+ ---
175
202
  apiVersion: v1
176
203
  kind: ConfigMap
177
204
  metadata:
@@ -179,6 +206,8 @@ metadata:
179
206
  namespace: <%= namespace %>
180
207
  labels:
181
208
  job: <%= id %>
209
+ app.kubernetes.io/name: <%= container.name %>
210
+ app.kubernetes.io/managed-by: open-ondemand
182
211
  data:
183
212
  <%- configmap.files.each do |file| -%>
184
213
  <%- next if file.data.nil? || file.filename.nil? -%>
@@ -423,6 +423,7 @@ module OodCore
423
423
  args.concat ["-t", seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
424
424
  args.concat ['-a', script.job_array_request] unless script.job_array_request.nil?
425
425
  args.concat ['--qos', script.qos] unless script.qos.nil?
426
+ args.concat ['--gpus-per-node', script.gpus_per_node] unless script.gpus_per_node.nil?
426
427
  # ignore nodes, don't know how to do this for slurm
427
428
 
428
429
  # Set dependencies
@@ -159,6 +159,8 @@ module OodCore
159
159
  args.concat ["-l", "walltime=#{seconds_to_duration(script.wall_time)}"] unless script.wall_time.nil?
160
160
  args.concat ['-t', script.job_array_request] unless script.job_array_request.nil?
161
161
  args.concat ['-l', "qos=#{script.qos}"] unless script.qos.nil?
162
+ args.concat ['-l', "gpus=#{script.gpus_per_node}"] unless script.gpus_per_node.nil?
163
+
162
164
  # Set environment variables
163
165
  env = script.job_environment.to_h
164
166
  args.concat ["-v", env.keys.join(",")] unless env.empty?
@@ -103,6 +103,10 @@ module OodCore
103
103
  # @return [String, nil] qos
104
104
  attr_reader :qos
105
105
 
106
+ # The GPUs per node for the job
107
+ # @return [Integer, nil] gpus per node
108
+ attr_reader :gpus_per_node
109
+
106
110
  # Object detailing any native specifications that are implementation specific
107
111
  # @note Should not be used at all costs.
108
112
  # @return [Object, nil] native specifications
@@ -136,6 +140,7 @@ module OodCore
136
140
  # @param accounting_id [#to_s, nil] accounting id
137
141
  # @param job_array_request [#to_s, nil] job array request
138
142
  # @param qos [#to_s, nil] qos
143
+ # @param gpus_per_node [#to_i, nil] gpus per node
139
144
  # @param native [Object, nil] native specifications
140
145
  # @param copy_environment [Boolean, nil] copy the environment
141
146
  def initialize(content:, args: nil, submit_as_hold: nil, rerunnable: nil,
@@ -145,7 +150,7 @@ module OodCore
145
150
  output_path: nil, error_path: nil, reservation_id: nil,
146
151
  queue_name: nil, priority: nil, start_time: nil,
147
152
  wall_time: nil, accounting_id: nil, job_array_request: nil,
148
- qos: nil, native: nil, copy_environment: nil, **_)
153
+ qos: nil, gpus_per_node: nil, native: nil, copy_environment: nil, **_)
149
154
  @content = content.to_s
150
155
 
151
156
  @submit_as_hold = submit_as_hold
@@ -170,6 +175,7 @@ module OodCore
170
175
  @accounting_id = accounting_id && accounting_id.to_s
171
176
  @job_array_request = job_array_request && job_array_request.to_s
172
177
  @qos = qos && qos.to_s
178
+ @gpus_per_node = gpus_per_node && gpus_per_node.to_i
173
179
  @native = native
174
180
  @copy_environment = (copy_environment.nil?) ? nil : !! copy_environment
175
181
  end
@@ -200,6 +206,7 @@ module OodCore
200
206
  accounting_id: accounting_id,
201
207
  job_array_request: job_array_request,
202
208
  qos: qos,
209
+ gpus_per_node: gpus_per_node,
203
210
  native: native,
204
211
  copy_environment: copy_environment
205
212
  }
@@ -1,4 +1,4 @@
1
1
  module OodCore
2
2
  # The current version of {OodCore}
3
- VERSION = "0.16.1"
3
+ VERSION = "0.17.0"
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ood_core
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.16.1
4
+ version: 0.17.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric Franz
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2021-04-23 00:00:00.000000000 Z
13
+ date: 2021-05-26 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: ood_support