ood_core 0.16.1 → 0.17.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 19665b6db28d01da39093dc90d4a5023ca12264f07b932aebc8ec8c443bafa25
4
- data.tar.gz: d9c8c6d8f30851ea9138c8325aafd750823534a51f36601a20366265ac4feec2
3
+ metadata.gz: 266e24cd2e11c2c712b6128bdc3f82bf9781ae9885f8f0eb21439cb80c889c90
4
+ data.tar.gz: 01e682b6313468371076cdfd4ff2df2f3c06c661af9b4d7d7a65b7dcf3e2d836
5
5
  SHA512:
6
- metadata.gz: 1ed1eaa873366ad5e825ed29c7401dd3bca4a424ab7a689a19479f297ec20d7e019cd53609006b0919a365dd0002eb0c1e9c0cabcc9f69579cf7ae81b33b3ae7
7
- data.tar.gz: 90a4cfa3ee8b1f76ef7e1f28df6d8e64725d1eaff005b4bd4ff7fc8f88e5bfda8a15e706636c18e7b5ac74451071eaea4e6814945ea25e95f6c7ed2de8fd2fec
6
+ metadata.gz: 22721c9d368ec44533d93914f977576ee77786ad0926976fa24067c7f353104edd32baffac34723b730e7f711c5b7581cf4f72f6232d050c719edcfc1b3cb14f
7
+ data.tar.gz: 5d99a3c782aad5e420333653e51073cdb6535b7689053452da7ee7ef234292769e70d4010d751aacd4576daf531700136c2aa0ef7a2efc31edcb29e4c01c8be4
data/CHANGELOG.md CHANGED
@@ -6,6 +6,37 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
6
6
  and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
7
7
 
8
8
  ## [Unreleased]
9
+
10
+ ## [0.17.0] - 2021-05-26
11
+
12
+ ### Fixed
13
+
14
+ - All Kubernetes resources now have the same labels in [280](https://github.com/OSC/ood_core/pull/280).
15
+ - Kubernetes does not crash when no configmap is defined in [282](https://github.com/OSC/ood_core/pull/282).
16
+ - Kubernetes will not specify init containers if there are none in
17
+ [284](https://github.com/OSC/ood_core/pull/284).
18
+
19
+ ### Added
20
+
21
+ - Kubernetes, Slurm and Torque now support the script option `gpus_per_node` in
22
+ [266](https://github.com/OSC/ood_core/pull/266).
23
+ - Kubernetes will now save the pod.yml into the staged root in
24
+ [277](https://github.com/OSC/ood_core/pull/277).
25
+ - Kubernetes now allows for node selector in [264](https://github.com/OSC/ood_core/pull/264).
26
+ - Kubernetes pods now have access to the environment variable POD_NAMESPACE in
27
+ [275](https://github.com/OSC/ood_core/pull/275).
28
+ - Kubernetes pods can now specify the image pull policy in [272](https://github.com/OSC/ood_core/pull/272).
29
+ - Cluster config's batch_connect now supports `ssh_allow` to disable sshing to compute
30
+ nodes per cluster in [286](https://github.com/OSC/ood_core/pull/286).
31
+ - Kubernetes will now add the templated script content to a configmap in
32
+ [273](https://github.com/OSC/ood_core/pull/273).
33
+
34
+ ### Changed
35
+
36
+ - Kubernetes username prefix no longer appends a - in [271](https://github.com/OSC/ood_core/pull/271).
37
+
38
+
39
+
9
40
  ## [0.16.1] - 2021-04-23
10
41
  ### Fixed
11
42
  - memoized some allow? variables to have better support around ACLS in
@@ -305,7 +336,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
305
336
  ### Added
306
337
  - Initial release!
307
338
 
308
- [Unreleased]: https://github.com/OSC/ood_core/compare/v0.16.1...HEAD
339
+ [Unreleased]: https://github.com/OSC/ood_core/compare/v0.17.0...HEAD
340
+ [0.17.0]: https://github.com/OSC/ood_core/compare/v0.16.1...v0.17.0
309
341
  [0.16.1]: https://github.com/OSC/ood_core/compare/v0.16.0...v0.16.1
310
342
  [0.16.0]: https://github.com/OSC/ood_core/compare/v0.15.1...v0.16.0
311
343
  [0.15.1]: https://github.com/OSC/ood_core/compare/v0.15.0...v0.15.1
@@ -147,6 +147,15 @@ module OodCore
147
147
  @allow = acls.all?(&:allow?)
148
148
  end
149
149
 
150
+ # Whether this cluster supports SSH to batch connect nodes
151
+ # @return [Boolean] whether cluster supports SSH to batch connect node
152
+ def batch_connect_ssh_allow?
153
+ return @batch_connect_ssh_allow if defined?(@batch_connect_ssh_allow)
154
+ return @batch_connect_ssh_allow = true if batch_connect_config.nil?
155
+
156
+ @batch_connect_ssh_allow = batch_connect_config.fetch(:ssh_allow, true)
157
+ end
158
+
150
159
  # The comparison operator
151
160
  # @param other [#to_sym] object to compare against
152
161
  # @return [Boolean] whether objects are equivalent
@@ -23,7 +23,7 @@ class OodCore::Job::Adapters::Kubernetes::Batch
23
23
  @cluster = options.fetch(:cluster, 'open-ondemand')
24
24
  @mounts = options.fetch(:mounts, []).map { |m| m.to_h.symbolize_keys }
25
25
  @all_namespaces = options.fetch(:all_namespaces, false)
26
- @username_prefix = options.fetch(:username_prefix, nil)
26
+ @username_prefix = options.fetch(:username_prefix, '')
27
27
  @namespace_prefix = options.fetch(:namespace_prefix, '')
28
28
 
29
29
  @using_context = false
@@ -45,6 +45,9 @@ class OodCore::Job::Adapters::Kubernetes::Batch
45
45
  raise ArgumentError, 'Must specify the script' if script.nil?
46
46
 
47
47
  resource_yml, id = generate_id_yml(script)
48
+ if !script.workdir.nil? && Dir.exist?(script.workdir)
49
+ File.open(File.join(script.workdir, 'pod.yml'), 'w') { |f| f.write resource_yml }
50
+ end
48
51
  call("#{formatted_ns_cmd} create -f -", stdin: resource_yml)
49
52
 
50
53
  id
@@ -146,7 +149,7 @@ class OodCore::Job::Adapters::Kubernetes::Batch
146
149
  end
147
150
 
148
151
  def k8s_username
149
- username_prefix.nil? ? username : "#{username_prefix}-#{username}"
152
+ "#{username_prefix}#{username}"
150
153
  end
151
154
 
152
155
  def user
@@ -189,10 +192,12 @@ class OodCore::Job::Adapters::Kubernetes::Batch
189
192
  native_data = script.native
190
193
  container = helper.container_from_native(native_data[:container], default_env)
191
194
  id = generate_id(container.name)
192
- configmap = helper.configmap_from_native(native_data, id)
195
+ configmap = helper.configmap_from_native(native_data, id, script.content)
193
196
  init_containers = helper.init_ctrs_from_native(native_data[:init_containers], container.env)
194
197
  spec = OodCore::Job::Adapters::Kubernetes::Resources::PodSpec.new(container, init_containers: init_containers)
195
198
  all_mounts = native_data[:mounts].nil? ? mounts : mounts + native_data[:mounts]
199
+ node_selector = native_data[:node_selector].nil? ? {} : native_data[:node_selector]
200
+ gpu_type = native_data[:gpu_type].nil? ? "nvidia.com/gpu" : native_data[:gpu_type]
196
201
 
197
202
  template = ERB.new(File.read(resource_file), nil, '-')
198
203
 
@@ -53,6 +53,7 @@ class OodCore::Job::Adapters::Kubernetes::Helper
53
53
  cpu: container[:cpu],
54
54
  working_dir: container[:working_dir],
55
55
  restart_policy: container[:restart_policy],
56
+ image_pull_policy: container[:image_pull_policy],
56
57
  image_pull_secret: container[:image_pull_secret]
57
58
  )
58
59
  end
@@ -80,10 +81,18 @@ class OodCore::Job::Adapters::Kubernetes::Helper
80
81
  # the input configmap hash
81
82
  # @param id [#to_s]
82
83
  # the id to use for giving the configmap a name
84
+ # @param script_content [#to_s]
85
+ # the batch script content
83
86
  # @return [OodCore::Job::Adapters::Kubernetes::Resources::ConfigMap]
84
- def configmap_from_native(native, id)
85
- configmap = native.fetch(:configmap, nil)
86
- return nil if configmap.nil?
87
+ def configmap_from_native(native, id, script_content)
88
+ configmap = native.fetch(:configmap, {})
89
+ configmap[:files] ||= []
90
+ configmap[:files] << {
91
+ filename: 'script.sh',
92
+ data: script_content,
93
+ mount_path: '/ood/script.sh',
94
+ sub_path: 'script.sh',
95
+ } unless configmap[:files].any? { |f| f[:filename] == 'script.sh' }
87
96
 
88
97
  OodCore::Job::Adapters::Kubernetes::Resources::ConfigMap.new(
89
98
  configmap_name(id),
@@ -35,11 +35,11 @@ module OodCore::Job::Adapters::Kubernetes::Resources
35
35
 
36
36
  class Container
37
37
  attr_accessor :name, :image, :command, :port, :env, :memory, :cpu, :working_dir,
38
- :restart_policy, :image_pull_secret, :supplemental_groups
38
+ :restart_policy, :image_pull_policy, :image_pull_secret, :supplemental_groups
39
39
 
40
40
  def initialize(
41
41
  name, image, command: [], port: nil, env: {}, memory: "4Gi", cpu: "1",
42
- working_dir: "", restart_policy: "Never", image_pull_secret: nil, supplemental_groups: []
42
+ working_dir: "", restart_policy: "Never", image_pull_policy: nil, image_pull_secret: nil, supplemental_groups: []
43
43
  )
44
44
  raise ArgumentError, "containers need valid names and images" unless name && image
45
45
 
@@ -52,6 +52,7 @@ module OodCore::Job::Adapters::Kubernetes::Resources
52
52
  @cpu = cpu.nil? ? "1" : cpu
53
53
  @working_dir = working_dir.nil? ? "" : working_dir
54
54
  @restart_policy = restart_policy.nil? ? "Never" : restart_policy
55
+ @image_pull_policy = image_pull_policy.nil? ? "IfNotPresent" : image_pull_policy
55
56
  @image_pull_secret = image_pull_secret
56
57
  @supplemental_groups = supplemental_groups.nil? ? [] : supplemental_groups
57
58
  end
@@ -66,6 +67,7 @@ module OodCore::Job::Adapters::Kubernetes::Resources
66
67
  cpu == other.cpu &&
67
68
  working_dir == other.working_dir &&
68
69
  restart_policy == other.restart_policy &&
70
+ image_pull_policy == other.image_pull_policy &&
69
71
  image_pull_secret == other.image_pull_secret &&
70
72
  supplemental_groups == other.supplemental_groups
71
73
  end
@@ -39,7 +39,7 @@ spec:
39
39
  containers:
40
40
  - name: "<%= spec.container.name %>"
41
41
  image: <%= spec.container.image %>
42
- imagePullPolicy: IfNotPresent
42
+ imagePullPolicy: <%= spec.container.image_pull_policy %>
43
43
  <%- unless spec.container.working_dir.empty? -%>
44
44
  workingDir: "<%= spec.container.working_dir %>"
45
45
  <%- end -%>
@@ -48,6 +48,10 @@ spec:
48
48
  valueFrom:
49
49
  fieldRef:
50
50
  fieldPath: metadata.name
51
+ - name: POD_NAMESPACE
52
+ valueFrom:
53
+ fieldRef:
54
+ fieldPath: metadata.namespace
51
55
  <%- spec.container.env.each_pair do |name, value| -%>
52
56
  - name: <%= name %>
53
57
  value: "<%= value %>"
@@ -62,8 +66,9 @@ spec:
62
66
  ports:
63
67
  - containerPort: <%= spec.container.port %>
64
68
  <%- end -%>
65
- <%- if configmap.mounts? || !all_mounts.empty? -%>
69
+ <%- if !all_mounts.empty? || (!configmap.nil? && configmap.mounts?) -%>
66
70
  volumeMounts:
71
+ <%- unless configmap.nil? -%>
67
72
  <%- configmap.files.each do |file| -%>
68
73
  <%- next if file.mount_path.nil? -%>
69
74
  - name: configmap-volume
@@ -72,6 +77,7 @@ spec:
72
77
  subPath: <%= file.sub_path %>
73
78
  <%- end # end unless file.sub_path.nil? -%>
74
79
  <%- end # end configmap.files.each -%>
80
+ <%- end # unless configmap.nil? -%>
75
81
  <%- all_mounts.each do |mount| -%>
76
82
  - name: <%= mount[:name] %>
77
83
  mountPath: <%= mount[:destination_path] %>
@@ -81,25 +87,36 @@ spec:
81
87
  limits:
82
88
  memory: "<%= spec.container.memory %>"
83
89
  cpu: "<%= spec.container.cpu %>"
90
+ <%- unless script.gpus_per_node.nil? -%>
91
+ <%= gpu_type %>: <%= script.gpus_per_node %>
92
+ <%- end -%>
84
93
  requests:
85
94
  memory: "<%= spec.container.memory %>"
86
95
  cpu: "<%= spec.container.cpu %>"
96
+ <%- unless script.gpus_per_node.nil? -%>
97
+ <%= gpu_type %>: <%= script.gpus_per_node %>
98
+ <%- end -%>
87
99
  securityContext:
88
100
  allowPrivilegeEscalation: false
89
101
  capabilities:
90
102
  drop:
91
103
  - all
92
104
  privileged: false
93
- <%- unless spec.init_containers.nil? -%>
105
+ <%- unless spec.init_containers.empty? -%>
94
106
  initContainers:
95
107
  <%- spec.init_containers.each do |ctr| -%>
96
108
  - name: "<%= ctr.name %>"
97
109
  image: "<%= ctr.image %>"
110
+ imagePullPolicy: <%= ctr.image_pull_policy %>
98
111
  env:
99
112
  - name: POD_NAME
100
113
  valueFrom:
101
114
  fieldRef:
102
115
  fieldPath: metadata.name
116
+ - name: POD_NAMESPACE
117
+ valueFrom:
118
+ fieldRef:
119
+ fieldPath: metadata.namespace
103
120
  <%- ctr.env.each_pair do |name, value| -%>
104
121
  - name: <%= name %>
105
122
  value: "<%= value %>"
@@ -108,8 +125,9 @@ spec:
108
125
  <%- ctr.command.each do |cmd| -%>
109
126
  - "<%= cmd %>"
110
127
  <%- end # command loop -%>
111
- <%- if configmap.init_mounts? || !all_mounts.empty? -%>
128
+ <%- if !all_mounts.empty? || (!configmap.nil? && configmap.init_mounts?) -%>
112
129
  volumeMounts:
130
+ <%- unless configmap.nil? -%>
113
131
  <%- configmap.files.each do |file| -%>
114
132
  <%- next if file.init_mount_path.nil? -%>
115
133
  - name: configmap-volume
@@ -118,6 +136,7 @@ spec:
118
136
  subPath: <%= file.init_sub_path %>
119
137
  <%- end # end unless file.sub_path.nil? -%>
120
138
  <%- end # end configmap.files.each -%>
139
+ <%- end # unless configmap.nil? -%>
121
140
  <%- all_mounts.each do |mount| -%>
122
141
  - name: <%= mount[:name] %>
123
142
  mountPath: <%= mount[:destination_path] %>
@@ -152,6 +171,12 @@ spec:
152
171
  <%- end # if mount is [host,nfs] -%>
153
172
  <%- end # for each mount -%>
154
173
  <%- end # (configmap.to_s.empty? || all_mounts.empty?) -%>
174
+ <%- unless node_selector.empty? -%>
175
+ nodeSelector:
176
+ <%- node_selector.each_pair do |key, value| -%>
177
+ <%= key %>: "<%= value %>"
178
+ <%- end # node_selector.each_pair -%>
179
+ <%- end #unless node_selector.empty? -%>
155
180
  ---
156
181
  <%- unless spec.container.port.nil? -%>
157
182
  apiVersion: v1
@@ -161,6 +186,8 @@ metadata:
161
186
  namespace: <%= namespace %>
162
187
  labels:
163
188
  job: <%= id %>
189
+ app.kubernetes.io/name: <%= container.name %>
190
+ app.kubernetes.io/managed-by: open-ondemand
164
191
  spec:
165
192
  selector:
166
193
  job: <%= id %>
@@ -170,8 +197,8 @@ spec:
170
197
  targetPort: <%= spec.container.port %>
171
198
  type: NodePort
172
199
  <%- end # end for service -%>
173
- ---
174
200
  <%- unless configmap.nil? -%>
201
+ ---
175
202
  apiVersion: v1
176
203
  kind: ConfigMap
177
204
  metadata:
@@ -179,6 +206,8 @@ metadata:
179
206
  namespace: <%= namespace %>
180
207
  labels:
181
208
  job: <%= id %>
209
+ app.kubernetes.io/name: <%= container.name %>
210
+ app.kubernetes.io/managed-by: open-ondemand
182
211
  data:
183
212
  <%- configmap.files.each do |file| -%>
184
213
  <%- next if file.data.nil? || file.filename.nil? -%>
@@ -423,6 +423,7 @@ module OodCore
423
423
  args.concat ["-t", seconds_to_duration(script.wall_time)] unless script.wall_time.nil?
424
424
  args.concat ['-a', script.job_array_request] unless script.job_array_request.nil?
425
425
  args.concat ['--qos', script.qos] unless script.qos.nil?
426
+ args.concat ['--gpus-per-node', script.gpus_per_node] unless script.gpus_per_node.nil?
426
427
  # ignore nodes, don't know how to do this for slurm
427
428
 
428
429
  # Set dependencies
@@ -159,6 +159,8 @@ module OodCore
159
159
  args.concat ["-l", "walltime=#{seconds_to_duration(script.wall_time)}"] unless script.wall_time.nil?
160
160
  args.concat ['-t', script.job_array_request] unless script.job_array_request.nil?
161
161
  args.concat ['-l', "qos=#{script.qos}"] unless script.qos.nil?
162
+ args.concat ['-l', "gpus=#{script.gpus_per_node}"] unless script.gpus_per_node.nil?
163
+
162
164
  # Set environment variables
163
165
  env = script.job_environment.to_h
164
166
  args.concat ["-v", env.keys.join(",")] unless env.empty?
@@ -103,6 +103,10 @@ module OodCore
103
103
  # @return [String, nil] qos
104
104
  attr_reader :qos
105
105
 
106
+ # The GPUs per node for the job
107
+ # @return [Integer, nil] gpus per node
108
+ attr_reader :gpus_per_node
109
+
106
110
  # Object detailing any native specifications that are implementation specific
107
111
  # @note Should not be used at all costs.
108
112
  # @return [Object, nil] native specifications
@@ -136,6 +140,7 @@ module OodCore
136
140
  # @param accounting_id [#to_s, nil] accounting id
137
141
  # @param job_array_request [#to_s, nil] job array request
138
142
  # @param qos [#to_s, nil] qos
143
+ # @param gpus_per_node [#to_i, nil] gpus per node
139
144
  # @param native [Object, nil] native specifications
140
145
  # @param copy_environment [Boolean, nil] copy the environment
141
146
  def initialize(content:, args: nil, submit_as_hold: nil, rerunnable: nil,
@@ -145,7 +150,7 @@ module OodCore
145
150
  output_path: nil, error_path: nil, reservation_id: nil,
146
151
  queue_name: nil, priority: nil, start_time: nil,
147
152
  wall_time: nil, accounting_id: nil, job_array_request: nil,
148
- qos: nil, native: nil, copy_environment: nil, **_)
153
+ qos: nil, gpus_per_node: nil, native: nil, copy_environment: nil, **_)
149
154
  @content = content.to_s
150
155
 
151
156
  @submit_as_hold = submit_as_hold
@@ -170,6 +175,7 @@ module OodCore
170
175
  @accounting_id = accounting_id && accounting_id.to_s
171
176
  @job_array_request = job_array_request && job_array_request.to_s
172
177
  @qos = qos && qos.to_s
178
+ @gpus_per_node = gpus_per_node && gpus_per_node.to_i
173
179
  @native = native
174
180
  @copy_environment = (copy_environment.nil?) ? nil : !! copy_environment
175
181
  end
@@ -200,6 +206,7 @@ module OodCore
200
206
  accounting_id: accounting_id,
201
207
  job_array_request: job_array_request,
202
208
  qos: qos,
209
+ gpus_per_node: gpus_per_node,
203
210
  native: native,
204
211
  copy_environment: copy_environment
205
212
  }
@@ -1,4 +1,4 @@
1
1
  module OodCore
2
2
  # The current version of {OodCore}
3
- VERSION = "0.16.1"
3
+ VERSION = "0.17.0"
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ood_core
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.16.1
4
+ version: 0.17.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric Franz
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2021-04-23 00:00:00.000000000 Z
13
+ date: 2021-05-26 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: ood_support