ood_core 0.28.0 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cc94feaae3ffa1016b59b053179a18322a7e9c4ad1f937776864a2c2b19fc0a1
4
- data.tar.gz: e41085ddcbbfff4a36723da314f41f1010e4db2fb28209ad781d5c2a6516cd46
3
+ metadata.gz: 53043c13d393367627c85321c8c7ef9d69d7a6cbab687ea32a82ab2077484024
4
+ data.tar.gz: e6c1f60a01e714e5ac02090a9c01ce7b6e96bde96d1568b31c60ab630cecc3c5
5
5
  SHA512:
6
- metadata.gz: 85daa1b26fe2b973ecdbb3b9d2f72b37b142795f637d2db0a73611de0b4081c052b732c56e71ea7d37830bcf3211edc6c01109a2d2514b1a2500b38ac9ac6b98
7
- data.tar.gz: 3a2f47afde0ac909f4d3ea6a7ccf2fcf48b8e13de94ad4e550ee1b2492cf28c7dcc72df179755b332b358f49accb72e8c229a1b506de6614148db14233e6c902
6
+ metadata.gz: a5ee3699f5737abf3158e00341fd7e236b6320893f9a3640f7e5b61c854796512e03277c699db100ebeef402f735a58e82596b50dce152a381b01f43cb93ce3b
7
+ data.tar.gz: 5ff0fc3ed64f3154394aeedc5caed25fd0779304963dbbecf0b6b00083185459abfa40a3787da43f0da21eb475c30bb726c26514e7395ece23b193081e4f2cb1
lib/ood_core/job/adapters/htcondor.rb ADDED
@@ -0,0 +1,549 @@
1
+ require "time"
2
+ require 'etc'
3
+ require 'tempfile'
4
+ require "ood_core/refinements/hash_extensions"
5
+ require "ood_core/refinements/array_extensions"
6
+ require "ood_core/job/adapters/helper"
7
+
8
+ module OodCore
9
+ module Job
10
+ class Factory
11
+ using Refinements::HashExtensions
12
+
13
+ # Build the HTCondor adapter from a configuration
14
+ # @param config [#to_h] the configuration for job adapter
15
+ # @option config [Object] :bin (nil) Path to HTCondor client binaries
16
+ # @option config [Object] :submit_host ("") Submit job on login node via ssh
17
+ # @option config [Object] :strict_host_checking (true) Whether to use strict host checking when ssh to submit_host
18
+ def self.build_htcondor(config)
19
+ c = config.to_h.symbolize_keys
20
+ bin = c.fetch(:bin, nil)
21
+ bin_overrides = c.fetch(:bin_overrides, {})
22
+ submit_host = c.fetch(:submit_host, "")
23
+ strict_host_checking = c.fetch(:strict_host_checking, true)
24
+ default_universe = c.fetch(:default_universe, "vanilla")
25
+ default_docker_image = c.fetch(:default_docker_image, "ubuntu:latest")
26
+ user_group_map = c.fetch(:user_group_map, nil)
27
+ cluster = c.fetch(:cluster, "")
28
+ additional_attributes = c.fetch(:additional_attributes, {})
29
+ htcondor = Adapters::HTCondor::Batch.new(bin: bin, bin_overrides: bin_overrides,
30
+ submit_host: submit_host, strict_host_checking: strict_host_checking,
31
+ default_universe: default_universe,
32
+ default_docker_image: default_docker_image,
33
+ user_group_map: user_group_map,
34
+ cluster: cluster,
35
+ additional_attributes: additional_attributes,
36
+ )
37
+ Adapters::HTCondor.new(htcondor: htcondor)
38
+ end
39
+ end
40
+
41
+ module Adapters
42
+ # An adapter object that describes the communication with an HTCondor
43
+ # resource manager for job management.
44
+ class HTCondor < Adapter
45
+ using Refinements::HashExtensions
46
+ using Refinements::ArrayExtensions
47
+
48
+ # Object used for simplified communication with an HTCondor batch server
49
+ # @api private
50
+ class Batch
51
+ # The path to the HTCondor client installation binaries
52
+ # @return [Pathname] path to HTCondor binaries
53
+ attr_reader :bin
54
+
55
+ # The path to the HTCondor client installation binaries that override
56
+ # the default binaries
57
+ # @return [Pathname] path to HTCondor binaries overrides
58
+ attr_reader :bin_overrides
59
+
60
+ # The login node where the job is submitted via ssh
61
+ # @return [String] The login node
62
+ attr_reader :submit_host
63
+
64
+ # Whether to use strict host checking when ssh to submit_host
65
+ # @return [Bool] whether strict host checking is used when ssh-ing to submit_host
66
+ attr_reader :strict_host_checking
67
+
68
+ # Default universe for jobs submitted to HTCondor
69
+ # @return [String] the default universe for jobs
70
+ attr_reader :default_universe
71
+
72
+ # Default docker image for jobs submitted to HTCondor
73
+ # @return [String] the default docker image for jobs
74
+ attr_reader :default_docker_image
75
+
76
+ # A path to the user/group map for HTCondor jobs
77
+ # The format in the file should adhere to the format used by [AssignAccountingGroup](https://htcondor.readthedocs.io/en/latest/admin-manual/introduction-to-configuration.html#FEATURE:ASSIGNACCOUNTINGGROUP)
78
+ # @return [String,nil] the path to the user/group map file
79
+ attr_reader :user_group_map
80
+
81
+ # The cluster name for this HTCondor instance
82
+ # @return [String] the cluster name
83
+ attr_reader :cluster
84
+
85
+ # Additional attributes to be added to the job submission
86
+ # @return [Hash{#to_s => #to_s}] additional attributes to be added to the job submission
87
+ attr_reader :additional_attributes
88
+
89
+ # The version of HTCondor on the submit_host
90
+ # @return [Gem::Version] the version of HTCondor
91
+ attr_reader :version
92
+
93
+ # The root exception class that all HTCondor-specific exceptions inherit
94
+ # from
95
+ class Error < StandardError; end
96
+
97
+ # @param bin [#to_s] path to HTCondor installation binaries
98
+ # @param submit_host [#to_s] Submits the job on a login node via ssh
99
+ # @param strict_host_checking [Bool] Whether to use strict host checking when ssh to submit_host
100
+ def initialize(bin: nil, bin_overrides: {}, submit_host: "", strict_host_checking: false, default_universe: "vanilla", default_docker_image: "ubuntu:latest", user_group_map: nil, cluster: "", additional_attributes: {})
101
+ @bin = Pathname.new(bin.to_s)
102
+ @bin_overrides = bin_overrides
103
+ @submit_host = submit_host.to_s
104
+ @strict_host_checking = strict_host_checking
105
+ @default_universe = default_universe.to_s
106
+ @default_docker_image = default_docker_image.to_s
107
+ @user_group_map = user_group_map.to_s unless user_group_map.nil?
108
+ @cluster = cluster.to_s
109
+ @additional_attributes = additional_attributes
110
+ @version = get_htcondor_version
111
+ end
112
+
113
+ # Submit a script to the batch server
114
+ # @param args [Array<#to_s>] arguments passed to `condor_submit` command
115
+ # @param env [Hash{#to_s => #to_s}] environment variables set
116
+ # @param script [String] the script to submit
117
+ # @raise [Error] if `condor_submit` command exited unsuccessfully
118
+ # @return [String] the id of the job that was created
119
+ def submit_string(args: [], script_args: [], env: {}, script: "")
120
+ args = args.map(&:to_s)
121
+ script_args = script_args.map(&:to_s).map { |s| s.to_s.gsub('"', "'") } # double quotes are not allowed here, so replace them with single quotes
122
+ env = env.to_h.each_with_object({}) { |(k, v), h| h[k.to_s] = v.to_s }
123
+
124
+ path = "#{Dir.tmpdir}/htcondor_submit_#{SecureRandom.uuid}"
125
+
126
+ call("bash", "-c", "cat > #{path}", stdin: script)
127
+ output = call("condor_submit", *args, env: env, stdin: "arguments=#{path.split("/").last} #{script_args.join(" ")}\ntransfer_input_files=#{path}").strip
128
+
129
+ match = output.match(/(cluster )?(\d+)/)
130
+ raise Error, "Failed to parse job ID from output: #{output}" unless match
131
+ match[2]
132
+
133
+ end
134
+
135
+ # Run the `condor_rm` command to remove a job
136
+ # @param id [#to_s] the id of the job to remove
137
+ # @raise [Error] if `condor_rm` command exited unsuccessfully
138
+ def remove_job(id)
139
+ call("condor_rm", id.to_s)
140
+ rescue Error => e
141
+ raise Error, "Failed to remove job #{id}: #{e.message}"
142
+ end
143
+
144
+ # Place a job on hold using `condor_hold`
145
+ # @param id [#to_s] the id of the job to hold
146
+ # @raise [Error] if `condor_hold` command exited unsuccessfully
147
+ def hold_job(id)
148
+ id = id.to_s
149
+ call("condor_hold", id)
150
+ rescue Error => e
151
+ raise Error, "Failed to hold job #{id}: #{e.message}"
152
+ end
153
+
154
+ # Release a job from hold using `condor_release`
155
+ # @param id [#to_s] the id of the job to release
156
+ # @raise [Error] if `condor_release` command exited unsuccessfully
157
+ def release_job(id)
158
+ id = id.to_s
159
+ call("condor_release", id)
160
+ rescue Error => e
161
+ raise Error, "Failed to release job #{id}: #{e.message}"
162
+ end
163
+
164
+ def condor_q_attrs
165
+ {
166
+ id: "ClusterId",
167
+ sub_id: "ProcId",
168
+ status: "JobStatus",
169
+ owner: "Owner",
170
+ acct_group: "AcctGroup",
171
+ job_name: "JobBatchName",
172
+ procs: "CpusProvisioned",
173
+ gpus: "GpusProvisioned",
174
+ submission_time: "QDate",
175
+ dispatch_time: "JobCurrentStartDate",
176
+ sys_cpu_time: "RemoteSysCpu",
177
+ user_cpu_time: "RemoteUserCpu",
178
+ wallclock_time: "RemoteWallClockTime"
179
+ }
180
+ end
181
+
182
+ # Retrieve job information using `condor_q`
183
+ # @param id [#to_s] the id of the job
184
+ # @param owner [String] the owner(s) of the job
185
+ # @raise [Error] if `condor_q` command exited unsuccessfully
186
+ # @return [Array<Hash>] list of details for jobs
187
+ def get_jobs(id: "", owner: nil)
188
+ args = []
189
+ unless id.to_s.empty?
190
+ if id.to_s.include?(".") # if id is a job array, we need to use the ClusterId and ProcId
191
+ cluster_id, proc_id = id.to_s.split(".")
192
+ args.concat ["-constraint", "\"ClusterId == #{cluster_id} && ProcId == #{proc_id}\""]
193
+ else # if id is a single job, we can just use the ClusterId
194
+ args.concat ["-constraint", "\"ClusterId == #{id}\""]
195
+ end
196
+ end
197
+ args.concat ["-constraint", "\"Owner == #{owner}\""] unless owner.to_s.empty?
198
+ args.concat ["-af", *condor_q_attrs.values]
199
+
200
+ output = call("condor_q", *args)
201
+ parse_condor_q_output(output)
202
+ end
203
+
204
+ # Retrieve slot information using `condor_status`
206
+ # @raise [Error] if `condor_status` command exited unsuccessfully
207
+ # @return [Array<Hash>] list of details for slots
208
+ def get_slots
209
+ args = ["-af", "Machine", "TotalSlotCPUs", "TotalSlotGPUs", "TotalSlotMemory", "CPUs", "GPUs", "Memory", "NumDynamicSlots"]
210
+ args.concat ["-constraint", "\"DynamicSlot is undefined\""]
211
+
212
+ output = call("condor_status", *args)
213
+ parse_condor_status_output(output)
214
+ end
215
+
216
+
217
+ # Retrieve accounts using user_group_map on @submit_host
218
+ # @return [Hash{String => Array<String>}] mapping of usernames to their groups
219
+ def get_accounts
220
+ raise Error, "user_group_map is not defined" if user_group_map.nil? || user_group_map.empty?
221
+
222
+ # Read the accounts map from the local file if it exists; otherwise read it from the submit_host
223
+ if File.exist?(user_group_map) && File.readable?(user_group_map)
224
+ output = File.read(user_group_map)
225
+ else
226
+ output = call("cat", user_group_map)
227
+ end
228
+ accounts = {}
229
+ output.each_line do |line|
230
+ next if line.strip.empty? || line.start_with?("#") # Skip empty lines and comments
231
+ _, username, groups = line.strip.split(/\s+/, 3)
232
+ accounts[username] = groups.split(",") if username && groups
233
+ end
234
+
235
+ accounts
236
+ rescue Error => e
237
+ raise Error, "Failed to retrieve accounts: #{e.message}"
238
+ end
239
+
240
+ private
241
+
242
+ # Parse the output of `condor_q` into a list of job hashes
243
+ def parse_condor_q_output(output)
244
+ jobs = []
245
+ fields = condor_q_attrs
246
+ output.each_line do |line|
247
+ # Parse each line into a hash
248
+ job_data = line.split
249
+ job = Hash[fields.keys.zip(job_data)]
250
+ job[:submit_host] = @submit_host # Add submit host to job data
251
+ job[:native] = job_data # Add native attributes to job data
252
+ jobs << job
253
+ end
254
+ jobs
255
+ end
256
+
257
+ # Parse the output of `condor_status` into a list of slot hashes
258
+ def parse_condor_status_output(output)
259
+ slots = []
260
+ output.each_line do |line|
261
+ # Parse each line into a hash (custom parsing logic for HTCondor slots)
262
+ slot_data = line.split
263
+ slots << { machine: slot_data[0], total_cpus: slot_data[1].to_i, total_gpus: slot_data[2].to_i, total_memory: slot_data[3].to_i,
264
+ cpus: slot_data[4].to_i, gpus: slot_data[5].to_i, memory: slot_data[6].to_i,
265
+ num_dynamic_slots: slot_data[7].to_i }
266
+ end
267
+ slots
268
+ end
269
+
270
+ # Call a forked HTCondor command
271
+ def call(cmd, *args, env: {}, stdin: "")
272
+ cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bin, bin_overrides)
273
+ args = args.map(&:to_s)
274
+
275
+ cmd, args = OodCore::Job::Adapters::Helper.ssh_wrap(submit_host, cmd, args, strict_host_checking)
276
+ o, e, s = Open3.capture3(env, cmd, *(args.map(&:to_s)), stdin_data: stdin.to_s)
277
+ s.success? ? o : raise(Error, e)
278
+ end
279
+
280
+ def get_htcondor_version
281
+ output = call("condor_version")
282
+ match = output.match(/CondorVersion: (\d+\.\d+\.\d+)/)
283
+ raise Error, "Failed to parse HTCondor version from output: #{output}" unless match
284
+ Gem::Version.new(match[1])
285
+ end
286
+ end
287
+
288
+ # Map HTCondor job statuses to symbols
289
+ STATUS_MAP = {
290
+ "1" => :queued,
291
+ "2" => :running,
292
+ "3" => :running,
293
+ "4" => :completed,
294
+ "5" => :queued_held,
295
+ "6" => :running,
296
+ "7" => :suspended
297
+ }.freeze
298
+
299
+ # @api private
300
+ # @param opts [#to_h] the options defining this adapter
301
+ # @option opts [Batch] :htcondor The HTCondor batch object
302
+ # @see Factory.build_htcondor
303
+ def initialize(opts = {})
304
+ o = opts.to_h.symbolize_keys
305
+
306
+ @htcondor = o.fetch(:htcondor) { raise ArgumentError, "No HTCondor object specified. Missing argument: htcondor" }
307
+ end
308
+
309
+ # Submit a job with the attributes defined in the job template instance
310
+ # @param script [Script] script object that describes the script and
311
+ # attributes for the submitted job
312
+ # @raise [JobAdapterError] if something goes wrong submitting a job
313
+ # @return [String] the job id returned after successfully submitting a
314
+ # job
315
+ def submit(script)
316
+ args = []
317
+ args.concat ["-batch-name", "#{script.job_name}"] unless script.job_name.nil?
318
+ args.concat ["-name", "#{script.queue_name}"] unless script.queue_name.nil?
319
+ args.concat ["-a", "priority=#{script.priority}"] unless script.priority.nil?
320
+ args.concat ["-a", "accounting_group=#{script.accounting_id}"] unless script.accounting_id.nil?
321
+
322
+ args.concat ["-a", "submit_as_hold=#{script.hold}"] unless script.submit_as_hold.nil?
323
+ args.concat ["-a", "max_retries=0"] unless !script.rerunnable.nil? && script.rerunnable
324
+
325
+ args.concat ["-a", "allowed_execute_duration=#{script.wall_time}"] unless script.wall_time.nil?
326
+ args.concat ["-a", "periodic_remove='HoldReasonCode == 47'"] unless script.wall_time.nil?
327
+ args.concat ["-a", "deferral_time=#{script.start_time.tv_sec}"] unless script.start_time.nil?
328
+
329
+ args.concat ["-a", "request_cpus=#{script.cores}"] unless script.cores.nil?
330
+ # requesting 1GB of memory per core seems reasonable
331
+ args.concat ["-a", "request_memory=#{script.cores * 1024}"] unless script.native.include?(:request_memory) && !script.native[:request_memory].nil?
332
+ args.concat ["-a", "request_gpus=#{script.gpus_per_node}"] unless script.gpus_per_node.nil?
333
+
334
+ universe = script.native[:universe] || @htcondor.default_universe
335
+ args.concat ["-a", "universe=#{universe}"]
336
+ container_image = script.native[:docker_image] || @htcondor.default_docker_image
337
+ if universe == "docker" then
338
+ args.concat ["-a", "docker_image=#{@htcondor.default_docker_image}"] unless script.native.include?(:docker_image) && !script.native[:docker_image].nil?
339
+ elsif universe == "container" then
340
+ script.native.delete(:docker_image) unless !script.native.include?(:docker_image)
341
+ script.native[:container_image] = container_image
342
+ end
343
+
344
+ args.concat ["-a", "input=#{script.input_path}"] unless script.input_path.nil?
345
+ if script.output_path.nil? then args.concat ["-a", "output=output.txt"] else args.concat ["-a", "output=#{script.output_path}"] end
346
+ if script.error_path.nil? then args.concat ["-a", "error=error.txt"] else args.concat ["-a", "error=#{script.error_path}"] end
347
+ if script.workdir.nil? then args.concat ["-a", "log=job.log"] else args.concat ["-a", "log=#{script.workdir}/job.log"] end
348
+
349
+ args.concat ["-a", "initialdir=#{script.workdir}"] unless script.workdir.nil?
350
+ args.concat ["-a", "\"environment=\\\"#{script.job_environment.to_a.map { |k, v| "#{k}='#{v.gsub("'", "''").gsub('"', "\\\"\\\"")}'" }.join(' ')}\\\"\""] unless script.job_environment.nil? || script.job_environment.empty?
351
+ args.concat ["-a", "getenv=#{script.copy_environment}"] unless script.copy_environment.nil?
352
+
353
+ args.concat ["-a", "should_transfer_files=true"]
354
+ args.concat ["-a", "+OpenOnDemand=true"]
355
+
356
+ # send email when started / terminated
357
+ if script.email_on_started && script.email_on_terminated then
358
+ raise JobAdapterError, "Cannot handle both email_on_started and email_on_terminated set to true" if script.email_on_started && script.email_on_terminated
359
+ # args.concat ["-a", "notification=Always"] # might be supported in the future?
360
+ elsif script.email_on_started then
361
+ if @htcondor.version >= Gem::Version.new("24.10.0") then
362
+ args.concat ["-a", "notification=Start"]
363
+ else
364
+ raise JobAdapterError, "Email notification on job start is not supported by this HTCondor version. Please upgrade to 24.10.0 or later."
365
+ end
366
+ elsif script.email_on_terminated then
367
+ args.concat ["-a", "notification=Complete"]
368
+ else
369
+ args.concat ["-a", "notification=Never"]
370
+ end
371
+ args.concat ["-a", "notify_user=#{script.email}"] unless script.email.nil?
372
+
373
+ args.concat @htcondor.additional_attributes.to_a.map { |k, v| "-a #{k}=#{v}" } unless @htcondor.additional_attributes.nil? || @htcondor.additional_attributes.empty?
374
+ args.concat script.native.to_a.map { |k, v| "-a #{k}=#{v}" } unless script.native.nil? || script.native.empty?
375
+
376
+ content = script.content
377
+
378
+ # Set executable to some shell to execute the script
379
+ if script.shell_path.nil?
380
+ args.concat ["-a", "executable=/bin/bash"]
381
+ else
382
+ args.concat ["-a", "executable=#{script.shell_path}"]
383
+ end
384
+
385
+ # -terse to keep the output minimal, "-" to read the submit description (here: the script arguments) from stdin.
386
+ args.concat ["-terse", "-"]
387
+
388
+ if script.job_array_request.nil?
389
+ # If no job array request is specified, we submit a single job
390
+ args.concat ["-queue", "1"]
391
+ else
392
+ # If a job array request is specified, we submit a job array
393
+ # The job array request is expected to be a string like "1-10" or "1,2,3"
394
+ # we must convert 1-3 to 1,2,3.
395
+ if script.job_array_request.include?("-")
396
+ start, finish = script.job_array_request.split("-").map(&:to_i)
397
+ job_ids = (start..finish).to_a.join(",")
398
+ else
399
+ job_ids = script.job_array_request
400
+ end
401
+ # Generate multiple jobs in the job array by setting OODArrayId to the requested array ids
402
+ # While -queue 10 would generate 10 jobs, the ProcId would always be 0-9, not 1-10 - or whatever the request is.
403
+ # So we set the OODArrayId to the requested job ids.
404
+ args.concat ["-queue", "1", "+OODArrayId", "in", job_ids.to_s]
405
+ end
406
+
407
+ script_args = script.args || []
408
+
409
+ @htcondor.submit_string(args: args, script_args: script_args, script: content)
410
+ rescue Batch::Error => e
411
+ raise JobAdapterError, e.message
412
+ end
413
+
414
+ # Retrieve job info from the resource manager
415
+ # @param id [#to_s] the id of the job
416
+ # @raise [JobAdapterError] if something goes wrong getting job info
417
+ # @return [Info] information describing submitted job
418
+ def info(id)
419
+ id = id.to_s
420
+ jobs = @htcondor.get_jobs(id: id)
421
+ jobs.empty? ? Info.new(id: id, status: :completed) : parse_job_info(jobs.first)
422
+ rescue Batch::Error => e
423
+ raise JobAdapterError, e.message
424
+ end
425
+
426
+ # Retrieve information for all jobs
427
+ # @raise [JobAdapterError] if something goes wrong retrieving job info
428
+ # @return [Array<Info>] list of information describing submitted jobs
429
+ def info_all(attrs: nil)
430
+ jobs = @htcondor.get_jobs
431
+ jobs.map { |job| parse_job_info(job) }
432
+ rescue Batch::Error => e
433
+ raise JobAdapterError, e.message
434
+ end
435
+
436
+ # Retrieve the status of a job
437
+ # @param id [#to_s] the id of the job
438
+ # @raise [JobAdapterError] if something goes wrong retrieving the job status
439
+ # @return [Symbol] the status of the job
440
+ def status(id)
441
+ id = id.to_s
442
+ jobs = @htcondor.get_jobs(id: id)
443
+ jobs.empty? ? :completed : get_state(jobs.first[:status])
444
+ rescue Batch::Error => e
445
+ raise JobAdapterError, e.message
446
+ end
447
+
448
+ # Retrieve cluster status information
449
+ # @raise [JobAdapterError] if something goes wrong retrieving cluster status
450
+ # @return [Hash] summary of cluster status including active and total nodes, processors, GPUs, etc.
451
+ def cluster_info
452
+ slots = @htcondor.get_slots
453
+ active_nodes = slots.count { |slot| slot[:num_dynamic_slots] > 0 }
454
+ total_nodes = slots.map { |slot| slot[:machine] }.uniq.count
455
+ active_processors = slots.sum { |slot| slot[:total_cpus] - slot[:cpus] }
456
+ total_processors = slots.sum { |slot| slot[:total_cpus] }
457
+ active_gpus = slots.sum { |slot| slot[:total_gpus] - slot[:gpus] }
458
+ total_gpus = slots.sum { |slot| slot[:total_gpus] }
459
+
460
+ ClusterInfo.new({
461
+ active_nodes: active_nodes,
462
+ total_nodes: total_nodes,
463
+ active_processors: active_processors,
464
+ total_processors: total_processors,
465
+ active_gpus: active_gpus,
466
+ total_gpus: total_gpus
467
+ })
468
+ rescue Batch::Error => e
469
+ raise JobAdapterError, e.message
470
+ end
471
+
472
+ # Indicate that the job adapter supports job arrays
473
+ def supports_job_arrays?
474
+ true
475
+ end
476
+
477
+ # Place a job on hold
478
+ # @param id [#to_s] the id of the job
479
+ # @raise [JobAdapterError] if something goes wrong placing the job on hold
480
+ def hold(id)
481
+ @htcondor.hold_job(id)
482
+ rescue Batch::Error => e
483
+ raise JobAdapterError, e.message
484
+ end
485
+
486
+ # Release a job from hold
487
+ # @param id [#to_s] the id of the job
488
+ # @raise [JobAdapterError] if something goes wrong releasing the job
489
+ def release(id)
490
+ @htcondor.release_job(id)
491
+ rescue Batch::Error => e
492
+ raise JobAdapterError, e.message
493
+ end
494
+ # Delete a job
495
+ # @param id [#to_s] the id of the job
496
+ # @raise [JobAdapterError] if something goes wrong deleting the job
497
+ def delete(id)
498
+ @htcondor.remove_job(id)
499
+ rescue Batch::Error => e
500
+ raise JobAdapterError, e.message
501
+ end
502
+
503
+ # Retrieve the relevant groups for the current user
504
+ # @return [Array<AccountInfo>] list of groups for the current user
505
+ def accounts
506
+ username = Etc.getlogin
507
+ groups = @htcondor.get_accounts[username]
508
+ parse_group_into_account_info(groups)
509
+ rescue Batch::Error => e
510
+ raise JobAdapterError, e.message
511
+ end
512
+
513
+ private
514
+
515
+ def get_state(st)
516
+ STATUS_MAP.fetch(st.to_s, :undetermined)
517
+ end
518
+
519
+ # Parse hash describing HTCondor job status
520
+ def parse_job_info(job)
521
+ Info.new(
522
+ id: job[:id].to_s + (job[:sub_id].to_s.empty? ? "" : ".#{job[:sub_id]}"),
523
+ status: get_state(job[:status]),
524
+ job_name: job[:job_name],
525
+ job_owner: job[:owner],
526
+ accounting_id: job[:acct_group],
527
+ submit_host: job[:submit_host],
528
+ procs: job[:procs].to_i,
529
+ gpus: job[:gpus].to_i,
530
+ submission_time: Time.at(job[:submission_time].to_i),
531
+ dispatch_time: Time.at(job[:dispatch_time].to_i),
532
+ cpu_time: job[:sys_cpu_time].to_i + job[:user_cpu_time].to_i,
533
+ wallclock_time: job[:wallclock_time].to_i,
534
+ native: job[:native],
535
+
536
+ )
537
+ end
538
+
539
+ # Parse group information into AccountInfo objects
540
+ # @param groups [Array<String>] list of group names
541
+ # @return [Array<AccountInfo>] list of AccountInfo objects
542
+ def parse_group_into_account_info(groups)
543
+ groups.map { |group| AccountInfo.new(name: group, cluster: @htcondor.cluster) }
544
+ end
545
+
546
+ end
547
+ end
548
+ end
549
+ end
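Editorial note, not part of the released diff: the factory method at the top of this file reads a small set of configuration keys, so the adapter could be built roughly as sketched below. Host names, paths and attribute values are hypothetical, and building the Batch object shells out to condor_version, so the HTCondor client must be reachable.

require "ood_core"

htcondor = OodCore::Job::Factory.build(
  {
    adapter: "htcondor",
    submit_host: "login.example.org",              # hypothetical login node reached via ssh
    strict_host_checking: true,
    default_universe: "vanilla",
    default_docker_image: "ubuntu:latest",
    user_group_map: "/etc/condor/acct_group.map",  # hypothetical AssignAccountingGroup-style map file
    cluster: "htcondor_cluster",
    additional_attributes: {}
  }
)
htcondor.supports_job_arrays?   # => true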
lib/ood_core/job/adapters/psij/delete.py ADDED
@@ -0,0 +1,18 @@
1
+ import argparse
2
+
3
+ parser = argparse.ArgumentParser(description="Process job parameters")
4
+ parser.add_argument("--id", type=str, required=True, help="Path to the job script")
5
+ parser.add_argument("--executor", type=str, required=True, help="Executor to be used")
6
+
7
+ args = parser.parse_args()
8
+
9
+ from psij import Job, JobExecutor
10
+
11
+ ex = JobExecutor.get_instance(args.executor)
12
+ job = Job()
13
+ job._native_id = args.id
14
+ # catch exception
15
+ try:
16
+ ex.cancel(job)
17
+ except Exception as e:
18
+ print(f"Invalid job id specified")
lib/ood_core/job/adapters/psij/get_info.py ADDED
@@ -0,0 +1,55 @@
1
+ import argparse
2
+ import json
3
+ from datetime import datetime, timedelta
4
+ import time
5
+
6
+ parser = argparse.ArgumentParser(description="Process job parameters")
7
+ parser.add_argument("--id", type=str, help="Path to the job script")
8
+ parser.add_argument("--owner", type=str, help="the name of job owner")
9
+ parser.add_argument("--executor", type=str, required=True, help="Executor to be used")
10
+
11
+ args = parser.parse_args()
12
+
13
+ from psij import Job, JobExecutor
14
+ from psij.serialize import JSONSerializer
15
+
16
+ ex = JobExecutor.get_instance(args.executor)
17
+ if args.id:
18
+ job = Job()
19
+ job._native_id = args.id
20
+ job_data = ex.info([job])
21
+ elif args.owner:
22
+ job_data = ex.info(owner=args.owner)
23
+ else:
24
+ job_data = ex.info()
25
+
26
+ s = JSONSerializer()
27
+ # create dict for each job.
28
+ # [ {'native_id': native_id, ... }, {'native_id': native_id, ...}, ...]
29
+ data = []
30
+ for job in job_data:
31
+ d = {}
32
+ d["native_id"] = job.native_id
33
+ d["current_state"] = job._status.state.name
34
+ d.update(job.current_info.__dict__)
35
+ d.update(s._from_spec(job.spec))
36
+ # the attributes and resources are nested in the job data.
37
+ # we need to flatten them.
38
+ attr = d["attributes"]
39
+ del d["attributes"]
40
+ d.update(attr)
41
+ # convert timedelta or string values to integers
42
+ d["duration"] = job.spec.attributes.duration.total_seconds()
43
+ d["wall_time"] = int(d["wall_time"])
44
+ resources = d["resources"]
45
+ del d["resources"]
46
+ d.update(resources)
47
+ d["submission_time"] = d["submission_time"].strftime("%Y-%m-%d %H:%M:%S")
48
+ if d["dispatch_time"] is not None:
49
+ d["dispatch_time"] = d["dispatch_time"].strftime("%Y-%m-%d %H:%M:%S")
50
+ else:
51
+ d["dispatch_time"] = None
52
+
53
+ data.append(d)
54
+
55
+ print(json.dumps(data))
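For reference, the Ruby PSIJ adapter later in this diff runs this helper as python3 get_info.py --executor=<executor> [--id=<job id>] [--owner=<owner>] and parses its stdout as JSON. A rough, hypothetical sketch of one element of the printed array, based on the fields assembled above:

{"native_id": "12345", "current_state": "ACTIVE", "name": "my_job", "owner": "alice",
 "queue_name": "batch", "duration": 3600.0, "wall_time": 120,
 "submission_time": "2025-08-08 10:00:00", "dispatch_time": "2025-08-08 10:01:00"}

Apart from native_id and current_state, the exact field set comes from the PSI/J job info and spec objects, so it depends on the installed psij version.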
lib/ood_core/job/adapters/psij/hold.py ADDED
@@ -0,0 +1,18 @@
1
+ import argparse
2
+
3
+ parser = argparse.ArgumentParser(description="Process job parameters")
4
+ parser.add_argument("--id", type=str, required=True, help="Path to the job script")
5
+ parser.add_argument("--executor", type=str, required=True, help="Executor to be used")
6
+
7
+ args = parser.parse_args()
8
+
9
+ from psij import Job, JobExecutor
10
+
11
+ ex = JobExecutor.get_instance(args.executor)
12
+ job = Job()
13
+ job._native_id = args.id
14
+ # catch exception
15
+ try:
16
+ ex.hold(job)
17
+ except Exception as e:
18
+ print(f"Invalid job id specified")
lib/ood_core/job/adapters/psij/release.py ADDED
@@ -0,0 +1,18 @@
1
+ import argparse
2
+
3
+ parser = argparse.ArgumentParser(description="Process job parameters")
4
+ parser.add_argument("--id", type=str, required=True, help="Path to the job script")
5
+ parser.add_argument("--executor", type=str, required=True, help="Executor to be used")
6
+
7
+ args = parser.parse_args()
8
+
9
+ from psij import Job, JobExecutor
10
+
11
+ ex = JobExecutor.get_instance(args.executor)
12
+ job = Job()
13
+ job._native_id = args.id
14
+ # catch exception
15
+ try:
16
+ ex.release(job)
17
+ except Exception as e:
18
+ print(f"Invalid job id specified")
lib/ood_core/job/adapters/psij/submit.py ADDED
@@ -0,0 +1,28 @@
1
+ import sys
2
+ from psij import Job, JobExecutor
3
+ from psij.serialize import JSONSerializer
4
+ from pathlib import Path
5
+ import json
6
+ import os
7
+
8
+ # create executor instance.
9
+ ex = JobExecutor.get_instance(sys.argv[1])
10
+
11
+ # deserialize json data to job spec.
12
+ deserialize = JSONSerializer()
13
+ d = sys.stdin.read()
14
+ j = json.loads(d)
15
+ spec = deserialize._to_spec(j)
16
+
17
+ # add executor string to each key of custom attributes.
18
+ if sys.argv[1] != "local" and spec.attributes.custom_attributes is not None:
19
+ h = {}
20
+ for k in spec.attributes.custom_attributes.keys():
21
+ h[f"{ex.name}.{k}"] = spec.attributes.custom_attributes[k]
22
+ spec.attributes.custom_attributes = h
23
+
24
+ spec.executable = os.path.expanduser(spec.executable)
25
+ job = Job(spec)
26
+
27
+ ex.submit(job)
28
+ print(job.native_id)
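This helper reads a serialized job spec on stdin and prints the scheduler-native id of the submitted job. As an editorial sketch with hypothetical values, the Ruby PSIJ adapter later in this diff invokes it as python3 submit.py <executor> with JSON of roughly this shape on stdin:

{"name": "my_job",
 "executable": "~/ood_tmp/run.sh",
 "directory": "/home/user/project",
 "stdout_path": "/home/user/project/stdout.txt",
 "stderr_path": "/home/user/project/stderr.txt",
 "attributes": {"queue_name": "batch", "duration": 3600, "custom_attributes": {"mem": "4gb"}},
 "resources": {"__version": 1, "cpu_cores_per_process": 4}}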
lib/ood_core/job/adapters/psij.rb ADDED
@@ -0,0 +1,410 @@
1
+ require "time"
2
+ require 'etc'
3
+ require "ood_core/refinements/hash_extensions"
4
+ require "ood_core/refinements/array_extensions"
5
+ require "ood_core/job/adapters/helper"
6
+
7
+ require 'json'
8
+ require 'pathname'
9
+
10
+ module OodCore
11
+ module Job
12
+ class Factory
13
+
14
+ using Refinements::HashExtensions
15
+ # Build the PSIJ adapter from a configuration
16
+ # @param config [#to_h] the configuration for job adapter
17
+ # @option config [Object] :bin (nil) Path to PSIJ binaries
18
+ # @option config [#to_h] :bin_overrides ({}) Optional overrides to PSIJ executables
19
+ def self.build_psij(config)
20
+ c = config.to_h.symbolize_keys
21
+ cluster = c.fetch(:cluster, nil)
22
+ conf = c.fetch(:conf, nil)
23
+ bin = c.fetch(:bin, nil)
24
+ bin_overrides = c.fetch(:bin_overrides, {})
25
+ submit_host = c.fetch(:submit_host, "")
26
+ strict_host_checking = c.fetch(:strict_host_checking, true)
27
+ executor = c.fetch(:executor, nil)
28
+ queue_name = c.fetch(:queue_name, nil)
29
+ psij = Adapters::PSIJ::Batch.new(cluster: cluster, conf: conf, bin: bin, bin_overrides: bin_overrides, submit_host: submit_host, strict_host_checking: strict_host_checking, executor: executor, queue_name: queue_name)
30
+ Adapters::PSIJ.new(psij: psij)
31
+ end
32
+ end
33
+
34
+ module Adapters
35
+ class PSIJ < Adapter
36
+ using Refinements::HashExtensions
37
+ using Refinements::ArrayExtensions
38
+ class Batch
39
+
40
+ attr_reader :cluster
41
+ attr_reader :conf
42
+ attr_reader :bin
43
+ attr_reader :bin_overrides
44
+ attr_reader :submit_host
45
+ attr_reader :strict_host_checking
46
+ attr_reader :executor
47
+ attr_reader :queue_name
48
+
49
+ class Error < StandardError; end
50
+
51
+ def initialize(cluster: nil, bin: nil, conf: nil, bin_overrides: {}, submit_host: "", strict_host_checking: true, executor: nil, queue_name: nil)
52
+ @cluster = cluster && cluster.to_s
53
+ @conf = conf && Pathname.new(conf.to_s)
54
+ @bin = Pathname.new(bin.to_s)
55
+ @bin_overrides = bin_overrides
56
+ @submit_host = submit_host.to_s
57
+ @strict_host_checking = strict_host_checking
58
+ @executor = executor
59
+ @queue_name = queue_name
60
+ end
61
+
62
+ def get_jobs(id: "", owner: nil)
63
+ id = id.to_s.strip()
64
+ params = {
65
+ id: id,
66
+ executor: executor,
67
+ }
68
+ args = params.map { |k, v| "--#{k}=#{v}" }
69
+ get_info_path = Pathname.new(__FILE__).dirname.expand_path.join("psij/get_info.py").to_s
70
+ jobs_data = call("python3", get_info_path, *args)
71
+ jobs_data = JSON.parse(jobs_data, symbolize_names: true)
72
+ jobs_data
73
+ end
74
+
75
+ def submit_job_path(args: [], chdir: nil, stdin: nil)
76
+ submit_path = Pathname.new(__FILE__).dirname.expand_path.join("psij/submit.py").to_s
77
+ call("python3", submit_path, *args, chdir: chdir, stdin: stdin)
78
+ end
79
+
80
+ def delete_job(args: [])
81
+ delete_path = Pathname.new(__FILE__).dirname.expand_path.join("psij/delete.py").to_s
82
+ call("python3", delete_path, *args)
83
+ rescue => e
84
+ raise JobAdapterError, e
85
+ end
86
+
87
+ def hold_job(args: [])
88
+ hold_path = Pathname.new(__FILE__).dirname.expand_path.join("psij/hold.py").to_s
89
+ call("python3", hold_path, *args)
90
+ end
91
+
92
+ def release_job(args: [])
93
+ release_path = Pathname.new(__FILE__).dirname.expand_path.join("psij/release.py").to_s
94
+ call("python3", release_path, *args)
95
+ end
96
+
97
+ def seconds_to_duration(time)
98
+ "%02d:%02d:%02d" % [time/3600, time/60%60, time%60]
99
+ end
100
+
101
+ private
102
+ # Call a forked psij script for a given cluster
103
+ def call(cmd, *args, env: {}, stdin: "", chdir: nil)
104
+ cmd = OodCore::Job::Adapters::Helper.bin_path(cmd, bin, bin_overrides)
105
+ cmd, args = OodCore::Job::Adapters::Helper.ssh_wrap(submit_host, cmd, args, strict_host_checking)
106
+ chdir ||= "."
107
+ o, e, s = Open3.capture3(env, cmd, *(args.map(&:to_s)), stdin_data: stdin, chdir: chdir.to_s)
108
+ s.success? ? o : raise(Error, e)
109
+ end
110
+
111
+ end
112
+
113
+
114
+ STATE_MAP = {
115
+ 'NEW' => :undetermined,
116
+ 'QUEUED' => :queued,
117
+ 'HELD' => :queued_held,
118
+ 'ACTIVE' => :running,
119
+ 'COMPLETED' => :completed,
120
+ }
121
+
122
+ def initialize(opts = {})
123
+ o = opts.to_h.symbolize_keys
124
+
125
+ @psij = o.fetch(:psij) { raise ArgumentError, "No psij object specified. Missing argument: psij" }
126
+ end
127
+
128
+
129
+ # The `submit` method saves a job script as a file and prepares a command to submit the job.
130
+ # Each optional argument specifies job dependencies (after, afterok, afternotok, afterany).
131
+ def submit(script, after: [], afterok: [], afternotok: [], afterany: [])
132
+ # convert OOD interfaces to PSI/J interfaces.
133
+ # Converted variables are shown as follows:
134
+ # OOD | PSI/J(JobSpec)
135
+ # --------------------+----------------------------------------------------
136
+ # submit_as_hold | X (not supported)
137
+ # rerunnable | X
138
+ # email_on_started | X
139
+ # email_on_terminated | X
140
+ # args | JobAttributes.custom_attributes
141
+ # job_environment | environment
142
+ # workdir | directory
143
+ # email | X
144
+ # job_name | name
145
+ # shell_path | #!<shell_path>
146
+ # input_path | stdin_path
147
+ # output_path | stdout_path
148
+ # error_path | stderr_path
149
+ # reservation_id | JobAttributes.reservation_id
150
+ # queue_name | JobAttributes.queue_name
151
+ # priority | X
152
+ # start_time | X
153
+ # wall_time | JobAttributes.duration
154
+ # accounting_id | JobAttributes.account or project_name(duplicated)
155
+ # job_array_request | X
156
+ # qos | X
157
+ # gpus_per_node | ResourceSpec.gpu_cores_per_process
158
+ # native | executable (join script.content)
159
+ # copy_environment | inherit_environment
160
+ # cores | ResourceSpec.cpu_cores_per_process
161
+ # after | X
162
+ # afterok | X
163
+ # afternotok | X
164
+ # afterany | X
165
+ # OOD does not have the following PSI/J interfaces.
166
+ # JobSpec class:
167
+ # pre_launch, post_launch, launcher
168
+ # ResourceSpec class:
169
+ # node_count, process_count, processes_per_node, exclusive_node_use
170
+
171
+ content = if script.shell_path.nil?
172
+ script.content
173
+ else
174
+ "#!#{script.shell_path}\n#{script.content}"
175
+ end
176
+
177
+ if ! script.native.nil?
178
+ native = script.native.join("\n") unless script.native.nil?
179
+ script.content.concat(native)
180
+ end
181
+
182
+ relative_path = "~/ood_tmp/run.sh"
183
+ full_path = File.expand_path("~/ood_tmp/run.sh")
184
+ FileUtils.mkdir_p(File.dirname(full_path))
185
+ File.open(full_path, "w") do |file|
186
+ file.write(content)
187
+ end
188
+
189
+ File.chmod(0755, full_path)
190
+
191
+ # convert OOD interfaces to PSI/J interfaces.
192
+ params = {
193
+ environment: script.job_environment,
194
+ directory: script.workdir,
195
+ name: script.job_name,
196
+ executable: relative_path,
197
+ stdin_path: script.input_path,
198
+ stdout_path: script.output_path,
199
+ stderr_path: script.error_path,
200
+ inherit_environment: script.copy_environment,
201
+ attributes: {queue_name: script.queue_name,
202
+ reservation_id: script.reservation_id,
203
+ account: script.accounting_id,
204
+ duration: script.wall_time,
205
+ custom_attributes: script.args},
206
+ resources: {__version: 1,
207
+ gpu_cores_per_process: script.gpus_per_node,
208
+ cpu_cores_per_process: script.cores}
209
+ }
210
+
211
+ if params[:attributes][:queue_name].nil?
212
+ params[:attributes][:queue_name] = @psij.queue_name
213
+ end
214
+ if params[:stdout_path].nil?
215
+ params[:stdout_path] = File.join(Dir.pwd, "stdout.txt")
216
+ end
217
+ if params[:stderr_path].nil?
218
+ params[:stderr_path] = File.join(Dir.pwd, "stderr.txt")
219
+ end
220
+
221
+ # add script.native to params[:attributes][:custom_attributes] of PSI/J.
222
+ if script.native && !script.native.empty?
223
+ if params[:attributes][:custom_attributes].nil?
224
+ params[:attributes][:custom_attributes] = script.native
225
+ else
226
+ params[:attributes][:custom_attributes].concat(script.native)
227
+ end
228
+ end
229
+ # Add script.native to params[:attributes][:custom_attributes] of PSI/J.
230
+ # Convert script.native array to hash.
231
+ # ['--<name>', 'value'] -> {name: value}
232
+ # ['--<name1>', '--<name2>'] -> {name1: "", name2: ""}
233
+ if ! params[:attributes][:custom_attributes].nil?
234
+ hash = {}
235
+ skip = false
236
+ len = params[:attributes][:custom_attributes].length()-1
237
+ for index in 0..len do
238
+ if skip
239
+ skip = false
240
+ next
241
+ end
242
+ v = params[:attributes][:custom_attributes][index]
243
+ has_hyphen = false
244
+ if v.start_with?("--")
245
+ name = v[2..-1]
246
+ has_hyphen = true
247
+ elsif v.start_with?("-")
248
+ name = v[1..-1]
249
+ has_hyphen = true
250
+ else
251
+ name = v
252
+ end
253
+ if index == len || !has_hyphen || params[:attributes][:custom_attributes][index+1].start_with?("-")
254
+ # if the next value does not exist or starts with "-", set an empty string
255
+ hash[name] = ""
256
+ else
257
+ # if the next value exists and does not start with "-", use it as the value
258
+ hash[name] = params[:attributes][:custom_attributes][index+1]
259
+ skip = true
260
+ end
261
+ end
262
+ params[:attributes][:custom_attributes] = hash
263
+ end
264
+
265
+ # reject keys whose values are nil.
266
+ params[:attributes] = params[:attributes].reject {|_, value |value.nil?}
267
+ params[:resources] = params[:resources].reject {|_, value |value.nil?}
268
+ data = params.reject {|_, value |value.nil?}
269
+
270
+ # serialize params to JSON
271
+ args = []
272
+ args[0] = @psij.executor
273
+
274
+ @psij.submit_job_path(args: args, chdir: script.workdir, stdin: JSON.generate(data))
275
+ rescue Batch::Error => e
276
+ raise JobAdapterError, e
277
+ end
278
+
279
+ def cluster_info
280
+ end
281
+
282
+ def accounts
283
+ end
284
+
285
+ def delete(id)
286
+ id = id.to_s.strip()
287
+ params = {
288
+ id: id,
289
+ executor: @psij.executor,
290
+ }
291
+ args = params.map { |k, v| "--#{k}=#{v}" }
292
+ @psij.delete_job(args: args)
293
+ rescue Batch::Error => e
294
+ raise JobAdapterError, e.message unless /Invalid job id specified/ =~ e.message
295
+ end
296
+
297
+ def hold(id)
298
+ id = id.to_s.strip()
299
+ params = {
300
+ id: id,
301
+ executor: @psij.executor,
302
+ }
303
+ args = params.map { |k, v| "--#{k}=#{v}" }
304
+ @psij.hold_job(args: args)
305
+ rescue Batch::Error => e
306
+ raise JobAdapterError, e.message unless /Invalid job id specified/ =~ e.message
307
+ end
308
+
309
+ def release(id)
310
+ id = id.to_s.strip()
311
+ params = {
312
+ id: id,
313
+ executor: @psij.executor,
314
+ }
315
+ args = params.map { |k, v| "--#{k}=#{v}" }
316
+ @psij.release_job(args: args)
317
+ rescue Batch::Error => e
318
+ raise JobAdapterError, e.message unless /Invalid job id specified/ =~ e.message
319
+ end
320
+
321
+
322
+ def info(id)
323
+ id = id.to_s
324
+
325
+ job_infos = @psij.get_jobs(id: id).map do |v|
326
+ parse_job_info(v)
327
+ end
328
+
329
+ if job_infos.empty?
330
+ Info.new(id: id, status: :completed)
331
+ else
332
+ job_infos.first
333
+ end
334
+ rescue Batch::Error => e
335
+ # set completed status if can't find job id
336
+ if /Invalid job id specified/ =~ e.message
337
+ Info.new(
338
+ id: id,
339
+ status: :completed
340
+ )
341
+ else
342
+ raise JobAdapterError, e.message
343
+ end
344
+ end
345
+
346
+ def info_all(attrs: nil)
347
+ @psij.get_jobs.map do |v|
348
+ parse_job_info(v)
349
+ end
350
+ rescue Batch::Error => e
351
+ raise JobAdapterError, e.message
352
+ end
353
+
354
+ def info_where_owner(owner, attrs: nil)
355
+ owner = Array.wrap(owner).map(&:to_s).join(',')
356
+ @psij.get_jobs(owner: owner).map do |v|
357
+ parse_job_info(v)
358
+ end
359
+ rescue Batch::Error => e
360
+ raise JobAdapterError, e.message
361
+ end
362
+
363
+ def status(id)
364
+ info(id.to_s).status
365
+ end
366
+
367
+ def directive_prefix
368
+ end
369
+
370
+ private
371
+ def get_state(st)
372
+ STATE_MAP.fetch(st, :undetermined)
373
+ end
374
+
375
+ def parse_job_info(v)
376
+ # parse input hash to Info object
377
+ # if v doesn't have :resourcelist, use a single placeholder node
378
+ if v[:resourcelist].nil? || v[:resourcelist].empty?
379
+ allocated_nodes = [ { name: "" } ]
380
+ else
381
+ allocated_nodes = v[:resourcelist]
382
+ end
383
+ if v[:cpu_time].nil?
384
+ cpu_time = nil
385
+ else
386
+ cpu_time = v[:cpu_time].to_i
387
+ end
388
+ Info.new(
389
+ id: v[:native_id],
390
+ status: get_state(v[:current_state]),
391
+ allocated_nodes: allocated_nodes,
392
+ submit_host: v[:submit_host],
393
+ job_name: v[:name],
394
+ job_owner: v[:owner],
395
+ accounting_id: v[:account],
396
+ procs: v[:process_count] ? v[:process_count].to_i : 0,
397
+ queue_name: v[:queue_name],
398
+ wallclock_time: v[:wall_time],
399
+ wallclock_limit: v[:duration],
400
+ cpu_time: cpu_time,
401
+ submission_time: v[:submission_time] ? Time.parse(v[:submission_time]): nil,
402
+ dispatch_time: v[:dispatch_time] ? Time.parse(v[:dispatch_time]): nil,
403
+ native: v
404
+ )
405
+ end
406
+
407
+ end
408
+ end
409
+ end
410
+ end
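Editorial note, analogous to the HTCondor sketch earlier and not part of the released diff: a minimal, hypothetical configuration for this adapter. The helper scripts shown above require python3 and the psij Python package on the host where they run.

require "ood_core"

psij = OodCore::Job::Factory.build(
  {
    adapter: "psij",
    executor: "slurm",        # hypothetical PSI/J executor name
    queue_name: "batch",
    submit_host: "",          # empty string runs the helper scripts locally instead of over ssh
    strict_host_checking: true
  }
)
psij.status("12345")          # => e.g. :running, or :completed when the id is unknown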
lib/ood_core/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  module OodCore
2
2
  # The current version of {OodCore}
3
- VERSION = "0.28.0"
3
+ VERSION = "0.29.0"
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ood_core
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.28.0
4
+ version: 0.29.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric Franz
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2025-07-16 00:00:00.000000000 Z
13
+ date: 2025-08-08 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: ood_support
@@ -210,6 +210,7 @@ files:
210
210
  - lib/ood_core/job/adapters/drmaa.rb
211
211
  - lib/ood_core/job/adapters/fujitsu_tcs.rb
212
212
  - lib/ood_core/job/adapters/helper.rb
213
+ - lib/ood_core/job/adapters/htcondor.rb
213
214
  - lib/ood_core/job/adapters/kubernetes.rb
214
215
  - lib/ood_core/job/adapters/kubernetes/batch.rb
215
216
  - lib/ood_core/job/adapters/kubernetes/helper.rb
@@ -224,6 +225,12 @@ files:
224
225
  - lib/ood_core/job/adapters/lsf/batch.rb
225
226
  - lib/ood_core/job/adapters/lsf/helper.rb
226
227
  - lib/ood_core/job/adapters/pbspro.rb
228
+ - lib/ood_core/job/adapters/psij.rb
229
+ - lib/ood_core/job/adapters/psij/delete.py
230
+ - lib/ood_core/job/adapters/psij/get_info.py
231
+ - lib/ood_core/job/adapters/psij/hold.py
232
+ - lib/ood_core/job/adapters/psij/release.py
233
+ - lib/ood_core/job/adapters/psij/submit.py
227
234
  - lib/ood_core/job/adapters/sge.rb
228
235
  - lib/ood_core/job/adapters/sge/batch.rb
229
236
  - lib/ood_core/job/adapters/sge/helper.rb